// Copyright (c) 2019,CAOHONGJU All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package flv

import (
	"encoding/binary"
	"errors"
	"fmt"

	"github.com/cnotch/ipchub/av/codec/hevc"
)

// Frame type values carried in the upper 4 bits of the first VIDEODATA byte.
//
// E.4.3.1 VIDEODATA
// Frame Type UB [4]
// Type of video frame. The following values are defined:
//
//	1 = key frame (for AVC, a seekable frame)
//	2 = inter frame (for AVC, a non-seekable frame)
//	3 = disposable inter frame (H.263 only)
//	4 = generated key frame (reserved for server use only)
//	5 = video info/command frame
const (
	FrameTypeReserved             = 0
	FrameTypeKeyFrame             = 1 // video h264 key frame
	FrameTypeInterFrame           = 2 // video h264 inter frame
	FrameTypeDisposableInterFrame = 3
	FrameTypeGeneratedKeyFrame    = 4
	FrameTypeVideoInfoFrame       = 5
	FrameTypeReserved1            = 6
)

// Codec identifier values carried in the lower 4 bits of the first VIDEODATA byte.
//
// E.4.3.1 VIDEODATA
// CodecID UB [4]
// Codec Identifier.
// The following values are defined:
//
//	2 = Sorenson H.263
//	3 = Screen video
//	4 = On2 VP6
//	5 = On2 VP6 with alpha channel
//	6 = Screen video version 2
//	7 = AVC / H264
//	12 = HEVC / H265 (de-facto extension)
const (
	CodecIDReserved               = 0
	CodecIDReserved1              = 1
	CodecIDSorensonH263           = 2
	CodecIDScreenVideo            = 3
	CodecIDOn2VP6                 = 4
	CodecIDOn2VP6WithAlphaChannel = 5
	CodecIDScreenVideoVersion2    = 6
	CodecIDAVC                    = 7 // h264
	CodecIDReserved2              = 8
	CodecIDHEVC                   = 12 // de-facto extension identifier for h265
)

// CodecIDName returns the display name of the video codec identified by
// codecID. Reserved and unknown identifiers yield the empty string.
func CodecIDName(codecID int32) string {
	// Indexed by codec identifier; slots that are not listed stay "".
	names := [...]string{
		CodecIDSorensonH263:           "H263",
		CodecIDScreenVideo:            "ScreenVideo",
		CodecIDOn2VP6:                 "On2VP6",
		CodecIDOn2VP6WithAlphaChannel: "On2VP6WithAlphaChannel",
		CodecIDScreenVideoVersion2:    "ScreenVideoVersion2",
		CodecIDAVC:                    "H264",
		CodecIDHEVC:                   "H265",
	}

	if codecID < 0 || int(codecID) >= len(names) {
		return ""
	}
	return names[codecID]
}

// H2645PacketType IF CodecID == 7 or 12 UI8
// The following values are defined:
//
//	0 = AVC sequence header
//	1 = AVC NALU
//	2 = AVC end of sequence (lower level NALU sequence ender is
//	    not required or supported)
const (
	H2645PacketTypeSequenceHeader    = 0
	H2645PacketTypeNALU              = 1
	H2645PacketTypeSequenceHeaderEOF = 2
	H2645PacketTypeReserved          = 3
)

// VideoData is the video data carried in an FLV tag.
//
// For CodecID == CodecIDAVC, Body holds:
//
//	IF H2645PacketType == H2645PacketTypeSequenceHeader
//	    AVCDecoderConfigurationRecord
//	ELSE
//	    One or more NALUs (Full frames are required)
//
// For CodecID == CodecIDHEVC, Body holds:
//
//	IF H2645PacketType == H2645PacketTypeSequenceHeader
//	    HEVCDecoderConfigurationRecord
//	ELSE
//	    One or more
NALUs (Full frames are required) 110 type VideoData struct { 111 FrameType byte // 4 bits; 帧类型 112 CodecID byte // 4 bits; 编解码器标识 113 H2645PacketType byte // 8 bits; 仅 AVC/HEVC 编码有效,AVC 包类型 114 CompositionTime uint32 // 24 bits; 仅 AVC/HEVC 编码有效,表示PTS 与 DTS 的时间偏移值,单位 ms,记作 CTS。 115 Body []byte // 原始视频 116 } 117 118 var _ TagData = &VideoData{} 119 120 // Unmarshal . 121 // Note: Unmarshal not copy the data 122 func (videoData *VideoData) Unmarshal(data []byte) error { 123 if len(data) < 1 { 124 return errors.New("data.length < 1") 125 } 126 127 offset := 0 128 129 videoData.FrameType = data[offset] >> 4 130 videoData.CodecID = data[offset] & 0x0f 131 offset++ 132 133 if videoData.CodecID == CodecIDAVC || videoData.CodecID == CodecIDHEVC { 134 if len(data) < 5 { 135 return errors.New("data.length < 5") 136 } 137 temp := binary.BigEndian.Uint32(data[offset:]) 138 videoData.H2645PacketType = byte(temp >> 24) 139 videoData.CompositionTime = temp & 0x00ffffff 140 offset += 4 141 142 if videoData.H2645PacketType == H2645PacketTypeNALU { 143 if len(data) < 9 { 144 return errors.New("data.length < 9") 145 } 146 size := int(binary.BigEndian.Uint32(data[offset:])) 147 offset += 4 148 if size > len(data)-offset { 149 return fmt.Errorf("data.length < %d", size+offset) 150 } 151 } 152 } 153 154 videoData.Body = data[offset:] 155 return nil 156 } 157 158 // MarshalSize . 159 func (videoData *VideoData) MarshalSize() int { 160 if videoData.H2645PacketType == H2645PacketTypeNALU { 161 return 9 + len(videoData.Body) 162 } 163 return 5 + len(videoData.Body) 164 } 165 166 // Marshal . 
167 func (videoData *VideoData) Marshal() ([]byte, error) { 168 buff := make([]byte, videoData.MarshalSize()) 169 offset := 0 170 buff[offset] = (videoData.FrameType << 4) | (videoData.CodecID & 0x0f) 171 172 offset++ 173 if videoData.CodecID == CodecIDAVC || videoData.CodecID == CodecIDHEVC { 174 binary.BigEndian.PutUint32(buff[offset:], 175 (uint32(videoData.H2645PacketType)<<24)|(videoData.CompositionTime&0x00ffffff)) 176 offset += 4 177 178 if videoData.H2645PacketType == H2645PacketTypeNALU { 179 binary.BigEndian.PutUint32(buff[offset:], uint32(len(videoData.Body))) 180 offset += 4 181 } 182 } 183 184 offset += copy(buff[offset:], videoData.Body) 185 186 return buff[:offset], nil 187 } 188 189 // AVCDecoderConfigurationRecord . 190 // aligned(8) class AVCDecoderConfigurationRecord { 191 // unsigned int(8) configurationVersion = 1; 192 // unsigned int(8) AVCProfileIndication; 193 // unsigned int(8) profile_compatibility; 194 // unsigned int(8) AVCLevelIndication; 195 196 // bit(6) reserved = '111111'b; 197 // unsigned int(2) lengthSizeMinusOne; 198 199 // bit(3) reserved = '111'b; 200 // unsigned int(5) numOfSequenceParameterSets; 201 // 202 // for (i=0; i< numOfSequenceParameterSets; i++) { 203 // unsigned int(16) sequenceParameterSetLength ; 204 // bit(8*sequenceParameterSetLength) sequenceParameterSetNALUnit; 205 // } 206 // unsigned int(8) numOfPictureParameterSets; 207 // for (i=0; i< numOfPictureParameterSets; i++) { 208 // unsigned int(16) pictureParameterSetLength; 209 // bit(8*pictureParameterSetLength) pictureParameterSetNALUnit; 210 // } 211 // } 212 type AVCDecoderConfigurationRecord struct { 213 ConfigurationVersion byte 214 AVCProfileIndication byte 215 ProfileCompatibility byte 216 AVCLevelIndication byte 217 SPS []byte 218 PPS []byte 219 } 220 221 // NewAVCDecoderConfigurationRecord creates and initializes a new AVCDecoderConfigurationRecord 222 func NewAVCDecoderConfigurationRecord(sps, pps []byte) *AVCDecoderConfigurationRecord { 223 return 
&AVCDecoderConfigurationRecord{ 224 ConfigurationVersion: 1, 225 AVCProfileIndication: sps[1], 226 ProfileCompatibility: sps[2], 227 AVCLevelIndication: sps[3], 228 SPS: sps, 229 PPS: pps, 230 } 231 } 232 233 // Unmarshal . 234 // Note: Unmarshal not copy the data 235 func (record *AVCDecoderConfigurationRecord) Unmarshal(data []byte) error { 236 if len(data) < 11 { 237 return errors.New("data.length < 11") 238 } 239 240 offset := 0 241 242 record.ConfigurationVersion = data[offset] 243 offset++ 244 245 record.AVCProfileIndication = data[offset] 246 offset++ 247 248 record.ProfileCompatibility = data[offset] 249 offset++ 250 251 record.AVCLevelIndication = data[offset] 252 offset++ 253 254 offset += 2 // 255 256 spsLen := binary.BigEndian.Uint16(data[offset:]) 257 offset += 2 258 259 if len(data) < 11+int(spsLen) { 260 return errors.New("Insufficient Data: SPS") 261 } 262 record.SPS = data[offset : offset+int(spsLen)] 263 offset += int(spsLen) 264 265 offset++ 266 267 ppsLen := binary.BigEndian.Uint16(data[offset:]) 268 offset += 2 269 270 if len(data) < 11+int(spsLen)+int(ppsLen) { 271 return errors.New("Insufficient Data: PPS") 272 } 273 record.PPS = data[offset : offset+int(ppsLen)] 274 return nil 275 } 276 277 // MarshalSize . 278 func (record *AVCDecoderConfigurationRecord) MarshalSize() int { 279 return 4 + 2 + 2 + len(record.SPS) + 1 + 2 + len(record.PPS) 280 } 281 282 // Marshal . 
283 func (record *AVCDecoderConfigurationRecord) Marshal() ([]byte, error) { 284 buff := make([]byte, record.MarshalSize()) 285 286 offset := 0 287 288 buff[offset] = record.ConfigurationVersion 289 offset++ 290 291 buff[offset] = record.AVCProfileIndication 292 offset++ 293 294 buff[offset] = record.ProfileCompatibility 295 offset++ 296 297 buff[offset] = record.AVCLevelIndication 298 offset++ 299 300 // lengthSizeMinusOne 是 H.264 视频中 NALU 的长度, 301 // 计算方法是 1 + (lengthSizeMinusOne & 3),实际计算结果一直是4 302 buff[offset] = 0xff 303 offset++ 304 305 // numOfSequenceParameterSets SPS 的个数,计算方法是 numOfSequenceParameterSets & 0x1F, 306 // 实际计算结果一直为1 307 buff[offset] = 0xe1 308 offset++ 309 310 // sequenceParameterSetLength SPS 的长度 311 binary.BigEndian.PutUint16(buff[offset:], uint16(len(record.SPS))) 312 offset += 2 313 314 // SPS data 315 offset += copy(buff[offset:], record.SPS) 316 317 // numOfPictureParameterSets PPS 的个数 318 buff[offset] = 0x01 319 offset++ 320 321 // pictureParameterSetLength SPS 的长度 322 binary.BigEndian.PutUint16(buff[offset:], uint16(len(record.PPS))) 323 offset += 2 324 325 // PPS data 326 offset += copy(buff[offset:], record.PPS) 327 328 return buff, nil 329 } 330 331 // HEVCDecoderConfigurationRecord . 
332 type HEVCDecoderConfigurationRecord struct { 333 ConfigurationVersion uint8 334 335 GeneralProfileSpace uint8 336 GeneralTierFlag uint8 337 GeneralProfileIDC uint8 338 GeneralProfileCompatibilityFlags uint32 339 GeneralConstraintIndicatorFlags uint64 340 GeneralLevelIDC uint8 341 342 LengthSizeMinusOne uint8 343 344 MaxSubLayers uint8 345 TemporalIdNestingFlag uint8 346 347 ChromaFormatIDC uint8 348 BitDepthLumaMinus8 uint8 349 BitDepthChromaMinus8 uint8 350 351 VPS []byte 352 SPS []byte 353 PPS []byte 354 } 355 356 // NewHEVCDecoderConfigurationRecord creates and initializes a new HEVCDecoderConfigurationRecord 357 func NewHEVCDecoderConfigurationRecord(vps, sps, pps []byte) *HEVCDecoderConfigurationRecord { 358 record := &HEVCDecoderConfigurationRecord{ 359 ConfigurationVersion: 1, 360 LengthSizeMinusOne: 3, // 4 bytes 361 GeneralProfileCompatibilityFlags: 0xffffffff, 362 GeneralConstraintIndicatorFlags: 0xffffffffffff, 363 VPS: vps, 364 SPS: sps, 365 PPS: pps, 366 } 367 368 record.init() 369 return record 370 } 371 372 func (record *HEVCDecoderConfigurationRecord) init() error { 373 var rawVps hevc.H265RawVPS 374 if err := rawVps.Decode(record.VPS); err != nil { 375 return err 376 } 377 if rawVps.Vps_max_sub_layers_minus1+1 > record.MaxSubLayers { 378 record.MaxSubLayers = rawVps.Vps_max_sub_layers_minus1 + 1 379 } 380 record.applyPLT(&rawVps.Profile_tier_level) 381 382 var rawSps hevc.H265RawSPS 383 if err := rawSps.Decode(record.SPS); err != nil { 384 return err 385 } 386 if rawSps.Sps_max_sub_layers_minus1+1 > record.MaxSubLayers { 387 record.MaxSubLayers = rawSps.Sps_max_sub_layers_minus1 + 1 388 } 389 390 // sps_temporal_id_nesting_flag 391 record.TemporalIdNestingFlag = rawSps.Sps_temporal_id_nesting_flag 392 record.applyPLT(&rawSps.Profile_tier_level) 393 394 record.ChromaFormatIDC = rawSps.Chroma_format_idc 395 record.BitDepthLumaMinus8 = rawSps.Bit_depth_luma_minus8 396 record.BitDepthChromaMinus8 = rawSps.Bit_depth_chroma_minus8 397 398 return nil 
399 } 400 401 func (record *HEVCDecoderConfigurationRecord) applyPLT(ptl *hevc.H265RawProfileTierLevel) { 402 record.GeneralProfileSpace = ptl.General_profile_space 403 404 if ptl.General_tier_flag > record.GeneralTierFlag { 405 record.GeneralLevelIDC = ptl.General_level_idc 406 407 record.GeneralTierFlag = ptl.General_tier_flag 408 } else { 409 if ptl.General_level_idc > record.GeneralLevelIDC { 410 record.GeneralLevelIDC = ptl.General_level_idc 411 } 412 } 413 414 if ptl.General_profile_idc > record.GeneralProfileIDC { 415 record.GeneralProfileIDC = ptl.General_profile_idc 416 } 417 418 record.GeneralProfileCompatibilityFlags &= ptl.GeneralProfileCompatibilityFlags 419 record.GeneralConstraintIndicatorFlags &= ptl.GeneralConstraintIndicatorFlags 420 } 421 422 // Unmarshal . 423 func (record *HEVCDecoderConfigurationRecord) Unmarshal(data []byte) error { 424 if len(data) < 23 { 425 return errors.New("data.length < 23") 426 } 427 offset := 0 428 429 // unsigned int(8) configurationVersion = 1; 430 record.ConfigurationVersion = data[offset] 431 offset++ 432 433 // unsigned int(2) general_profile_space; 434 // unsigned int(1) general_tier_flag; 435 // unsigned int(5) general_profile_idc; 436 record.GeneralProfileSpace = data[offset] >> 6 437 record.GeneralTierFlag = (data[offset] >> 5) & 0x01 438 record.GeneralProfileIDC = data[offset] & 0x1f 439 offset++ 440 441 // unsigned int(32) general_profile_compatibility_flags 442 record.GeneralProfileCompatibilityFlags = binary.BigEndian.Uint32(data[offset:]) 443 offset += 4 444 445 // unsigned int(48) general_constraint_indicator_flags 446 record.GeneralConstraintIndicatorFlags = uint64(binary.BigEndian.Uint32(data[offset:])) 447 record.GeneralConstraintIndicatorFlags <<= 16 448 offset += 4 449 record.GeneralConstraintIndicatorFlags |= uint64(binary.BigEndian.Uint16(data[offset:])) 450 offset += 2 451 452 // unsigned int(8) general_level_idc; 453 record.GeneralLevelIDC = data[offset] 454 offset++ 455 456 // bit(4) reserved 
= ‘1111’b; 457 // unsigned int(12) min_spatial_segmentation_idc; 458 // bit(6) reserved = ‘111111’b; 459 // unsigned int(2) parallelismType; 460 offset += 2 461 offset++ 462 463 // bit(6) reserved = ‘111111’b; 464 // unsigned int(2) chromaFormat; 465 record.ChromaFormatIDC = data[offset] & 0x03 466 offset++ 467 468 // bit(5) reserved = ‘11111’b; 469 // unsigned int(3) bitDepthLumaMinus8; 470 record.BitDepthLumaMinus8 = data[offset] & 0x07 471 offset++ 472 473 // bit(5) reserved = ‘11111’b; 474 // unsigned int(3) bitDepthChromaMinus8; 475 record.BitDepthChromaMinus8 = data[offset] & 0x07 476 offset++ 477 478 // bit(16) avgFrameRate; 479 offset += 2 480 481 // bit(2) constantFrameRate; 482 // bit(3) MaxSubLayers; 483 // bit(1) temporalIdNested; 484 // unsigned int(2) lengthSizeMinusOne; 485 record.MaxSubLayers = (data[offset] >> 3) & 0x07 486 record.TemporalIdNestingFlag = (data[offset] >> 2) & 0x01 487 record.LengthSizeMinusOne = data[offset] & 0x03 488 offset++ 489 490 // num of vps sps pps 491 numNals := int(data[offset]) 492 offset++ 493 494 for i := 0; i < numNals; i++ { 495 if len(data) < offset+5 { 496 return errors.New("Insufficient data") 497 } 498 nalType := data[offset] 499 offset++ 500 501 // num of vps 502 num := binary.BigEndian.Uint16(data[offset:]) 503 offset += 2 504 505 // length 506 length := binary.BigEndian.Uint16(data[offset:]) 507 offset += 2 508 if num != 1 { 509 return errors.New("Multiple VPS or SPS or PPS NAL is not supported") 510 } 511 if len(data) < offset+int(length) { 512 return errors.New("Insufficient raw data") 513 } 514 raw := data[offset : offset+int(length)] 515 offset += int(length) 516 switch nalType { 517 case hevc.NalVps: 518 record.VPS = raw 519 case hevc.NalSps: 520 record.SPS = raw 521 case hevc.NalPps: 522 record.PPS = raw 523 default: 524 return errors.New("Only VPS SPS PPS NAL is supported") 525 } 526 } 527 return nil 528 } 529 530 // MarshalSize . 
531 func (record *HEVCDecoderConfigurationRecord) MarshalSize() int { 532 return 23 + 5 + len(record.VPS) + 5 + len(record.SPS) + 5 + len(record.PPS) 533 } 534 535 // Marshal . 536 func (record *HEVCDecoderConfigurationRecord) Marshal() ([]byte, error) { 537 buff := make([]byte, record.MarshalSize()) 538 offset := 0 539 540 // unsigned int(8) configurationVersion = 1; 541 buff[offset] = 0x1 542 offset++ 543 544 // unsigned int(2) general_profile_space; 545 // unsigned int(1) general_tier_flag; 546 // unsigned int(5) general_profile_idc; 547 buff[offset] = record.GeneralProfileSpace<<6 | record.GeneralTierFlag<<5 | record.GeneralProfileIDC 548 offset++ 549 550 // unsigned int(32) general_profile_compatibility_flags 551 binary.BigEndian.PutUint32(buff[offset:], record.GeneralProfileCompatibilityFlags) 552 offset += 4 553 554 // unsigned int(48) general_constraint_indicator_flags 555 binary.BigEndian.PutUint32(buff[offset:], uint32(record.GeneralConstraintIndicatorFlags>>16)) 556 offset += 4 557 binary.BigEndian.PutUint16(buff[offset:], uint16(record.GeneralConstraintIndicatorFlags)) 558 offset += 2 559 560 // unsigned int(8) general_level_idc; 561 buff[offset] = record.GeneralLevelIDC 562 offset++ 563 564 // bit(4) reserved = ‘1111’b; 565 // unsigned int(12) min_spatial_segmentation_idc; 566 // bit(6) reserved = ‘111111’b; 567 // unsigned int(2) parallelismType; 568 // TODO chef: 这两个字段没有解析 569 binary.BigEndian.PutUint16(buff[offset:], 0xf000) 570 offset += 2 571 buff[offset] = 0xfc 572 offset++ 573 574 // bit(6) reserved = ‘111111’b; 575 // unsigned int(2) chromaFormat; 576 buff[offset] = record.ChromaFormatIDC | 0xfc 577 offset++ 578 579 // bit(5) reserved = ‘11111’b; 580 // unsigned int(3) bitDepthLumaMinus8; 581 buff[offset] = record.BitDepthLumaMinus8 | 0xf8 582 offset++ 583 584 // bit(5) reserved = ‘11111’b; 585 // unsigned int(3) bitDepthChromaMinus8; 586 buff[offset] = record.BitDepthChromaMinus8 | 0xf8 587 offset++ 588 589 // bit(16) avgFrameRate; 590 
binary.BigEndian.PutUint16(buff[offset:], 0) 591 offset += 2 592 593 // bit(2) constantFrameRate; 594 // bit(3) numTemporalLayers; 595 // bit(1) temporalIdNested; 596 // unsigned int(2) lengthSizeMinusOne; 597 buff[offset] = 0<<6 | record.MaxSubLayers<<3 | record.TemporalIdNestingFlag<<2 | record.LengthSizeMinusOne 598 offset++ 599 600 // num of vps sps pps 601 buff[offset] = 0x03 602 offset++ 603 604 pset := []struct { 605 nalType uint8 606 data []byte 607 }{ 608 {hevc.NalVps, record.VPS}, 609 {hevc.NalSps, record.SPS}, 610 {hevc.NalPps, record.PPS}, 611 } 612 for _, ps := range pset { 613 buff[offset] = ps.nalType 614 offset++ 615 616 // num of vps 617 binary.BigEndian.PutUint16(buff[offset:], 1) 618 offset += 2 619 620 // length 621 binary.BigEndian.PutUint16(buff[offset:], uint16(len(ps.data))) 622 offset += 2 623 624 copy(buff[offset:], ps.data) 625 offset += len(ps.data) 626 } 627 628 return buff, nil 629 }