1/**********
2This library is free software; you can redistribute it and/or modify it under
3the terms of the GNU Lesser General Public License as published by the
4Free Software Foundation; either version 3 of the License, or (at your
5option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.)
6
7This library is distributed in the hope that it will be useful, but WITHOUT
8ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
10more details.
11
12You should have received a copy of the GNU Lesser General Public License
13along with this library; if not, write to the Free Software Foundation, Inc.,
1451 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15**********/
16// "liveMedia"
17// Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved.
18// A filter that breaks up a H.264 or H.265 Video Elementary Stream into NAL units.
19// Implementation
20
21#include "H264or5VideoStreamFramer.hh"
22#include "MPEGVideoStreamParser.hh"
23#include "BitVector.hh"
24
25////////// H264or5VideoStreamParser definition //////////
26
27class H264or5VideoStreamParser: public MPEGVideoStreamParser {
28public:
29 H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource,
30 FramedSource* inputSource, Boolean includeStartCodeInOutput);
31 virtual ~H264or5VideoStreamParser();
32
33private: // redefined virtual functions:
34 virtual void flushInput();
35 virtual unsigned parse();
36
37private:
38 H264or5VideoStreamFramer* usingSource() {
39 return (H264or5VideoStreamFramer*)fUsingSource;
40 }
41
42 Boolean isVPS(u_int8_t nal_unit_type) { return usingSource()->isVPS(nal_unit_type); }
43 Boolean isSPS(u_int8_t nal_unit_type) { return usingSource()->isSPS(nal_unit_type); }
44 Boolean isPPS(u_int8_t nal_unit_type) { return usingSource()->isPPS(nal_unit_type); }
45 Boolean isVCL(u_int8_t nal_unit_type) { return usingSource()->isVCL(nal_unit_type); }
46 Boolean isSEI(u_int8_t nal_unit_type);
47 Boolean isEOF(u_int8_t nal_unit_type);
48 Boolean usuallyBeginsAccessUnit(u_int8_t nal_unit_type);
49
50 void removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize);
51
52 void analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale);
53 void analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale);
54 void profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1);
55 void analyze_vui_parameters(BitVector& bv, unsigned& num_units_in_tick, unsigned& time_scale);
56 void analyze_hrd_parameters(BitVector& bv);
57 void analyze_sei_data(u_int8_t nal_unit_type);
58 void analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload);
59
60private:
61 int fHNumber; // 264 or 265
62 unsigned fOutputStartCodeSize;
63 Boolean fHaveSeenFirstStartCode, fHaveSeenFirstByteOfNALUnit;
64 u_int8_t fFirstByteOfNALUnit;
65 double fParsedFrameRate;
66 // variables set & used in the specification:
67 unsigned cpb_removal_delay_length_minus1, dpb_output_delay_length_minus1;
68 Boolean CpbDpbDelaysPresentFlag, pic_struct_present_flag;
69 double DeltaTfiDivisor;
70};
71
72
73////////// H264or5VideoStreamFramer implementation //////////
74
75H264or5VideoStreamFramer
76::H264or5VideoStreamFramer(int hNumber, UsageEnvironment& env, FramedSource* inputSource,
77 Boolean createParser,
78 Boolean includeStartCodeInOutput, Boolean insertAccessUnitDelimiters)
79 : MPEGVideoStreamFramer(env, inputSource),
80 fHNumber(hNumber), fIncludeStartCodeInOutput(includeStartCodeInOutput),
81 fInsertAccessUnitDelimiters(insertAccessUnitDelimiters),
82 fLastSeenVPS(NULL), fLastSeenVPSSize(0),
83 fLastSeenSPS(NULL), fLastSeenSPSSize(0),
84 fLastSeenPPS(NULL), fLastSeenPPSSize(0) {
85 fParser = createParser
86 ? new H264or5VideoStreamParser(hNumber, this, inputSource, includeStartCodeInOutput)
87 : NULL;
88 fNextPresentationTime = fPresentationTimeBase;
89 fFrameRate = 25.0; // We assume a frame rate of 25 fps, unless we learn otherwise (from parsing a VPS or SPS NAL unit)
90}
91
92H264or5VideoStreamFramer::~H264or5VideoStreamFramer() {
93 delete[] fLastSeenPPS;
94 delete[] fLastSeenSPS;
95 delete[] fLastSeenVPS;
96}
97
98#define VPS_MAX_SIZE 1000 // larger than the largest possible VPS (Video Parameter Set) NAL unit
99
100void H264or5VideoStreamFramer::saveCopyOfVPS(u_int8_t* from, unsigned size) {
101 if (from == NULL) return;
102 delete[] fLastSeenVPS;
103 fLastSeenVPS = new u_int8_t[size];
104 memmove(fLastSeenVPS, from, size);
105
106 fLastSeenVPSSize = size;
107}
108
109#define SPS_MAX_SIZE 1000 // larger than the largest possible SPS (Sequence Parameter Set) NAL unit
110
111void H264or5VideoStreamFramer::saveCopyOfSPS(u_int8_t* from, unsigned size) {
112 if (from == NULL) return;
113 delete[] fLastSeenSPS;
114 fLastSeenSPS = new u_int8_t[size];
115 memmove(fLastSeenSPS, from, size);
116
117 fLastSeenSPSSize = size;
118}
119
120void H264or5VideoStreamFramer::saveCopyOfPPS(u_int8_t* from, unsigned size) {
121 if (from == NULL) return;
122 delete[] fLastSeenPPS;
123 fLastSeenPPS = new u_int8_t[size];
124 memmove(fLastSeenPPS, from, size);
125
126 fLastSeenPPSSize = size;
127}
128
129Boolean H264or5VideoStreamFramer::isVPS(u_int8_t nal_unit_type) {
130 // VPS NAL units occur in H.265 only:
131 return fHNumber == 265 && nal_unit_type == 32;
132}
133
134Boolean H264or5VideoStreamFramer::isSPS(u_int8_t nal_unit_type) {
135 return fHNumber == 264 ? nal_unit_type == 7 : nal_unit_type == 33;
136}
137
138Boolean H264or5VideoStreamFramer::isPPS(u_int8_t nal_unit_type) {
139 return fHNumber == 264 ? nal_unit_type == 8 : nal_unit_type == 34;
140}
141
142Boolean H264or5VideoStreamFramer::isVCL(u_int8_t nal_unit_type) {
143 return fHNumber == 264
144 ? (nal_unit_type <= 5 && nal_unit_type > 0)
145 : (nal_unit_type <= 31);
146}
147
148void H264or5VideoStreamFramer::doGetNextFrame() {
149 if (fInsertAccessUnitDelimiters && pictureEndMarker()) {
150 // Deliver an "access_unit_delimiter" NAL unit instead:
151 unsigned const startCodeSize = fIncludeStartCodeInOutput ? 4: 0;
152 unsigned const audNALSize = fHNumber == 264 ? 2 : 3;
153
154 fFrameSize = startCodeSize + audNALSize;
155 if (fFrameSize > fMaxSize) { // there's no space
156 fNumTruncatedBytes = fFrameSize - fMaxSize;
157 fFrameSize = fMaxSize;
158 handleClosure();
159 return;
160 }
161
162 if (fIncludeStartCodeInOutput) {
163 *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x01;
164 }
165 if (fHNumber == 264) {
166 *fTo++ = 9; // "Access unit delimiter" nal_unit_type
167 *fTo++ = 0xF0; // "primary_pic_type" (7); "rbsp_trailing_bits()"
168 } else { // H.265
169 *fTo++ = 35<<1; // "Access unit delimiter" nal_unit_type
170 *fTo++ = 0; // "nuh_layer_id" (0); "nuh_temporal_id_plus1" (0) (Is this correct??)
171 *fTo++ = 0x50; // "pic_type" (2); "rbsp_trailing_bits()" (Is this correct??)
172 }
173
174 pictureEndMarker() = False; // for next time
175 afterGetting(this);
176 } else {
177 // Do the normal delivery of a NAL unit from the parser:
178 MPEGVideoStreamFramer::doGetNextFrame();
179 }
180}
181
182
183////////// H264or5VideoStreamParser implementation //////////
184
185H264or5VideoStreamParser
186::H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource,
187 FramedSource* inputSource, Boolean includeStartCodeInOutput)
188 : MPEGVideoStreamParser(usingSource, inputSource),
189 fHNumber(hNumber), fOutputStartCodeSize(includeStartCodeInOutput ? 4 : 0), fHaveSeenFirstStartCode(False), fHaveSeenFirstByteOfNALUnit(False), fParsedFrameRate(0.0),
190 cpb_removal_delay_length_minus1(23), dpb_output_delay_length_minus1(23),
191 CpbDpbDelaysPresentFlag(0), pic_struct_present_flag(0),
192 DeltaTfiDivisor(2.0) {
193}
194
195H264or5VideoStreamParser::~H264or5VideoStreamParser() {
196}
197
198#define PREFIX_SEI_NUT 39 // for H.265
199#define SUFFIX_SEI_NUT 40 // for H.265
200Boolean H264or5VideoStreamParser::isSEI(u_int8_t nal_unit_type) {
201 return fHNumber == 264
202 ? nal_unit_type == 6
203 : (nal_unit_type == PREFIX_SEI_NUT || nal_unit_type == SUFFIX_SEI_NUT);
204}
205
206Boolean H264or5VideoStreamParser::isEOF(u_int8_t nal_unit_type) {
207 // "end of sequence" or "end of (bit)stream"
208 return fHNumber == 264
209 ? (nal_unit_type == 10 || nal_unit_type == 11)
210 : (nal_unit_type == 36 || nal_unit_type == 37);
211}
212
213Boolean H264or5VideoStreamParser::usuallyBeginsAccessUnit(u_int8_t nal_unit_type) {
214 return fHNumber == 264
215 ? (nal_unit_type >= 6 && nal_unit_type <= 9) || (nal_unit_type >= 14 && nal_unit_type <= 18)
216 : (nal_unit_type >= 32 && nal_unit_type <= 35) || (nal_unit_type == 39)
217 || (nal_unit_type >= 41 && nal_unit_type <= 44)
218 || (nal_unit_type >= 48 && nal_unit_type <= 55);
219}
220
221void H264or5VideoStreamParser
222::removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize) {
223 u_int8_t const* nalUnitOrig = fStartOfFrame + fOutputStartCodeSize;
224 unsigned const numBytesInNALunit = fTo - nalUnitOrig;
225 nalUnitCopySize
226 = removeH264or5EmulationBytes(nalUnitCopy, maxSize, nalUnitOrig, numBytesInNALunit);
227}
228
229#ifdef DEBUG
// Human-readable names for the 32 possible H.264 "nal_unit_type" values (indexed by type):
char const* nal_unit_type_description_h264[32] = {
  /*  0 */ "Unspecified",
  /*  1 */ "Coded slice of a non-IDR picture",
  /*  2 */ "Coded slice data partition A",
  /*  3 */ "Coded slice data partition B",
  /*  4 */ "Coded slice data partition C",
  /*  5 */ "Coded slice of an IDR picture",
  /*  6 */ "Supplemental enhancement information (SEI)",
  /*  7 */ "Sequence parameter set",
  /*  8 */ "Picture parameter set",
  /*  9 */ "Access unit delimiter",
  /* 10 */ "End of sequence",
  /* 11 */ "End of stream",
  /* 12 */ "Filler data",
  /* 13 */ "Sequence parameter set extension",
  /* 14 */ "Prefix NAL unit",
  /* 15 */ "Subset sequence parameter set",
  /* 16 */ "Reserved",
  /* 17 */ "Reserved",
  /* 18 */ "Reserved",
  /* 19 */ "Coded slice of an auxiliary coded picture without partitioning",
  /* 20 */ "Coded slice extension",
  /* 21 */ "Reserved",
  /* 22 */ "Reserved",
  /* 23 */ "Reserved",
  /* 24 */ "Unspecified",
  /* 25 */ "Unspecified",
  /* 26 */ "Unspecified",
  /* 27 */ "Unspecified",
  /* 28 */ "Unspecified",
  /* 29 */ "Unspecified",
  /* 30 */ "Unspecified",
  /* 31 */ "Unspecified"
};
// Human-readable names for the 64 possible H.265 "nal_unit_type" values (indexed by type):
char const* nal_unit_type_description_h265[64] = {
  /*  0 */ "Coded slice segment of a non-TSA, non-STSA trailing picture",
  /*  1 */ "Coded slice segment of a non-TSA, non-STSA trailing picture",
  /*  2 */ "Coded slice segment of a TSA picture",
  /*  3 */ "Coded slice segment of a TSA picture",
  /*  4 */ "Coded slice segment of a STSA picture",
  /*  5 */ "Coded slice segment of a STSA picture",
  /*  6 */ "Coded slice segment of a RADL picture",
  /*  7 */ "Coded slice segment of a RADL picture",
  /*  8 */ "Coded slice segment of a RASL picture",
  /*  9 */ "Coded slice segment of a RASL picture",
  /* 10 */ "Reserved",
  /* 11 */ "Reserved",
  /* 12 */ "Reserved",
  /* 13 */ "Reserved",
  /* 14 */ "Reserved",
  /* 15 */ "Reserved",
  /* 16 */ "Coded slice segment of a BLA picture",
  /* 17 */ "Coded slice segment of a BLA picture",
  /* 18 */ "Coded slice segment of a BLA picture",
  /* 19 */ "Coded slice segment of an IDR picture",
  /* 20 */ "Coded slice segment of an IDR picture",
  /* 21 */ "Coded slice segment of a CRA picture",
  /* 22 */ "Reserved",
  /* 23 */ "Reserved",
  /* 24 */ "Reserved",
  /* 25 */ "Reserved",
  /* 26 */ "Reserved",
  /* 27 */ "Reserved",
  /* 28 */ "Reserved",
  /* 29 */ "Reserved",
  /* 30 */ "Reserved",
  /* 31 */ "Reserved",
  /* 32 */ "Video parameter set",
  /* 33 */ "Sequence parameter set",
  /* 34 */ "Picture parameter set",
  /* 35 */ "Access unit delimiter",
  /* 36 */ "End of sequence",
  /* 37 */ "End of bitstream",
  /* 38 */ "Filler data",
  /* 39 */ "Supplemental enhancement information (SEI)",
  /* 40 */ "Supplemental enhancement information (SEI)",
  /* 41 */ "Reserved",
  /* 42 */ "Reserved",
  /* 43 */ "Reserved",
  /* 44 */ "Reserved",
  /* 45 */ "Reserved",
  /* 46 */ "Reserved",
  /* 47 */ "Reserved",
  /* 48 */ "Unspecified",
  /* 49 */ "Unspecified",
  /* 50 */ "Unspecified",
  /* 51 */ "Unspecified",
  /* 52 */ "Unspecified",
  /* 53 */ "Unspecified",
  /* 54 */ "Unspecified",
  /* 55 */ "Unspecified",
  /* 56 */ "Unspecified",
  /* 57 */ "Unspecified",
  /* 58 */ "Unspecified",
  /* 59 */ "Unspecified",
  /* 60 */ "Unspecified",
  /* 61 */ "Unspecified",
  /* 62 */ "Unspecified",
  /* 63 */ "Unspecified",
};
330#endif
331
// Tracing helpers used by the parameter-set analysis code below.
// With DEBUG defined, "DEBUG_PRINT(x)" and "DEBUG_STR(x)" write to 'stderr', indented by
// "numDebugTabs" tab characters, and "DEBUG_TAB" increases the indentation for the rest of
// the enclosing scope.  Without DEBUG, all three generate no output.
#ifdef DEBUG
static unsigned numDebugTabs = 1;
#define DEBUG_PRINT_TABS for (unsigned _i = 0; _i < numDebugTabs; ++_i) fprintf(stderr, "\t")
#define DEBUG_PRINT(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s: %d\n", #x, x); } while (0)
#define DEBUG_STR(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s\n", x); } while (0)
// An object of this class adds one level of indentation for as long as it exists:
class DebugTab {
public:
  DebugTab() {++numDebugTabs;}
  ~DebugTab() {--numDebugTabs;}
};
#define DEBUG_TAB DebugTab dummy
#else
#define DEBUG_PRINT(x) do { (void)(x); } while (0)
    // Note: the "(void)(x);" statement marks "x" as used, to eliminate "unused variable"
    // compiler warning messages.  (We don't use "x = x;" for this, because a self-assignment
    // can itself cause a compiler warning, and performs a needless store.)
#define DEBUG_STR(x) do {} while (0)
#define DEBUG_TAB do {} while (0)
#endif
349
350void H264or5VideoStreamParser::profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1) {
351 bv.skipBits(96);
352
353 unsigned i;
354 Boolean sub_layer_profile_present_flag[7], sub_layer_level_present_flag[7];
355 for (i = 0; i < max_sub_layers_minus1; ++i) {
356 sub_layer_profile_present_flag[i] = bv.get1BitBoolean();
357 sub_layer_level_present_flag[i] = bv.get1BitBoolean();
358 }
359 if (max_sub_layers_minus1 > 0) {
360 bv.skipBits(2*(8-max_sub_layers_minus1)); // reserved_zero_2bits
361 }
362 for (i = 0; i < max_sub_layers_minus1; ++i) {
363 if (sub_layer_profile_present_flag[i]) {
364 bv.skipBits(88);
365 }
366 if (sub_layer_level_present_flag[i]) {
367 bv.skipBits(8); // sub_layer_level_idc[i]
368 }
369 }
370}
371
372void H264or5VideoStreamParser
373::analyze_vui_parameters(BitVector& bv,
374 unsigned& num_units_in_tick, unsigned& time_scale) {
375 Boolean aspect_ratio_info_present_flag = bv.get1BitBoolean();
376 DEBUG_PRINT(aspect_ratio_info_present_flag);
377 if (aspect_ratio_info_present_flag) {
378 DEBUG_TAB;
379 unsigned aspect_ratio_idc = bv.getBits(8);
380 DEBUG_PRINT(aspect_ratio_idc);
381 if (aspect_ratio_idc == 255/*Extended_SAR*/) {
382 bv.skipBits(32); // sar_width; sar_height
383 }
384 }
385 Boolean overscan_info_present_flag = bv.get1BitBoolean();
386 DEBUG_PRINT(overscan_info_present_flag);
387 if (overscan_info_present_flag) {
388 bv.skipBits(1); // overscan_appropriate_flag
389 }
390 Boolean video_signal_type_present_flag = bv.get1BitBoolean();
391 DEBUG_PRINT(video_signal_type_present_flag);
392 if (video_signal_type_present_flag) {
393 DEBUG_TAB;
394 bv.skipBits(4); // video_format; video_full_range_flag
395 Boolean colour_description_present_flag = bv.get1BitBoolean();
396 DEBUG_PRINT(colour_description_present_flag);
397 if (colour_description_present_flag) {
398 bv.skipBits(24); // colour_primaries; transfer_characteristics; matrix_coefficients
399 }
400 }
401 Boolean chroma_loc_info_present_flag = bv.get1BitBoolean();
402 DEBUG_PRINT(chroma_loc_info_present_flag);
403 if (chroma_loc_info_present_flag) {
404 (void)bv.get_expGolomb(); // chroma_sample_loc_type_top_field
405 (void)bv.get_expGolomb(); // chroma_sample_loc_type_bottom_field
406 }
407 if (fHNumber == 265) {
408 bv.skipBits(2); // neutral_chroma_indication_flag, field_seq_flag
409 Boolean frame_field_info_present_flag = bv.get1BitBoolean();
410 DEBUG_PRINT(frame_field_info_present_flag);
411 pic_struct_present_flag = frame_field_info_present_flag; // hack to make H.265 like H.264
412 Boolean default_display_window_flag = bv.get1BitBoolean();
413 DEBUG_PRINT(default_display_window_flag);
414 if (default_display_window_flag) {
415 (void)bv.get_expGolomb(); // def_disp_win_left_offset
416 (void)bv.get_expGolomb(); // def_disp_win_right_offset
417 (void)bv.get_expGolomb(); // def_disp_win_top_offset
418 (void)bv.get_expGolomb(); // def_disp_win_bottom_offset
419 }
420 }
421 Boolean timing_info_present_flag = bv.get1BitBoolean();
422 DEBUG_PRINT(timing_info_present_flag);
423 if (timing_info_present_flag) {
424 DEBUG_TAB;
425 num_units_in_tick = bv.getBits(32);
426 DEBUG_PRINT(num_units_in_tick);
427 time_scale = bv.getBits(32);
428 DEBUG_PRINT(time_scale);
429 if (fHNumber == 264) {
430 Boolean fixed_frame_rate_flag = bv.get1BitBoolean();
431 DEBUG_PRINT(fixed_frame_rate_flag);
432 } else { // 265
433 Boolean vui_poc_proportional_to_timing_flag = bv.get1BitBoolean();
434 DEBUG_PRINT(vui_poc_proportional_to_timing_flag);
435 if (vui_poc_proportional_to_timing_flag) {
436 unsigned vui_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb();
437 DEBUG_PRINT(vui_num_ticks_poc_diff_one_minus1);
438 }
439 return; // For H.265, don't bother parsing any more of this #####
440 }
441 }
442 // The following is H.264 only: #####
443 Boolean nal_hrd_parameters_present_flag = bv.get1BitBoolean();
444 DEBUG_PRINT(nal_hrd_parameters_present_flag);
445 if (nal_hrd_parameters_present_flag) analyze_hrd_parameters(bv);
446 Boolean vcl_hrd_parameters_present_flag = bv.get1BitBoolean();
447 DEBUG_PRINT(vcl_hrd_parameters_present_flag);
448 if (vcl_hrd_parameters_present_flag) analyze_hrd_parameters(bv);
449 CpbDpbDelaysPresentFlag = nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag;
450 if (CpbDpbDelaysPresentFlag) {
451 bv.skipBits(1); // low_delay_hrd_flag
452 }
453 pic_struct_present_flag = bv.get1BitBoolean();
454 DEBUG_PRINT(pic_struct_present_flag);
455}
456
457void H264or5VideoStreamParser::analyze_hrd_parameters(BitVector& bv) {
458 DEBUG_TAB;
459 unsigned cpb_cnt_minus1 = bv.get_expGolomb();
460 DEBUG_PRINT(cpb_cnt_minus1);
461 unsigned bit_rate_scale = bv.getBits(4);
462 DEBUG_PRINT(bit_rate_scale);
463 unsigned cpb_size_scale = bv.getBits(4);
464 DEBUG_PRINT(cpb_size_scale);
465 for (unsigned SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; ++SchedSelIdx) {
466 DEBUG_TAB;
467 DEBUG_PRINT(SchedSelIdx);
468 unsigned bit_rate_value_minus1 = bv.get_expGolomb();
469 DEBUG_PRINT(bit_rate_value_minus1);
470 unsigned cpb_size_value_minus1 = bv.get_expGolomb();
471 DEBUG_PRINT(cpb_size_value_minus1);
472 Boolean cbr_flag = bv.get1BitBoolean();
473 DEBUG_PRINT(cbr_flag);
474 }
475 unsigned initial_cpb_removal_delay_length_minus1 = bv.getBits(5);
476 DEBUG_PRINT(initial_cpb_removal_delay_length_minus1);
477 cpb_removal_delay_length_minus1 = bv.getBits(5);
478 DEBUG_PRINT(cpb_removal_delay_length_minus1);
479 dpb_output_delay_length_minus1 = bv.getBits(5);
480 DEBUG_PRINT(dpb_output_delay_length_minus1);
481 unsigned time_offset_length = bv.getBits(5);
482 DEBUG_PRINT(time_offset_length);
483}
484
485void H264or5VideoStreamParser
486::analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) {
487 num_units_in_tick = time_scale = 0; // default values
488
489 // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
490 u_int8_t vps[VPS_MAX_SIZE];
491 unsigned vpsSize;
492 removeEmulationBytes(vps, sizeof vps, vpsSize);
493
494 BitVector bv(vps, 0, 8*vpsSize);
495
496 // Assert: fHNumber == 265 (because this function is called only when parsing H.265)
497 unsigned i;
498
499 bv.skipBits(28); // nal_unit_header, vps_video_parameter_set_id, vps_reserved_three_2bits, vps_max_layers_minus1
500 unsigned vps_max_sub_layers_minus1 = bv.getBits(3);
501 DEBUG_PRINT(vps_max_sub_layers_minus1);
502 bv.skipBits(17); // vps_temporal_id_nesting_flag, vps_reserved_0xffff_16bits
503 profile_tier_level(bv, vps_max_sub_layers_minus1);
504 Boolean vps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean();
505 DEBUG_PRINT(vps_sub_layer_ordering_info_present_flag);
506 for (i = vps_sub_layer_ordering_info_present_flag ? 0 : vps_max_sub_layers_minus1;
507 i <= vps_max_sub_layers_minus1; ++i) {
508 (void)bv.get_expGolomb(); // vps_max_dec_pic_buffering_minus1[i]
509 (void)bv.get_expGolomb(); // vps_max_num_reorder_pics[i]
510 (void)bv.get_expGolomb(); // vps_max_latency_increase_plus1[i]
511 }
512 unsigned vps_max_layer_id = bv.getBits(6);
513 DEBUG_PRINT(vps_max_layer_id);
514 unsigned vps_num_layer_sets_minus1 = bv.get_expGolomb();
515 DEBUG_PRINT(vps_num_layer_sets_minus1);
516 for (i = 1; i <= vps_num_layer_sets_minus1; ++i) {
517 bv.skipBits(vps_max_layer_id+1); // layer_id_included_flag[i][0..vps_max_layer_id]
518 }
519 Boolean vps_timing_info_present_flag = bv.get1BitBoolean();
520 DEBUG_PRINT(vps_timing_info_present_flag);
521 if (vps_timing_info_present_flag) {
522 DEBUG_TAB;
523 num_units_in_tick = bv.getBits(32);
524 DEBUG_PRINT(num_units_in_tick);
525 time_scale = bv.getBits(32);
526 DEBUG_PRINT(time_scale);
527 Boolean vps_poc_proportional_to_timing_flag = bv.get1BitBoolean();
528 DEBUG_PRINT(vps_poc_proportional_to_timing_flag);
529 if (vps_poc_proportional_to_timing_flag) {
530 unsigned vps_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb();
531 DEBUG_PRINT(vps_num_ticks_poc_diff_one_minus1);
532 }
533 }
534 Boolean vps_extension_flag = bv.get1BitBoolean();
535 DEBUG_PRINT(vps_extension_flag);
536}
537
538void H264or5VideoStreamParser
539::analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) {
540 num_units_in_tick = time_scale = 0; // default values
541
542 // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
543 u_int8_t sps[SPS_MAX_SIZE];
544 unsigned spsSize;
545 removeEmulationBytes(sps, sizeof sps, spsSize);
546
547 BitVector bv(sps, 0, 8*spsSize);
548
549 if (fHNumber == 264) {
550 bv.skipBits(8); // forbidden_zero_bit; nal_ref_idc; nal_unit_type
551 unsigned profile_idc = bv.getBits(8);
552 DEBUG_PRINT(profile_idc);
553 unsigned constraint_setN_flag = bv.getBits(8); // also "reserved_zero_2bits" at end
554 DEBUG_PRINT(constraint_setN_flag);
555 unsigned level_idc = bv.getBits(8);
556 DEBUG_PRINT(level_idc);
557 unsigned seq_parameter_set_id = bv.get_expGolomb();
558 DEBUG_PRINT(seq_parameter_set_id);
559 if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ) {
560 DEBUG_TAB;
561 unsigned chroma_format_idc = bv.get_expGolomb();
562 DEBUG_PRINT(chroma_format_idc);
563 if (chroma_format_idc == 3) {
564 DEBUG_TAB;
565 Boolean separate_colour_plane_flag = bv.get1BitBoolean();
566 DEBUG_PRINT(separate_colour_plane_flag);
567 }
568 (void)bv.get_expGolomb(); // bit_depth_luma_minus8
569 (void)bv.get_expGolomb(); // bit_depth_chroma_minus8
570 bv.skipBits(1); // qpprime_y_zero_transform_bypass_flag
571 Boolean seq_scaling_matrix_present_flag = bv.get1BitBoolean();
572 DEBUG_PRINT(seq_scaling_matrix_present_flag);
573 if (seq_scaling_matrix_present_flag) {
574 for (int i = 0; i < ((chroma_format_idc != 3) ? 8 : 12); ++i) {
575 DEBUG_TAB;
576 DEBUG_PRINT(i);
577 Boolean seq_scaling_list_present_flag = bv.get1BitBoolean();
578 DEBUG_PRINT(seq_scaling_list_present_flag);
579 if (seq_scaling_list_present_flag) {
580 DEBUG_TAB;
581 unsigned sizeOfScalingList = i < 6 ? 16 : 64;
582 unsigned lastScale = 8;
583 unsigned nextScale = 8;
584 for (unsigned j = 0; j < sizeOfScalingList; ++j) {
585 DEBUG_TAB;
586 DEBUG_PRINT(j);
587 DEBUG_PRINT(nextScale);
588 if (nextScale != 0) {
589 DEBUG_TAB;
590 int delta_scale = bv.get_expGolombSigned();
591 DEBUG_PRINT(delta_scale);
592 nextScale = (lastScale + delta_scale + 256) % 256;
593 }
594 lastScale = (nextScale == 0) ? lastScale : nextScale;
595 DEBUG_PRINT(lastScale);
596 }
597 }
598 }
599 }
600 }
601 unsigned log2_max_frame_num_minus4 = bv.get_expGolomb();
602 DEBUG_PRINT(log2_max_frame_num_minus4);
603 unsigned pic_order_cnt_type = bv.get_expGolomb();
604 DEBUG_PRINT(pic_order_cnt_type);
605 if (pic_order_cnt_type == 0) {
606 DEBUG_TAB;
607 unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb();
608 DEBUG_PRINT(log2_max_pic_order_cnt_lsb_minus4);
609 } else if (pic_order_cnt_type == 1) {
610 DEBUG_TAB;
611 bv.skipBits(1); // delta_pic_order_always_zero_flag
612 (void)bv.get_expGolombSigned(); // offset_for_non_ref_pic
613 (void)bv.get_expGolombSigned(); // offset_for_top_to_bottom_field
614 unsigned num_ref_frames_in_pic_order_cnt_cycle = bv.get_expGolomb();
615 DEBUG_PRINT(num_ref_frames_in_pic_order_cnt_cycle);
616 for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
617 (void)bv.get_expGolombSigned(); // offset_for_ref_frame[i]
618 }
619 }
620 unsigned max_num_ref_frames = bv.get_expGolomb();
621 DEBUG_PRINT(max_num_ref_frames);
622 Boolean gaps_in_frame_num_value_allowed_flag = bv.get1BitBoolean();
623 DEBUG_PRINT(gaps_in_frame_num_value_allowed_flag);
624 unsigned pic_width_in_mbs_minus1 = bv.get_expGolomb();
625 DEBUG_PRINT(pic_width_in_mbs_minus1);
626 unsigned pic_height_in_map_units_minus1 = bv.get_expGolomb();
627 DEBUG_PRINT(pic_height_in_map_units_minus1);
628 Boolean frame_mbs_only_flag = bv.get1BitBoolean();
629 DEBUG_PRINT(frame_mbs_only_flag);
630 if (!frame_mbs_only_flag) {
631 bv.skipBits(1); // mb_adaptive_frame_field_flag
632 }
633 bv.skipBits(1); // direct_8x8_inference_flag
634 Boolean frame_cropping_flag = bv.get1BitBoolean();
635 DEBUG_PRINT(frame_cropping_flag);
636 if (frame_cropping_flag) {
637 (void)bv.get_expGolomb(); // frame_crop_left_offset
638 (void)bv.get_expGolomb(); // frame_crop_right_offset
639 (void)bv.get_expGolomb(); // frame_crop_top_offset
640 (void)bv.get_expGolomb(); // frame_crop_bottom_offset
641 }
642 Boolean vui_parameters_present_flag = bv.get1BitBoolean();
643 DEBUG_PRINT(vui_parameters_present_flag);
644 if (vui_parameters_present_flag) {
645 DEBUG_TAB;
646 analyze_vui_parameters(bv, num_units_in_tick, time_scale);
647 }
648 } else { // 265
649 unsigned i;
650
651 bv.skipBits(16); // nal_unit_header
652 bv.skipBits(4); // sps_video_parameter_set_id
653 unsigned sps_max_sub_layers_minus1 = bv.getBits(3);
654 DEBUG_PRINT(sps_max_sub_layers_minus1);
655 bv.skipBits(1); // sps_temporal_id_nesting_flag
656 profile_tier_level(bv, sps_max_sub_layers_minus1);
657 (void)bv.get_expGolomb(); // sps_seq_parameter_set_id
658 unsigned chroma_format_idc = bv.get_expGolomb();
659 DEBUG_PRINT(chroma_format_idc);
660 if (chroma_format_idc == 3) bv.skipBits(1); // separate_colour_plane_flag
661 unsigned pic_width_in_luma_samples = bv.get_expGolomb();
662 DEBUG_PRINT(pic_width_in_luma_samples);
663 unsigned pic_height_in_luma_samples = bv.get_expGolomb();
664 DEBUG_PRINT(pic_height_in_luma_samples);
665 Boolean conformance_window_flag = bv.get1BitBoolean();
666 DEBUG_PRINT(conformance_window_flag);
667 if (conformance_window_flag) {
668 DEBUG_TAB;
669 unsigned conf_win_left_offset = bv.get_expGolomb();
670 DEBUG_PRINT(conf_win_left_offset);
671 unsigned conf_win_right_offset = bv.get_expGolomb();
672 DEBUG_PRINT(conf_win_right_offset);
673 unsigned conf_win_top_offset = bv.get_expGolomb();
674 DEBUG_PRINT(conf_win_top_offset);
675 unsigned conf_win_bottom_offset = bv.get_expGolomb();
676 DEBUG_PRINT(conf_win_bottom_offset);
677 }
678 (void)bv.get_expGolomb(); // bit_depth_luma_minus8
679 (void)bv.get_expGolomb(); // bit_depth_chroma_minus8
680 unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb();
681 Boolean sps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean();
682 DEBUG_PRINT(sps_sub_layer_ordering_info_present_flag);
683 for (i = (sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1);
684 i <= sps_max_sub_layers_minus1; ++i) {
685 (void)bv.get_expGolomb(); // sps_max_dec_pic_buffering_minus1[i]
686 (void)bv.get_expGolomb(); // sps_max_num_reorder_pics[i]
687 (void)bv.get_expGolomb(); // sps_max_latency_increase[i]
688 }
689 (void)bv.get_expGolomb(); // log2_min_luma_coding_block_size_minus3
690 (void)bv.get_expGolomb(); // log2_diff_max_min_luma_coding_block_size
691 (void)bv.get_expGolomb(); // log2_min_transform_block_size_minus2
692 (void)bv.get_expGolomb(); // log2_diff_max_min_transform_block_size
693 (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_inter
694 (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_intra
695 Boolean scaling_list_enabled_flag = bv.get1BitBoolean();
696 DEBUG_PRINT(scaling_list_enabled_flag);
697 if (scaling_list_enabled_flag) {
698 DEBUG_TAB;
699 Boolean sps_scaling_list_data_present_flag = bv.get1BitBoolean();
700 DEBUG_PRINT(sps_scaling_list_data_present_flag);
701 if (sps_scaling_list_data_present_flag) {
702 // scaling_list_data()
703 DEBUG_TAB;
704 for (unsigned sizeId = 0; sizeId < 4; ++sizeId) {
705 DEBUG_PRINT(sizeId);
706 for (unsigned matrixId = 0; matrixId < (sizeId == 3 ? 2 : 6); ++matrixId) {
707 DEBUG_TAB;
708 DEBUG_PRINT(matrixId);
709 Boolean scaling_list_pred_mode_flag = bv.get1BitBoolean();
710 DEBUG_PRINT(scaling_list_pred_mode_flag);
711 if (!scaling_list_pred_mode_flag) {
712 (void)bv.get_expGolomb(); // scaling_list_pred_matrix_id_delta[sizeId][matrixId]
713 } else {
714 unsigned const c = 1 << (4+(sizeId<<1));
715 unsigned coefNum = c < 64 ? c : 64;
716 if (sizeId > 1) {
717 (void)bv.get_expGolomb(); // scaling_list_dc_coef_minus8[sizeId][matrixId]
718 }
719 for (i = 0; i < coefNum; ++i) {
720 (void)bv.get_expGolomb(); // scaling_list_delta_coef
721 }
722 }
723 }
724 }
725 }
726 }
727 bv.skipBits(2); // amp_enabled_flag, sample_adaptive_offset_enabled_flag
728 Boolean pcm_enabled_flag = bv.get1BitBoolean();
729 DEBUG_PRINT(pcm_enabled_flag);
730 if (pcm_enabled_flag) {
731 bv.skipBits(8); // pcm_sample_bit_depth_luma_minus1, pcm_sample_bit_depth_chroma_minus1
732 (void)bv.get_expGolomb(); // log2_min_pcm_luma_coding_block_size_minus3
733 (void)bv.get_expGolomb(); // log2_diff_max_min_pcm_luma_coding_block_size
734 bv.skipBits(1); // pcm_loop_filter_disabled_flag
735 }
736 unsigned num_short_term_ref_pic_sets = bv.get_expGolomb();
737 DEBUG_PRINT(num_short_term_ref_pic_sets);
738 unsigned num_negative_pics = 0, prev_num_negative_pics = 0;
739 unsigned num_positive_pics = 0, prev_num_positive_pics = 0;
740 for (i = 0; i < num_short_term_ref_pic_sets; ++i) {
741 // short_term_ref_pic_set(i):
742 DEBUG_TAB;
743 DEBUG_PRINT(i);
744 Boolean inter_ref_pic_set_prediction_flag = False;
745 if (i != 0) {
746 inter_ref_pic_set_prediction_flag = bv.get1BitBoolean();
747 }
748 DEBUG_PRINT(inter_ref_pic_set_prediction_flag);
749 if (inter_ref_pic_set_prediction_flag) {
750 DEBUG_TAB;
751 if (i == num_short_term_ref_pic_sets) {
752 // This can't happen here, but it's in the spec, so we include it for completeness
753 (void)bv.get_expGolomb(); // delta_idx_minus1
754 }
755 bv.skipBits(1); // delta_rps_sign
756 (void)bv.get_expGolomb(); // abs_delta_rps_minus1
757 unsigned NumDeltaPocs = prev_num_negative_pics + prev_num_positive_pics; // correct???
758 for (unsigned j = 0; j < NumDeltaPocs; ++j) {
759 DEBUG_PRINT(j);
760 Boolean used_by_curr_pic_flag = bv.get1BitBoolean();
761 DEBUG_PRINT(used_by_curr_pic_flag);
762 if (!used_by_curr_pic_flag) bv.skipBits(1); // use_delta_flag[j]
763 }
764 } else {
765 prev_num_negative_pics = num_negative_pics;
766 num_negative_pics = bv.get_expGolomb();
767 DEBUG_PRINT(num_negative_pics);
768 prev_num_positive_pics = num_positive_pics;
769 num_positive_pics = bv.get_expGolomb();
770 DEBUG_PRINT(num_positive_pics);
771 unsigned k;
772 for (k = 0; k < num_negative_pics; ++k) {
773 (void)bv.get_expGolomb(); // delta_poc_s0_minus1[k]
774 bv.skipBits(1); // used_by_curr_pic_s0_flag[k]
775 }
776 for (k = 0; k < num_positive_pics; ++k) {
777 (void)bv.get_expGolomb(); // delta_poc_s1_minus1[k]
778 bv.skipBits(1); // used_by_curr_pic_s1_flag[k]
779 }
780 }
781 }
782 Boolean long_term_ref_pics_present_flag = bv.get1BitBoolean();
783 DEBUG_PRINT(long_term_ref_pics_present_flag);
784 if (long_term_ref_pics_present_flag) {
785 DEBUG_TAB;
786 unsigned num_long_term_ref_pics_sps = bv.get_expGolomb();
787 DEBUG_PRINT(num_long_term_ref_pics_sps);
788 for (i = 0; i < num_long_term_ref_pics_sps; ++i) {
789 bv.skipBits(log2_max_pic_order_cnt_lsb_minus4); // lt_ref_pic_poc_lsb_sps[i]
790 bv.skipBits(1); // used_by_curr_pic_lt_sps_flag[1]
791 }
792 }
793 bv.skipBits(2); // sps_temporal_mvp_enabled_flag, strong_intra_smoothing_enabled_flag
794 Boolean vui_parameters_present_flag = bv.get1BitBoolean();
795 DEBUG_PRINT(vui_parameters_present_flag);
796 if (vui_parameters_present_flag) {
797 DEBUG_TAB;
798 analyze_vui_parameters(bv, num_units_in_tick, time_scale);
799 }
800 Boolean sps_extension_flag = bv.get1BitBoolean();
801 DEBUG_PRINT(sps_extension_flag);
802 }
803}
804
#define SEI_MAX_SIZE 5000 // larger than the largest possible SEI NAL unit

#ifdef DEBUG
// Printable names for the H.264 SEI "payloadType" values.  (Used for debugging output only.)
// The table is indexed by "payloadType"; its final entry (index
// "MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264") stands for that value and every larger one.
#define MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264 46
char const* sei_payloadType_description_h264[MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264+1] = {
  /*  0 */ "buffering_period",
  /*  1 */ "pic_timing",
  /*  2 */ "pan_scan_rect",
  /*  3 */ "filler_payload",
  /*  4 */ "user_data_registered_itu_t_t35",
  /*  5 */ "user_data_unregistered",
  /*  6 */ "recovery_point",
  /*  7 */ "dec_ref_pic_marking_repetition",
  /*  8 */ "spare_pic",
  /*  9 */ "scene_info",
  /* 10 */ "sub_seq_info",
  /* 11 */ "sub_seq_layer_characteristics",
  /* 12 */ "sub_seq_characteristics",
  /* 13 */ "full_frame_freeze",
  /* 14 */ "full_frame_freeze_release",
  /* 15 */ "full_frame_snapshot",
  /* 16 */ "progressive_refinement_segment_start",
  /* 17 */ "progressive_refinement_segment_end",
  /* 18 */ "motion_constrained_slice_group_set",
  /* 19 */ "film_grain_characteristics",
  /* 20 */ "deblocking_filter_display_preference",
  /* 21 */ "stereo_video_info",
  /* 22 */ "post_filter_hint",
  /* 23 */ "tone_mapping_info",
  /* 24 */ "scalability_info",
  /* 25 */ "sub_pic_scalable_layer",
  /* 26 */ "non_required_layer_rep",
  /* 27 */ "priority_layer_info",
  /* 28 */ "layers_not_present",
  /* 29 */ "layer_dependency_change",
  /* 30 */ "scalable_nesting",
  /* 31 */ "base_layer_temporal_hrd",
  /* 32 */ "quality_layer_integrity_check",
  /* 33 */ "redundant_pic_property",
  /* 34 */ "tl0_dep_rep_index",
  /* 35 */ "tl_switching_point",
  /* 36 */ "parallel_decoding_info",
  /* 37 */ "mvc_scalable_nesting",
  /* 38 */ "view_scalability_info",
  /* 39 */ "multiview_scene_info",
  /* 40 */ "multiview_acquisition_info",
  /* 41 */ "non_required_view_component",
  /* 42 */ "view_dependency_change",
  /* 43 */ "operation_points_not_present",
  /* 44 */ "base_view_temporal_hrd",
  /* 45 */ "frame_packing_arrangement",
  /* 46 or higher */ "reserved_sei_message"
};
#endif
859
860void H264or5VideoStreamParser::analyze_sei_data(u_int8_t nal_unit_type) {
861 // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
862 u_int8_t sei[SEI_MAX_SIZE];
863 unsigned seiSize;
864 removeEmulationBytes(sei, sizeof sei, seiSize);
865
866 unsigned j = 1; // skip the initial byte (forbidden_zero_bit; nal_ref_idc; nal_unit_type); we've already seen it
867 while (j < seiSize) {
868 unsigned payloadType = 0;
869 do {
870 payloadType += sei[j];
871 } while (sei[j++] == 255 && j < seiSize);
872 if (j >= seiSize) break;
873
874 unsigned payloadSize = 0;
875 do {
876 payloadSize += sei[j];
877 } while (sei[j++] == 255 && j < seiSize);
878 if (j >= seiSize) break;
879
880#ifdef DEBUG
881 char const* description;
882 if (fHNumber == 264) {
883 unsigned descriptionNum = payloadType <= MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264
884 ? payloadType : MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264;
885 description = sei_payloadType_description_h264[descriptionNum];
886 } else { // 265
887 description =
888 payloadType == 3 ? "filler_payload" :
889 payloadType == 4 ? "user_data_registered_itu_t_t35" :
890 payloadType == 5 ? "user_data_unregistered" :
891 payloadType == 17 ? "progressive_refinement_segment_end" :
892 payloadType == 22 ? "post_filter_hint" :
893 (payloadType == 132 && nal_unit_type == SUFFIX_SEI_NUT) ? "decoded_picture_hash" :
894 nal_unit_type == SUFFIX_SEI_NUT ? "reserved_sei_message" :
895 payloadType == 0 ? "buffering_period" :
896 payloadType == 1 ? "pic_timing" :
897 payloadType == 2 ? "pan_scan_rect" :
898 payloadType == 6 ? "recovery_point" :
899 payloadType == 9 ? "scene_info" :
900 payloadType == 15 ? "picture_snapshot" :
901 payloadType == 16 ? "progressive_refinement_segment_start" :
902 payloadType == 19 ? "film_grain_characteristics" :
903 payloadType == 23 ? "tone_mapping_info" :
904 payloadType == 45 ? "frame_packing_arrangement" :
905 payloadType == 47 ? "display_orientation" :
906 payloadType == 128 ? "structure_of_pictures_info" :
907 payloadType == 129 ? "active_parameter_sets" :
908 payloadType == 130 ? "decoding_unit_info" :
909 payloadType == 131 ? "temporal_sub_layer_zero_index" :
910 payloadType == 133 ? "scalable_nesting" :
911 payloadType == 134 ? "region_refresh_info" : "reserved_sei_message";
912 }
913 fprintf(stderr, "\tpayloadType %d (\"%s\"); payloadSize %d\n", payloadType, description, payloadSize);
914#endif
915
916 analyze_sei_payload(payloadType, payloadSize, &sei[j]);
917 j += payloadSize;
918 }
919}
920
921void H264or5VideoStreamParser
922::analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload) {
923 if (payloadType == 1/* pic_timing, for both H.264 and H.265 */) {
924 BitVector bv(payload, 0, 8*payloadSize);
925
926 DEBUG_TAB;
927 if (CpbDpbDelaysPresentFlag) {
928 unsigned cpb_removal_delay = bv.getBits(cpb_removal_delay_length_minus1 + 1);
929 DEBUG_PRINT(cpb_removal_delay);
930 unsigned dpb_output_delay = bv.getBits(dpb_output_delay_length_minus1 + 1);
931 DEBUG_PRINT(dpb_output_delay);
932 }
933 double prevDeltaTfiDivisor = DeltaTfiDivisor;
934 if (pic_struct_present_flag) {
935 unsigned pic_struct = bv.getBits(4);
936 DEBUG_PRINT(pic_struct);
937 // Use this to set "DeltaTfiDivisor" (which is used to compute the frame rate):
938 if (fHNumber == 264) {
939 DeltaTfiDivisor =
940 pic_struct == 0 ? 2.0 :
941 pic_struct <= 2 ? 1.0 :
942 pic_struct <= 4 ? 2.0 :
943 pic_struct <= 6 ? 3.0 :
944 pic_struct == 7 ? 4.0 :
945 pic_struct == 8 ? 6.0 :
946 2.0;
947 } else { // H.265
948 DeltaTfiDivisor =
949 pic_struct == 0 ? 2.0 :
950 pic_struct <= 2 ? 1.0 :
951 pic_struct <= 4 ? 2.0 :
952 pic_struct <= 6 ? 3.0 :
953 pic_struct == 7 ? 2.0 :
954 pic_struct == 8 ? 3.0 :
955 pic_struct <= 12 ? 1.0 :
956 2.0;
957 }
958 } else {
959 if (fHNumber == 264) {
960 // Need to get field_pic_flag from slice_header to set this properly! #####
961 } else { // H.265
962 DeltaTfiDivisor = 1.0;
963 }
964 }
965 // If "DeltaTfiDivisor" has changed, and we've already computed the frame rate, then
966 // adjust it, based on the new value of "DeltaTfiDivisor":
967 if (DeltaTfiDivisor != prevDeltaTfiDivisor && fParsedFrameRate != 0.0) {
968 usingSource()->fFrameRate = fParsedFrameRate
969 = fParsedFrameRate*(prevDeltaTfiDivisor/DeltaTfiDivisor);
970#ifdef DEBUG
971 fprintf(stderr, "Changed frame rate to %f fps\n", usingSource()->fFrameRate);
972#endif
973 }
974 // Ignore the rest of the payload (timestamps) for now... #####
975 }
976}
977
978void H264or5VideoStreamParser::flushInput() {
979 fHaveSeenFirstStartCode = False;
980 fHaveSeenFirstByteOfNALUnit = False;
981
982 StreamParser::flushInput();
983}
984
985unsigned H264or5VideoStreamParser::parse() {
986 try {
987 // The stream must start with a 0x00000001:
988 if (!fHaveSeenFirstStartCode) {
989 // Skip over any input bytes that precede the first 0x00000001:
990 u_int32_t first4Bytes;
991 while ((first4Bytes = test4Bytes()) != 0x00000001) {
992 get1Byte(); setParseState(); // ensures that we progress over bad data
993 }
994 skipBytes(4); // skip this initial code
995
996 setParseState();
997 fHaveSeenFirstStartCode = True; // from now on
998 }
999
1000 if (fOutputStartCodeSize > 0 && curFrameSize() == 0 && !haveSeenEOF()) {
1001 // Include a start code in the output:
1002 save4Bytes(0x00000001);
1003 }
1004
1005 // Then save everything up until the next 0x00000001 (4 bytes) or 0x000001 (3 bytes), or we hit EOF.
1006 // Also make note of the first byte, because it contains the "nal_unit_type":
1007 if (haveSeenEOF()) {
1008 // We hit EOF the last time that we tried to parse this data, so we know that any remaining unparsed data
1009 // forms a complete NAL unit, and that there's no 'start code' at the end:
1010 unsigned remainingDataSize = totNumValidBytes() - curOffset();
1011#ifdef DEBUG
1012 unsigned const trailingNALUnitSize = remainingDataSize;
1013#endif
1014 while (remainingDataSize > 0) {
1015 u_int8_t nextByte = get1Byte();
1016 if (!fHaveSeenFirstByteOfNALUnit) {
1017 fFirstByteOfNALUnit = nextByte;
1018 fHaveSeenFirstByteOfNALUnit = True;
1019 }
1020 saveByte(nextByte);
1021 --remainingDataSize;
1022 }
1023
1024#ifdef DEBUG
1025 if (fHNumber == 264) {
1026 u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5;
1027 u_int8_t nal_unit_type = fFirstByteOfNALUnit&0x1F;
1028 fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
1029 trailingNALUnitSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]);
1030 } else { // 265
1031 u_int8_t nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1;
1032 fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n",
1033 trailingNALUnitSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]);
1034 }
1035#endif
1036
1037 (void)get1Byte(); // forces another read, which will cause EOF to get handled for real this time
1038 return 0;
1039 } else {
1040 u_int32_t next4Bytes = test4Bytes();
1041 if (!fHaveSeenFirstByteOfNALUnit) {
1042 fFirstByteOfNALUnit = next4Bytes>>24;
1043 fHaveSeenFirstByteOfNALUnit = True;
1044 }
1045 while (next4Bytes != 0x00000001 && (next4Bytes&0xFFFFFF00) != 0x00000100) {
1046 // We save at least some of "next4Bytes".
1047 if ((unsigned)(next4Bytes&0xFF) > 1) {
1048 // Common case: 0x00000001 or 0x000001 definitely doesn't begin anywhere in "next4Bytes", so we save all of it:
1049 save4Bytes(next4Bytes);
1050 skipBytes(4);
1051 } else {
1052 // Save the first byte, and continue testing the rest:
1053 saveByte(next4Bytes>>24);
1054 skipBytes(1);
1055 }
1056 setParseState(); // ensures forward progress
1057 next4Bytes = test4Bytes();
1058 }
1059 // Assert: next4Bytes starts with 0x00000001 or 0x000001, and we've saved all previous bytes (forming a complete NAL unit).
1060 // Skip over these remaining bytes, up until the start of the next NAL unit:
1061 if (next4Bytes == 0x00000001) {
1062 skipBytes(4);
1063 } else {
1064 skipBytes(3);
1065 }
1066 }
1067
1068 fHaveSeenFirstByteOfNALUnit = False; // for the next NAL unit that we'll parse
1069 u_int8_t nal_unit_type;
1070 if (fHNumber == 264) {
1071 nal_unit_type = fFirstByteOfNALUnit&0x1F;
1072#ifdef DEBUG
1073 u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5;
1074 fprintf(stderr, "Parsed %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
1075 curFrameSize()-fOutputStartCodeSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]);
1076#endif
1077 } else { // 265
1078 nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1;
1079#ifdef DEBUG
1080 fprintf(stderr, "Parsed %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n",
1081 curFrameSize()-fOutputStartCodeSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]);
1082#endif
1083 }
1084
1085 // Now that we have found (& copied) a NAL unit, process it if it's of special interest to us:
1086 if (isVPS(nal_unit_type)) { // Video parameter set
1087 // First, save a copy of this NAL unit, in case the downstream object wants to see it:
1088 usingSource()->saveCopyOfVPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);
1089
1090 if (fParsedFrameRate == 0.0) {
1091 // We haven't yet parsed a frame rate from the stream.
1092 // So parse this NAL unit to check whether frame rate information is present:
1093 unsigned num_units_in_tick, time_scale;
1094 analyze_video_parameter_set_data(num_units_in_tick, time_scale);
1095 if (time_scale > 0 && num_units_in_tick > 0) {
1096 usingSource()->fFrameRate = fParsedFrameRate
1097 = time_scale/(DeltaTfiDivisor*num_units_in_tick);
1098#ifdef DEBUG
1099 fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
1100#endif
1101 } else {
1102#ifdef DEBUG
1103 fprintf(stderr, "\tThis \"Video Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
1104#endif
1105 }
1106 }
1107 } else if (isSPS(nal_unit_type)) { // Sequence parameter set
1108 // First, save a copy of this NAL unit, in case the downstream object wants to see it:
1109 usingSource()->saveCopyOfSPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);
1110
1111 if (fParsedFrameRate == 0.0) {
1112 // We haven't yet parsed a frame rate from the stream.
1113 // So parse this NAL unit to check whether frame rate information is present:
1114 unsigned num_units_in_tick, time_scale;
1115 analyze_seq_parameter_set_data(num_units_in_tick, time_scale);
1116 if (time_scale > 0 && num_units_in_tick > 0) {
1117 usingSource()->fFrameRate = fParsedFrameRate
1118 = time_scale/(DeltaTfiDivisor*num_units_in_tick);
1119#ifdef DEBUG
1120 fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
1121#endif
1122 } else {
1123#ifdef DEBUG
1124 fprintf(stderr, "\tThis \"Sequence Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
1125#endif
1126 }
1127 }
1128 } else if (isPPS(nal_unit_type)) { // Picture parameter set
1129 // Save a copy of this NAL unit, in case the downstream object wants to see it:
1130 usingSource()->saveCopyOfPPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);
1131 } else if (isSEI(nal_unit_type)) { // Supplemental enhancement information (SEI)
1132 analyze_sei_data(nal_unit_type);
1133 // Later, perhaps adjust "fPresentationTime" if we saw a "pic_timing" SEI payload??? #####
1134 }
1135
1136 usingSource()->setPresentationTime();
1137#ifdef DEBUG
1138 unsigned long secs = (unsigned long)usingSource()->fPresentationTime.tv_sec;
1139 unsigned uSecs = (unsigned)usingSource()->fPresentationTime.tv_usec;
1140 fprintf(stderr, "\tPresentation time: %lu.%06u\n", secs, uSecs);
1141#endif
1142
1143 // Now, check whether this NAL unit ends an 'access unit'.
1144 // (RTP streamers need to know this in order to figure out whether or not to set the "M" bit.)
1145 Boolean thisNALUnitEndsAccessUnit;
1146 if (haveSeenEOF() || isEOF(nal_unit_type)) {
1147 // There is no next NAL unit, so we assume that this one ends the current 'access unit':
1148 thisNALUnitEndsAccessUnit = True;
1149 } else if (usuallyBeginsAccessUnit(nal_unit_type)) {
1150 // These NAL units usually *begin* an access unit, so assume that they don't end one here:
1151 thisNALUnitEndsAccessUnit = False;
1152 } else {
1153 // We need to check the *next* NAL unit to figure out whether
1154 // the current NAL unit ends an 'access unit':
1155 u_int8_t firstBytesOfNextNALUnit[3];
1156 testBytes(firstBytesOfNextNALUnit, 3);
1157
1158 u_int8_t const& next_nal_unit_type = fHNumber == 264
1159 ? (firstBytesOfNextNALUnit[0]&0x1F) : ((firstBytesOfNextNALUnit[0]&0x7E)>>1);
1160 if (isVCL(next_nal_unit_type)) {
1161 // The high-order bit of the byte after the "nal_unit_header" tells us whether it's
1162 // the start of a new 'access unit' (and thus the current NAL unit ends an 'access unit'):
1163 u_int8_t const byteAfter_nal_unit_header
1164 = fHNumber == 264 ? firstBytesOfNextNALUnit[1] : firstBytesOfNextNALUnit[2];
1165 thisNALUnitEndsAccessUnit = (byteAfter_nal_unit_header&0x80) != 0;
1166 } else if (usuallyBeginsAccessUnit(next_nal_unit_type)) {
1167 // The next NAL unit's type is one that usually appears at the start of an 'access unit',
1168 // so we assume that the current NAL unit ends an 'access unit':
1169 thisNALUnitEndsAccessUnit = True;
1170 } else {
1171 // The next NAL unit definitely doesn't start a new 'access unit',
1172 // which means that the current NAL unit doesn't end one:
1173 thisNALUnitEndsAccessUnit = False;
1174 }
1175 }
1176
1177 if (thisNALUnitEndsAccessUnit) {
1178#ifdef DEBUG
1179 fprintf(stderr, "*****This NAL unit ends the current access unit*****\n");
1180#endif
1181 usingSource()->fPictureEndMarker = True;
1182 ++usingSource()->fPictureCount;
1183
1184 // Note that the presentation time for the next NAL unit will be different:
1185 struct timeval& nextPT = usingSource()->fNextPresentationTime; // alias
1186 nextPT = usingSource()->fPresentationTime;
1187 double nextFraction = nextPT.tv_usec/1000000.0 + 1/usingSource()->fFrameRate;
1188 unsigned nextSecsIncrement = (long)nextFraction;
1189 nextPT.tv_sec += (long)nextSecsIncrement;
1190 nextPT.tv_usec = (long)((nextFraction - nextSecsIncrement)*1000000);
1191 }
1192 setParseState();
1193
1194 return curFrameSize();
1195 } catch (int /*e*/) {
1196#ifdef DEBUG
1197 fprintf(stderr, "H264or5VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n");
1198#endif
1199 return 0; // the parsing got interrupted
1200 }
1201}
1202
// Copies NAL unit data from "from" (of size "fromSize") to "to", leaving out each
// 'emulation prevention' byte: i.e., each 0x03 byte that immediately follows two (or more)
// 0x00 bytes in the input.
// At most "toMaxSize" bytes are written; if the output doesn't fit, it is truncated.
// Returns the number of bytes that were written to "to".
unsigned removeH264or5EmulationBytes(u_int8_t* to, unsigned toMaxSize,
                                     u_int8_t const* from, unsigned fromSize) {
  unsigned toSize = 0;
  unsigned numPrecedingZeroBytes = 0; // length of the run of 0x00 input bytes that ends just before "from[i]"

  for (unsigned i = 0; i < fromSize; ++i) {
    u_int8_t const nextByte = from[i];

    if (nextByte == 3 && numPrecedingZeroBytes >= 2) {
      // This is an 'emulation prevention' byte; don't copy it:
      numPrecedingZeroBytes = 0;
      continue;
    }

    // We test for space only when we actually have a byte to write.  This lets us fill the
    // output buffer completely (e.g., when "toMaxSize" == "fromSize"), rather than always
    // leaving its last byte unused:
    if (toSize >= toMaxSize) break;

    to[toSize++] = nextByte;
    numPrecedingZeroBytes = nextByte == 0 ? numPrecedingZeroBytes + 1 : 0;
  }

  return toSize;
}
1221