1 | /********** |
2 | This library is free software; you can redistribute it and/or modify it under |
3 | the terms of the GNU Lesser General Public License as published by the |
4 | Free Software Foundation; either version 3 of the License, or (at your |
5 | option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.) |
6 | |
7 | This library is distributed in the hope that it will be useful, but WITHOUT |
8 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
9 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for |
10 | more details. |
11 | |
12 | You should have received a copy of the GNU Lesser General Public License |
13 | along with this library; if not, write to the Free Software Foundation, Inc., |
14 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
15 | **********/ |
16 | // "liveMedia" |
17 | // Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved. |
// A filter that breaks up an H.264 or H.265 Video Elementary Stream into NAL units.
19 | // Implementation |
20 | |
21 | #include "H264or5VideoStreamFramer.hh" |
22 | #include "MPEGVideoStreamParser.hh" |
23 | #include "BitVector.hh" |
24 | |
25 | ////////// H264or5VideoStreamParser definition ////////// |
26 | |
27 | class H264or5VideoStreamParser: public MPEGVideoStreamParser { |
28 | public: |
29 | H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource, |
30 | FramedSource* inputSource, Boolean includeStartCodeInOutput); |
31 | virtual ~H264or5VideoStreamParser(); |
32 | |
33 | private: // redefined virtual functions: |
34 | virtual void flushInput(); |
35 | virtual unsigned parse(); |
36 | |
37 | private: |
38 | H264or5VideoStreamFramer* usingSource() { |
39 | return (H264or5VideoStreamFramer*)fUsingSource; |
40 | } |
41 | |
42 | Boolean isVPS(u_int8_t nal_unit_type) { return usingSource()->isVPS(nal_unit_type); } |
43 | Boolean isSPS(u_int8_t nal_unit_type) { return usingSource()->isSPS(nal_unit_type); } |
44 | Boolean isPPS(u_int8_t nal_unit_type) { return usingSource()->isPPS(nal_unit_type); } |
45 | Boolean isVCL(u_int8_t nal_unit_type) { return usingSource()->isVCL(nal_unit_type); } |
46 | Boolean isSEI(u_int8_t nal_unit_type); |
47 | Boolean isEOF(u_int8_t nal_unit_type); |
48 | Boolean usuallyBeginsAccessUnit(u_int8_t nal_unit_type); |
49 | |
50 | void removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize); |
51 | |
52 | void analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale); |
53 | void analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale); |
54 | void profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1); |
55 | void analyze_vui_parameters(BitVector& bv, unsigned& num_units_in_tick, unsigned& time_scale); |
56 | void analyze_hrd_parameters(BitVector& bv); |
57 | void analyze_sei_data(u_int8_t nal_unit_type); |
58 | void analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload); |
59 | |
60 | private: |
61 | int fHNumber; // 264 or 265 |
62 | unsigned fOutputStartCodeSize; |
63 | Boolean fHaveSeenFirstStartCode, fHaveSeenFirstByteOfNALUnit; |
64 | u_int8_t fFirstByteOfNALUnit; |
65 | double fParsedFrameRate; |
66 | // variables set & used in the specification: |
67 | unsigned cpb_removal_delay_length_minus1, dpb_output_delay_length_minus1; |
68 | Boolean CpbDpbDelaysPresentFlag, pic_struct_present_flag; |
69 | double DeltaTfiDivisor; |
70 | }; |
71 | |
72 | |
73 | ////////// H264or5VideoStreamFramer implementation ////////// |
74 | |
75 | H264or5VideoStreamFramer |
76 | ::H264or5VideoStreamFramer(int hNumber, UsageEnvironment& env, FramedSource* inputSource, |
77 | Boolean createParser, |
78 | Boolean includeStartCodeInOutput, Boolean insertAccessUnitDelimiters) |
79 | : MPEGVideoStreamFramer(env, inputSource), |
80 | fHNumber(hNumber), fIncludeStartCodeInOutput(includeStartCodeInOutput), |
81 | fInsertAccessUnitDelimiters(insertAccessUnitDelimiters), |
82 | fLastSeenVPS(NULL), fLastSeenVPSSize(0), |
83 | fLastSeenSPS(NULL), fLastSeenSPSSize(0), |
84 | fLastSeenPPS(NULL), fLastSeenPPSSize(0) { |
85 | fParser = createParser |
86 | ? new H264or5VideoStreamParser(hNumber, this, inputSource, includeStartCodeInOutput) |
87 | : NULL; |
88 | fNextPresentationTime = fPresentationTimeBase; |
89 | fFrameRate = 25.0; // We assume a frame rate of 25 fps, unless we learn otherwise (from parsing a VPS or SPS NAL unit) |
90 | } |
91 | |
92 | H264or5VideoStreamFramer::~H264or5VideoStreamFramer() { |
93 | delete[] fLastSeenPPS; |
94 | delete[] fLastSeenSPS; |
95 | delete[] fLastSeenVPS; |
96 | } |
97 | |
98 | #define VPS_MAX_SIZE 1000 // larger than the largest possible VPS (Video Parameter Set) NAL unit |
99 | |
100 | void H264or5VideoStreamFramer::saveCopyOfVPS(u_int8_t* from, unsigned size) { |
101 | if (from == NULL) return; |
102 | delete[] fLastSeenVPS; |
103 | fLastSeenVPS = new u_int8_t[size]; |
104 | memmove(fLastSeenVPS, from, size); |
105 | |
106 | fLastSeenVPSSize = size; |
107 | } |
108 | |
109 | #define SPS_MAX_SIZE 1000 // larger than the largest possible SPS (Sequence Parameter Set) NAL unit |
110 | |
111 | void H264or5VideoStreamFramer::saveCopyOfSPS(u_int8_t* from, unsigned size) { |
112 | if (from == NULL) return; |
113 | delete[] fLastSeenSPS; |
114 | fLastSeenSPS = new u_int8_t[size]; |
115 | memmove(fLastSeenSPS, from, size); |
116 | |
117 | fLastSeenSPSSize = size; |
118 | } |
119 | |
120 | void H264or5VideoStreamFramer::saveCopyOfPPS(u_int8_t* from, unsigned size) { |
121 | if (from == NULL) return; |
122 | delete[] fLastSeenPPS; |
123 | fLastSeenPPS = new u_int8_t[size]; |
124 | memmove(fLastSeenPPS, from, size); |
125 | |
126 | fLastSeenPPSSize = size; |
127 | } |
128 | |
129 | Boolean H264or5VideoStreamFramer::isVPS(u_int8_t nal_unit_type) { |
130 | // VPS NAL units occur in H.265 only: |
131 | return fHNumber == 265 && nal_unit_type == 32; |
132 | } |
133 | |
134 | Boolean H264or5VideoStreamFramer::isSPS(u_int8_t nal_unit_type) { |
135 | return fHNumber == 264 ? nal_unit_type == 7 : nal_unit_type == 33; |
136 | } |
137 | |
138 | Boolean H264or5VideoStreamFramer::isPPS(u_int8_t nal_unit_type) { |
139 | return fHNumber == 264 ? nal_unit_type == 8 : nal_unit_type == 34; |
140 | } |
141 | |
142 | Boolean H264or5VideoStreamFramer::isVCL(u_int8_t nal_unit_type) { |
143 | return fHNumber == 264 |
144 | ? (nal_unit_type <= 5 && nal_unit_type > 0) |
145 | : (nal_unit_type <= 31); |
146 | } |
147 | |
148 | void H264or5VideoStreamFramer::doGetNextFrame() { |
149 | if (fInsertAccessUnitDelimiters && pictureEndMarker()) { |
150 | // Deliver an "access_unit_delimiter" NAL unit instead: |
151 | unsigned const startCodeSize = fIncludeStartCodeInOutput ? 4: 0; |
152 | unsigned const audNALSize = fHNumber == 264 ? 2 : 3; |
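    // (The AUD NAL unit is 2 bytes for H.264 - a 1-byte NAL header plus one byte holding "primary_pic_type"
    //  and the trailing bits - and 3 bytes for H.265, whose NAL header is 2 bytes long.)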
153 | |
154 | fFrameSize = startCodeSize + audNALSize; |
155 | if (fFrameSize > fMaxSize) { // there's no space |
156 | fNumTruncatedBytes = fFrameSize - fMaxSize; |
157 | fFrameSize = fMaxSize; |
158 | handleClosure(); |
159 | return; |
160 | } |
161 | |
162 | if (fIncludeStartCodeInOutput) { |
163 | *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x01; |
164 | } |
165 | if (fHNumber == 264) { |
166 | *fTo++ = 9; // "Access unit delimiter" nal_unit_type |
167 | *fTo++ = 0xF0; // "primary_pic_type" (7); "rbsp_trailing_bits()" |
168 | } else { // H.265 |
169 | *fTo++ = 35<<1; // "Access unit delimiter" nal_unit_type |
170 | *fTo++ = 0; // "nuh_layer_id" (0); "nuh_temporal_id_plus1" (0) (Is this correct??) |
171 | *fTo++ = 0x50; // "pic_type" (2); "rbsp_trailing_bits()" (Is this correct??) |
172 | } |
173 | |
174 | pictureEndMarker() = False; // for next time |
175 | afterGetting(this); |
176 | } else { |
177 | // Do the normal delivery of a NAL unit from the parser: |
178 | MPEGVideoStreamFramer::doGetNextFrame(); |
179 | } |
180 | } |
181 | |
182 | |
183 | ////////// H264or5VideoStreamParser implementation ////////// |
184 | |
185 | H264or5VideoStreamParser |
186 | ::H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource, |
187 | FramedSource* inputSource, Boolean includeStartCodeInOutput) |
188 | : MPEGVideoStreamParser(usingSource, inputSource), |
189 | fHNumber(hNumber), fOutputStartCodeSize(includeStartCodeInOutput ? 4 : 0), fHaveSeenFirstStartCode(False), fHaveSeenFirstByteOfNALUnit(False), fParsedFrameRate(0.0), |
190 | cpb_removal_delay_length_minus1(23), dpb_output_delay_length_minus1(23), |
191 | CpbDpbDelaysPresentFlag(0), pic_struct_present_flag(0), |
192 | DeltaTfiDivisor(2.0) { |
193 | } |
194 | |
195 | H264or5VideoStreamParser::~H264or5VideoStreamParser() { |
196 | } |
197 | |
198 | #define PREFIX_SEI_NUT 39 // for H.265 |
199 | #define SUFFIX_SEI_NUT 40 // for H.265 |
200 | Boolean H264or5VideoStreamParser::isSEI(u_int8_t nal_unit_type) { |
201 | return fHNumber == 264 |
202 | ? nal_unit_type == 6 |
203 | : (nal_unit_type == PREFIX_SEI_NUT || nal_unit_type == SUFFIX_SEI_NUT); |
204 | } |
205 | |
206 | Boolean H264or5VideoStreamParser::isEOF(u_int8_t nal_unit_type) { |
207 | // "end of sequence" or "end of (bit)stream" |
208 | return fHNumber == 264 |
209 | ? (nal_unit_type == 10 || nal_unit_type == 11) |
210 | : (nal_unit_type == 36 || nal_unit_type == 37); |
211 | } |
212 | |
213 | Boolean H264or5VideoStreamParser::usuallyBeginsAccessUnit(u_int8_t nal_unit_type) { |
214 | return fHNumber == 264 |
215 | ? (nal_unit_type >= 6 && nal_unit_type <= 9) || (nal_unit_type >= 14 && nal_unit_type <= 18) |
216 | : (nal_unit_type >= 32 && nal_unit_type <= 35) || (nal_unit_type == 39) |
217 | || (nal_unit_type >= 41 && nal_unit_type <= 44) |
218 | || (nal_unit_type >= 48 && nal_unit_type <= 55); |
219 | } |
220 | |
221 | void H264or5VideoStreamParser |
222 | ::removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize) { |
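  // Make the copy from the NAL unit that we've just saved to the output buffer
  // (it lies between "fStartOfFrame + fOutputStartCodeSize" and "fTo"):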
223 | u_int8_t const* nalUnitOrig = fStartOfFrame + fOutputStartCodeSize; |
224 | unsigned const numBytesInNALunit = fTo - nalUnitOrig; |
225 | nalUnitCopySize |
226 | = removeH264or5EmulationBytes(nalUnitCopy, maxSize, nalUnitOrig, numBytesInNALunit); |
227 | } |
228 | |
229 | #ifdef DEBUG |
230 | char const* nal_unit_type_description_h264[32] = { |
231 | "Unspecified" , //0 |
232 | "Coded slice of a non-IDR picture" , //1 |
233 | "Coded slice data partition A" , //2 |
234 | "Coded slice data partition B" , //3 |
235 | "Coded slice data partition C" , //4 |
236 | "Coded slice of an IDR picture" , //5 |
237 | "Supplemental enhancement information (SEI)" , //6 |
238 | "Sequence parameter set" , //7 |
239 | "Picture parameter set" , //8 |
240 | "Access unit delimiter" , //9 |
241 | "End of sequence" , //10 |
242 | "End of stream" , //11 |
243 | "Filler data" , //12 |
244 | "Sequence parameter set extension" , //13 |
245 | "Prefix NAL unit" , //14 |
246 | "Subset sequence parameter set" , //15 |
247 | "Reserved" , //16 |
248 | "Reserved" , //17 |
249 | "Reserved" , //18 |
250 | "Coded slice of an auxiliary coded picture without partitioning" , //19 |
251 | "Coded slice extension" , //20 |
252 | "Reserved" , //21 |
253 | "Reserved" , //22 |
254 | "Reserved" , //23 |
255 | "Unspecified" , //24 |
256 | "Unspecified" , //25 |
257 | "Unspecified" , //26 |
258 | "Unspecified" , //27 |
259 | "Unspecified" , //28 |
260 | "Unspecified" , //29 |
261 | "Unspecified" , //30 |
262 | "Unspecified" //31 |
263 | }; |
264 | char const* nal_unit_type_description_h265[64] = { |
265 | "Coded slice segment of a non-TSA, non-STSA trailing picture" , //0 |
266 | "Coded slice segment of a non-TSA, non-STSA trailing picture" , //1 |
267 | "Coded slice segment of a TSA picture" , //2 |
268 | "Coded slice segment of a TSA picture" , //3 |
269 | "Coded slice segment of a STSA picture" , //4 |
270 | "Coded slice segment of a STSA picture" , //5 |
271 | "Coded slice segment of a RADL picture" , //6 |
272 | "Coded slice segment of a RADL picture" , //7 |
273 | "Coded slice segment of a RASL picture" , //8 |
274 | "Coded slice segment of a RASL picture" , //9 |
275 | "Reserved" , //10 |
276 | "Reserved" , //11 |
277 | "Reserved" , //12 |
278 | "Reserved" , //13 |
279 | "Reserved" , //14 |
280 | "Reserved" , //15 |
281 | "Coded slice segment of a BLA picture" , //16 |
282 | "Coded slice segment of a BLA picture" , //17 |
283 | "Coded slice segment of a BLA picture" , //18 |
284 | "Coded slice segment of an IDR picture" , //19 |
285 | "Coded slice segment of an IDR picture" , //20 |
286 | "Coded slice segment of a CRA picture" , //21 |
287 | "Reserved" , //22 |
288 | "Reserved" , //23 |
289 | "Reserved" , //24 |
290 | "Reserved" , //25 |
291 | "Reserved" , //26 |
292 | "Reserved" , //27 |
293 | "Reserved" , //28 |
294 | "Reserved" , //29 |
295 | "Reserved" , //30 |
296 | "Reserved" , //31 |
297 | "Video parameter set" , //32 |
298 | "Sequence parameter set" , //33 |
299 | "Picture parameter set" , //34 |
300 | "Access unit delimiter" , //35 |
301 | "End of sequence" , //36 |
302 | "End of bitstream" , //37 |
303 | "Filler data" , //38 |
304 | "Supplemental enhancement information (SEI)" , //39 |
305 | "Supplemental enhancement information (SEI)" , //40 |
306 | "Reserved" , //41 |
307 | "Reserved" , //42 |
308 | "Reserved" , //43 |
309 | "Reserved" , //44 |
310 | "Reserved" , //45 |
311 | "Reserved" , //46 |
312 | "Reserved" , //47 |
313 | "Unspecified" , //48 |
314 | "Unspecified" , //49 |
315 | "Unspecified" , //50 |
316 | "Unspecified" , //51 |
317 | "Unspecified" , //52 |
318 | "Unspecified" , //53 |
319 | "Unspecified" , //54 |
320 | "Unspecified" , //55 |
321 | "Unspecified" , //56 |
322 | "Unspecified" , //57 |
323 | "Unspecified" , //58 |
324 | "Unspecified" , //59 |
325 | "Unspecified" , //60 |
326 | "Unspecified" , //61 |
327 | "Unspecified" , //62 |
328 | "Unspecified" , //63 |
329 | }; |
330 | #endif |
331 | |
332 | #ifdef DEBUG |
333 | static unsigned numDebugTabs = 1; |
334 | #define DEBUG_PRINT_TABS for (unsigned _i = 0; _i < numDebugTabs; ++_i) fprintf(stderr, "\t") |
335 | #define DEBUG_PRINT(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s: %d\n", #x, x); } while (0) |
336 | #define DEBUG_STR(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s\n", x); } while (0) |
337 | class DebugTab { |
338 | public: |
339 | DebugTab() {++numDebugTabs;} |
340 | ~DebugTab() {--numDebugTabs;} |
341 | }; |
342 | #define DEBUG_TAB DebugTab dummy |
343 | #else |
344 | #define DEBUG_PRINT(x) do {x = x;} while (0) |
345 | // Note: the "x=x;" statement is intended to eliminate "unused variable" compiler warning messages |
346 | #define DEBUG_STR(x) do {} while (0) |
347 | #define DEBUG_TAB do {} while (0) |
348 | #endif |
349 | |
350 | void H264or5VideoStreamParser::profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1) { |
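  // Skip over the fixed-size 'general' fields: "general_profile_space" (2 bits), "general_tier_flag" (1),
  // "general_profile_idc" (5), "general_profile_compatibility_flag[32]" (32), four source/constraint flags (4),
  // 44 reserved/constraint bits, and "general_level_idc" (8) - 96 bits in total: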
351 | bv.skipBits(96); |
352 | |
353 | unsigned i; |
354 | Boolean sub_layer_profile_present_flag[7], sub_layer_level_present_flag[7]; |
355 | for (i = 0; i < max_sub_layers_minus1; ++i) { |
356 | sub_layer_profile_present_flag[i] = bv.get1BitBoolean(); |
357 | sub_layer_level_present_flag[i] = bv.get1BitBoolean(); |
358 | } |
359 | if (max_sub_layers_minus1 > 0) { |
    bv.skipBits(2*(8-max_sub_layers_minus1)); // reserved_zero_2bits[i], for i = max_sub_layers_minus1..7
361 | } |
362 | for (i = 0; i < max_sub_layers_minus1; ++i) { |
363 | if (sub_layer_profile_present_flag[i]) { |
      bv.skipBits(88); // the sub-layer analogs of the 88 bits that precede "general_level_idc" above
365 | } |
366 | if (sub_layer_level_present_flag[i]) { |
367 | bv.skipBits(8); // sub_layer_level_idc[i] |
368 | } |
369 | } |
370 | } |
371 | |
372 | void H264or5VideoStreamParser |
373 | ::analyze_vui_parameters(BitVector& bv, |
374 | unsigned& num_units_in_tick, unsigned& time_scale) { |
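  // We parse the VUI parameters mainly to extract the timing information ("num_units_in_tick" and "time_scale");
  // for H.264 we also note the HRD-related fields and "pic_struct_present_flag", which we'll need later if we
  // parse a "pic_timing" SEI message.  Everything else is simply skipped over: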
375 | Boolean aspect_ratio_info_present_flag = bv.get1BitBoolean(); |
376 | DEBUG_PRINT(aspect_ratio_info_present_flag); |
377 | if (aspect_ratio_info_present_flag) { |
378 | DEBUG_TAB; |
379 | unsigned aspect_ratio_idc = bv.getBits(8); |
380 | DEBUG_PRINT(aspect_ratio_idc); |
381 | if (aspect_ratio_idc == 255/*Extended_SAR*/) { |
382 | bv.skipBits(32); // sar_width; sar_height |
383 | } |
384 | } |
385 | Boolean overscan_info_present_flag = bv.get1BitBoolean(); |
386 | DEBUG_PRINT(overscan_info_present_flag); |
387 | if (overscan_info_present_flag) { |
388 | bv.skipBits(1); // overscan_appropriate_flag |
389 | } |
390 | Boolean video_signal_type_present_flag = bv.get1BitBoolean(); |
391 | DEBUG_PRINT(video_signal_type_present_flag); |
392 | if (video_signal_type_present_flag) { |
393 | DEBUG_TAB; |
394 | bv.skipBits(4); // video_format; video_full_range_flag |
395 | Boolean colour_description_present_flag = bv.get1BitBoolean(); |
396 | DEBUG_PRINT(colour_description_present_flag); |
397 | if (colour_description_present_flag) { |
398 | bv.skipBits(24); // colour_primaries; transfer_characteristics; matrix_coefficients |
399 | } |
400 | } |
401 | Boolean chroma_loc_info_present_flag = bv.get1BitBoolean(); |
402 | DEBUG_PRINT(chroma_loc_info_present_flag); |
403 | if (chroma_loc_info_present_flag) { |
404 | (void)bv.get_expGolomb(); // chroma_sample_loc_type_top_field |
405 | (void)bv.get_expGolomb(); // chroma_sample_loc_type_bottom_field |
406 | } |
407 | if (fHNumber == 265) { |
408 | bv.skipBits(2); // neutral_chroma_indication_flag, field_seq_flag |
409 | Boolean frame_field_info_present_flag = bv.get1BitBoolean(); |
410 | DEBUG_PRINT(frame_field_info_present_flag); |
411 | pic_struct_present_flag = frame_field_info_present_flag; // hack to make H.265 like H.264 |
412 | Boolean default_display_window_flag = bv.get1BitBoolean(); |
413 | DEBUG_PRINT(default_display_window_flag); |
414 | if (default_display_window_flag) { |
415 | (void)bv.get_expGolomb(); // def_disp_win_left_offset |
416 | (void)bv.get_expGolomb(); // def_disp_win_right_offset |
417 | (void)bv.get_expGolomb(); // def_disp_win_top_offset |
418 | (void)bv.get_expGolomb(); // def_disp_win_bottom_offset |
419 | } |
420 | } |
421 | Boolean timing_info_present_flag = bv.get1BitBoolean(); |
422 | DEBUG_PRINT(timing_info_present_flag); |
423 | if (timing_info_present_flag) { |
424 | DEBUG_TAB; |
425 | num_units_in_tick = bv.getBits(32); |
426 | DEBUG_PRINT(num_units_in_tick); |
427 | time_scale = bv.getBits(32); |
428 | DEBUG_PRINT(time_scale); |
429 | if (fHNumber == 264) { |
430 | Boolean fixed_frame_rate_flag = bv.get1BitBoolean(); |
431 | DEBUG_PRINT(fixed_frame_rate_flag); |
432 | } else { // 265 |
433 | Boolean vui_poc_proportional_to_timing_flag = bv.get1BitBoolean(); |
434 | DEBUG_PRINT(vui_poc_proportional_to_timing_flag); |
435 | if (vui_poc_proportional_to_timing_flag) { |
436 | unsigned vui_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb(); |
437 | DEBUG_PRINT(vui_num_ticks_poc_diff_one_minus1); |
438 | } |
439 | return; // For H.265, don't bother parsing any more of this ##### |
440 | } |
441 | } |
442 | // The following is H.264 only: ##### |
443 | Boolean nal_hrd_parameters_present_flag = bv.get1BitBoolean(); |
444 | DEBUG_PRINT(nal_hrd_parameters_present_flag); |
445 | if (nal_hrd_parameters_present_flag) analyze_hrd_parameters(bv); |
446 | Boolean vcl_hrd_parameters_present_flag = bv.get1BitBoolean(); |
447 | DEBUG_PRINT(vcl_hrd_parameters_present_flag); |
448 | if (vcl_hrd_parameters_present_flag) analyze_hrd_parameters(bv); |
449 | CpbDpbDelaysPresentFlag = nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag; |
450 | if (CpbDpbDelaysPresentFlag) { |
451 | bv.skipBits(1); // low_delay_hrd_flag |
452 | } |
453 | pic_struct_present_flag = bv.get1BitBoolean(); |
454 | DEBUG_PRINT(pic_struct_present_flag); |
455 | } |
456 | |
457 | void H264or5VideoStreamParser::analyze_hrd_parameters(BitVector& bv) { |
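  // Most of these fields are simply skipped over; however, we record "cpb_removal_delay_length_minus1" and
  // "dpb_output_delay_length_minus1", because they define field sizes in any subsequent "pic_timing" SEI payload: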
458 | DEBUG_TAB; |
459 | unsigned cpb_cnt_minus1 = bv.get_expGolomb(); |
460 | DEBUG_PRINT(cpb_cnt_minus1); |
461 | unsigned bit_rate_scale = bv.getBits(4); |
462 | DEBUG_PRINT(bit_rate_scale); |
463 | unsigned cpb_size_scale = bv.getBits(4); |
464 | DEBUG_PRINT(cpb_size_scale); |
465 | for (unsigned SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; ++SchedSelIdx) { |
466 | DEBUG_TAB; |
467 | DEBUG_PRINT(SchedSelIdx); |
468 | unsigned bit_rate_value_minus1 = bv.get_expGolomb(); |
469 | DEBUG_PRINT(bit_rate_value_minus1); |
470 | unsigned cpb_size_value_minus1 = bv.get_expGolomb(); |
471 | DEBUG_PRINT(cpb_size_value_minus1); |
472 | Boolean cbr_flag = bv.get1BitBoolean(); |
473 | DEBUG_PRINT(cbr_flag); |
474 | } |
475 | unsigned initial_cpb_removal_delay_length_minus1 = bv.getBits(5); |
476 | DEBUG_PRINT(initial_cpb_removal_delay_length_minus1); |
477 | cpb_removal_delay_length_minus1 = bv.getBits(5); |
478 | DEBUG_PRINT(cpb_removal_delay_length_minus1); |
479 | dpb_output_delay_length_minus1 = bv.getBits(5); |
480 | DEBUG_PRINT(dpb_output_delay_length_minus1); |
481 | unsigned time_offset_length = bv.getBits(5); |
482 | DEBUG_PRINT(time_offset_length); |
483 | } |
484 | |
485 | void H264or5VideoStreamParser |
486 | ::analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) { |
487 | num_units_in_tick = time_scale = 0; // default values |
488 | |
489 | // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes: |
490 | u_int8_t vps[VPS_MAX_SIZE]; |
491 | unsigned vpsSize; |
492 | removeEmulationBytes(vps, sizeof vps, vpsSize); |
493 | |
494 | BitVector bv(vps, 0, 8*vpsSize); |
495 | |
496 | // Assert: fHNumber == 265 (because this function is called only when parsing H.265) |
497 | unsigned i; |
498 | |
  bv.skipBits(28); // nal_unit_header (16 bits), vps_video_parameter_set_id (4), vps_reserved_three_2bits (2), vps_max_layers_minus1 (6)
500 | unsigned vps_max_sub_layers_minus1 = bv.getBits(3); |
501 | DEBUG_PRINT(vps_max_sub_layers_minus1); |
  bv.skipBits(17); // vps_temporal_id_nesting_flag (1 bit), vps_reserved_0xffff_16bits (16)
503 | profile_tier_level(bv, vps_max_sub_layers_minus1); |
504 | Boolean vps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean(); |
505 | DEBUG_PRINT(vps_sub_layer_ordering_info_present_flag); |
506 | for (i = vps_sub_layer_ordering_info_present_flag ? 0 : vps_max_sub_layers_minus1; |
507 | i <= vps_max_sub_layers_minus1; ++i) { |
508 | (void)bv.get_expGolomb(); // vps_max_dec_pic_buffering_minus1[i] |
509 | (void)bv.get_expGolomb(); // vps_max_num_reorder_pics[i] |
510 | (void)bv.get_expGolomb(); // vps_max_latency_increase_plus1[i] |
511 | } |
512 | unsigned vps_max_layer_id = bv.getBits(6); |
513 | DEBUG_PRINT(vps_max_layer_id); |
514 | unsigned vps_num_layer_sets_minus1 = bv.get_expGolomb(); |
515 | DEBUG_PRINT(vps_num_layer_sets_minus1); |
516 | for (i = 1; i <= vps_num_layer_sets_minus1; ++i) { |
517 | bv.skipBits(vps_max_layer_id+1); // layer_id_included_flag[i][0..vps_max_layer_id] |
518 | } |
519 | Boolean vps_timing_info_present_flag = bv.get1BitBoolean(); |
520 | DEBUG_PRINT(vps_timing_info_present_flag); |
521 | if (vps_timing_info_present_flag) { |
522 | DEBUG_TAB; |
523 | num_units_in_tick = bv.getBits(32); |
524 | DEBUG_PRINT(num_units_in_tick); |
525 | time_scale = bv.getBits(32); |
526 | DEBUG_PRINT(time_scale); |
527 | Boolean vps_poc_proportional_to_timing_flag = bv.get1BitBoolean(); |
528 | DEBUG_PRINT(vps_poc_proportional_to_timing_flag); |
529 | if (vps_poc_proportional_to_timing_flag) { |
530 | unsigned vps_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb(); |
531 | DEBUG_PRINT(vps_num_ticks_poc_diff_one_minus1); |
532 | } |
533 | } |
534 | Boolean vps_extension_flag = bv.get1BitBoolean(); |
535 | DEBUG_PRINT(vps_extension_flag); |
536 | } |
537 | |
538 | void H264or5VideoStreamParser |
539 | ::analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) { |
540 | num_units_in_tick = time_scale = 0; // default values |
541 | |
542 | // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes: |
543 | u_int8_t sps[SPS_MAX_SIZE]; |
544 | unsigned spsSize; |
545 | removeEmulationBytes(sps, sizeof sps, spsSize); |
546 | |
547 | BitVector bv(sps, 0, 8*spsSize); |
548 | |
549 | if (fHNumber == 264) { |
550 | bv.skipBits(8); // forbidden_zero_bit; nal_ref_idc; nal_unit_type |
551 | unsigned profile_idc = bv.getBits(8); |
552 | DEBUG_PRINT(profile_idc); |
553 | unsigned constraint_setN_flag = bv.getBits(8); // also "reserved_zero_2bits" at end |
554 | DEBUG_PRINT(constraint_setN_flag); |
555 | unsigned level_idc = bv.getBits(8); |
556 | DEBUG_PRINT(level_idc); |
557 | unsigned seq_parameter_set_id = bv.get_expGolomb(); |
558 | DEBUG_PRINT(seq_parameter_set_id); |
559 | if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ) { |
560 | DEBUG_TAB; |
561 | unsigned chroma_format_idc = bv.get_expGolomb(); |
562 | DEBUG_PRINT(chroma_format_idc); |
563 | if (chroma_format_idc == 3) { |
564 | DEBUG_TAB; |
565 | Boolean separate_colour_plane_flag = bv.get1BitBoolean(); |
566 | DEBUG_PRINT(separate_colour_plane_flag); |
567 | } |
568 | (void)bv.get_expGolomb(); // bit_depth_luma_minus8 |
569 | (void)bv.get_expGolomb(); // bit_depth_chroma_minus8 |
570 | bv.skipBits(1); // qpprime_y_zero_transform_bypass_flag |
571 | Boolean seq_scaling_matrix_present_flag = bv.get1BitBoolean(); |
572 | DEBUG_PRINT(seq_scaling_matrix_present_flag); |
573 | if (seq_scaling_matrix_present_flag) { |
574 | for (int i = 0; i < ((chroma_format_idc != 3) ? 8 : 12); ++i) { |
575 | DEBUG_TAB; |
576 | DEBUG_PRINT(i); |
577 | Boolean seq_scaling_list_present_flag = bv.get1BitBoolean(); |
578 | DEBUG_PRINT(seq_scaling_list_present_flag); |
579 | if (seq_scaling_list_present_flag) { |
580 | DEBUG_TAB; |
581 | unsigned sizeOfScalingList = i < 6 ? 16 : 64; |
582 | unsigned lastScale = 8; |
583 | unsigned nextScale = 8; |
584 | for (unsigned j = 0; j < sizeOfScalingList; ++j) { |
585 | DEBUG_TAB; |
586 | DEBUG_PRINT(j); |
587 | DEBUG_PRINT(nextScale); |
588 | if (nextScale != 0) { |
589 | DEBUG_TAB; |
590 | int delta_scale = bv.get_expGolombSigned(); |
591 | DEBUG_PRINT(delta_scale); |
592 | nextScale = (lastScale + delta_scale + 256) % 256; |
593 | } |
594 | lastScale = (nextScale == 0) ? lastScale : nextScale; |
595 | DEBUG_PRINT(lastScale); |
596 | } |
597 | } |
598 | } |
599 | } |
600 | } |
601 | unsigned log2_max_frame_num_minus4 = bv.get_expGolomb(); |
602 | DEBUG_PRINT(log2_max_frame_num_minus4); |
603 | unsigned pic_order_cnt_type = bv.get_expGolomb(); |
604 | DEBUG_PRINT(pic_order_cnt_type); |
605 | if (pic_order_cnt_type == 0) { |
606 | DEBUG_TAB; |
607 | unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb(); |
608 | DEBUG_PRINT(log2_max_pic_order_cnt_lsb_minus4); |
609 | } else if (pic_order_cnt_type == 1) { |
610 | DEBUG_TAB; |
611 | bv.skipBits(1); // delta_pic_order_always_zero_flag |
612 | (void)bv.get_expGolombSigned(); // offset_for_non_ref_pic |
613 | (void)bv.get_expGolombSigned(); // offset_for_top_to_bottom_field |
614 | unsigned num_ref_frames_in_pic_order_cnt_cycle = bv.get_expGolomb(); |
615 | DEBUG_PRINT(num_ref_frames_in_pic_order_cnt_cycle); |
616 | for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { |
617 | (void)bv.get_expGolombSigned(); // offset_for_ref_frame[i] |
618 | } |
619 | } |
620 | unsigned max_num_ref_frames = bv.get_expGolomb(); |
621 | DEBUG_PRINT(max_num_ref_frames); |
622 | Boolean gaps_in_frame_num_value_allowed_flag = bv.get1BitBoolean(); |
623 | DEBUG_PRINT(gaps_in_frame_num_value_allowed_flag); |
624 | unsigned pic_width_in_mbs_minus1 = bv.get_expGolomb(); |
625 | DEBUG_PRINT(pic_width_in_mbs_minus1); |
626 | unsigned pic_height_in_map_units_minus1 = bv.get_expGolomb(); |
627 | DEBUG_PRINT(pic_height_in_map_units_minus1); |
628 | Boolean frame_mbs_only_flag = bv.get1BitBoolean(); |
629 | DEBUG_PRINT(frame_mbs_only_flag); |
630 | if (!frame_mbs_only_flag) { |
631 | bv.skipBits(1); // mb_adaptive_frame_field_flag |
632 | } |
633 | bv.skipBits(1); // direct_8x8_inference_flag |
634 | Boolean frame_cropping_flag = bv.get1BitBoolean(); |
635 | DEBUG_PRINT(frame_cropping_flag); |
636 | if (frame_cropping_flag) { |
637 | (void)bv.get_expGolomb(); // frame_crop_left_offset |
638 | (void)bv.get_expGolomb(); // frame_crop_right_offset |
639 | (void)bv.get_expGolomb(); // frame_crop_top_offset |
640 | (void)bv.get_expGolomb(); // frame_crop_bottom_offset |
641 | } |
642 | Boolean vui_parameters_present_flag = bv.get1BitBoolean(); |
643 | DEBUG_PRINT(vui_parameters_present_flag); |
644 | if (vui_parameters_present_flag) { |
645 | DEBUG_TAB; |
646 | analyze_vui_parameters(bv, num_units_in_tick, time_scale); |
647 | } |
648 | } else { // 265 |
649 | unsigned i; |
650 | |
    bv.skipBits(16); // nal_unit_header (forbidden_zero_bit, nal_unit_type, nuh_layer_id, nuh_temporal_id_plus1)
652 | bv.skipBits(4); // sps_video_parameter_set_id |
653 | unsigned sps_max_sub_layers_minus1 = bv.getBits(3); |
654 | DEBUG_PRINT(sps_max_sub_layers_minus1); |
655 | bv.skipBits(1); // sps_temporal_id_nesting_flag |
656 | profile_tier_level(bv, sps_max_sub_layers_minus1); |
657 | (void)bv.get_expGolomb(); // sps_seq_parameter_set_id |
658 | unsigned chroma_format_idc = bv.get_expGolomb(); |
659 | DEBUG_PRINT(chroma_format_idc); |
660 | if (chroma_format_idc == 3) bv.skipBits(1); // separate_colour_plane_flag |
661 | unsigned pic_width_in_luma_samples = bv.get_expGolomb(); |
662 | DEBUG_PRINT(pic_width_in_luma_samples); |
663 | unsigned pic_height_in_luma_samples = bv.get_expGolomb(); |
664 | DEBUG_PRINT(pic_height_in_luma_samples); |
665 | Boolean conformance_window_flag = bv.get1BitBoolean(); |
666 | DEBUG_PRINT(conformance_window_flag); |
667 | if (conformance_window_flag) { |
668 | DEBUG_TAB; |
669 | unsigned conf_win_left_offset = bv.get_expGolomb(); |
670 | DEBUG_PRINT(conf_win_left_offset); |
671 | unsigned conf_win_right_offset = bv.get_expGolomb(); |
672 | DEBUG_PRINT(conf_win_right_offset); |
673 | unsigned conf_win_top_offset = bv.get_expGolomb(); |
674 | DEBUG_PRINT(conf_win_top_offset); |
675 | unsigned conf_win_bottom_offset = bv.get_expGolomb(); |
676 | DEBUG_PRINT(conf_win_bottom_offset); |
677 | } |
678 | (void)bv.get_expGolomb(); // bit_depth_luma_minus8 |
679 | (void)bv.get_expGolomb(); // bit_depth_chroma_minus8 |
680 | unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb(); |
681 | Boolean sps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean(); |
682 | DEBUG_PRINT(sps_sub_layer_ordering_info_present_flag); |
683 | for (i = (sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1); |
684 | i <= sps_max_sub_layers_minus1; ++i) { |
685 | (void)bv.get_expGolomb(); // sps_max_dec_pic_buffering_minus1[i] |
686 | (void)bv.get_expGolomb(); // sps_max_num_reorder_pics[i] |
687 | (void)bv.get_expGolomb(); // sps_max_latency_increase[i] |
688 | } |
689 | (void)bv.get_expGolomb(); // log2_min_luma_coding_block_size_minus3 |
690 | (void)bv.get_expGolomb(); // log2_diff_max_min_luma_coding_block_size |
691 | (void)bv.get_expGolomb(); // log2_min_transform_block_size_minus2 |
692 | (void)bv.get_expGolomb(); // log2_diff_max_min_transform_block_size |
693 | (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_inter |
694 | (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_intra |
695 | Boolean scaling_list_enabled_flag = bv.get1BitBoolean(); |
696 | DEBUG_PRINT(scaling_list_enabled_flag); |
697 | if (scaling_list_enabled_flag) { |
698 | DEBUG_TAB; |
699 | Boolean sps_scaling_list_data_present_flag = bv.get1BitBoolean(); |
700 | DEBUG_PRINT(sps_scaling_list_data_present_flag); |
701 | if (sps_scaling_list_data_present_flag) { |
702 | // scaling_list_data() |
703 | DEBUG_TAB; |
704 | for (unsigned sizeId = 0; sizeId < 4; ++sizeId) { |
705 | DEBUG_PRINT(sizeId); |
706 | for (unsigned matrixId = 0; matrixId < (sizeId == 3 ? 2 : 6); ++matrixId) { |
707 | DEBUG_TAB; |
708 | DEBUG_PRINT(matrixId); |
709 | Boolean scaling_list_pred_mode_flag = bv.get1BitBoolean(); |
710 | DEBUG_PRINT(scaling_list_pred_mode_flag); |
711 | if (!scaling_list_pred_mode_flag) { |
712 | (void)bv.get_expGolomb(); // scaling_list_pred_matrix_id_delta[sizeId][matrixId] |
713 | } else { |
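		// The number of coefficients to read is Min(64, 1 << (4 + (sizeId << 1))):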
714 | unsigned const c = 1 << (4+(sizeId<<1)); |
715 | unsigned coefNum = c < 64 ? c : 64; |
716 | if (sizeId > 1) { |
717 | (void)bv.get_expGolomb(); // scaling_list_dc_coef_minus8[sizeId][matrixId] |
718 | } |
719 | for (i = 0; i < coefNum; ++i) { |
720 | (void)bv.get_expGolomb(); // scaling_list_delta_coef |
721 | } |
722 | } |
723 | } |
724 | } |
725 | } |
726 | } |
727 | bv.skipBits(2); // amp_enabled_flag, sample_adaptive_offset_enabled_flag |
728 | Boolean pcm_enabled_flag = bv.get1BitBoolean(); |
729 | DEBUG_PRINT(pcm_enabled_flag); |
730 | if (pcm_enabled_flag) { |
731 | bv.skipBits(8); // pcm_sample_bit_depth_luma_minus1, pcm_sample_bit_depth_chroma_minus1 |
732 | (void)bv.get_expGolomb(); // log2_min_pcm_luma_coding_block_size_minus3 |
733 | (void)bv.get_expGolomb(); // log2_diff_max_min_pcm_luma_coding_block_size |
734 | bv.skipBits(1); // pcm_loop_filter_disabled_flag |
735 | } |
736 | unsigned num_short_term_ref_pic_sets = bv.get_expGolomb(); |
737 | DEBUG_PRINT(num_short_term_ref_pic_sets); |
738 | unsigned num_negative_pics = 0, prev_num_negative_pics = 0; |
739 | unsigned num_positive_pics = 0, prev_num_positive_pics = 0; |
740 | for (i = 0; i < num_short_term_ref_pic_sets; ++i) { |
741 | // short_term_ref_pic_set(i): |
742 | DEBUG_TAB; |
743 | DEBUG_PRINT(i); |
744 | Boolean inter_ref_pic_set_prediction_flag = False; |
745 | if (i != 0) { |
746 | inter_ref_pic_set_prediction_flag = bv.get1BitBoolean(); |
747 | } |
748 | DEBUG_PRINT(inter_ref_pic_set_prediction_flag); |
749 | if (inter_ref_pic_set_prediction_flag) { |
750 | DEBUG_TAB; |
751 | if (i == num_short_term_ref_pic_sets) { |
752 | // This can't happen here, but it's in the spec, so we include it for completeness |
753 | (void)bv.get_expGolomb(); // delta_idx_minus1 |
754 | } |
755 | bv.skipBits(1); // delta_rps_sign |
756 | (void)bv.get_expGolomb(); // abs_delta_rps_minus1 |
757 | unsigned NumDeltaPocs = prev_num_negative_pics + prev_num_positive_pics; // correct??? |
758 | for (unsigned j = 0; j < NumDeltaPocs; ++j) { |
759 | DEBUG_PRINT(j); |
760 | Boolean used_by_curr_pic_flag = bv.get1BitBoolean(); |
761 | DEBUG_PRINT(used_by_curr_pic_flag); |
762 | if (!used_by_curr_pic_flag) bv.skipBits(1); // use_delta_flag[j] |
763 | } |
764 | } else { |
765 | prev_num_negative_pics = num_negative_pics; |
766 | num_negative_pics = bv.get_expGolomb(); |
767 | DEBUG_PRINT(num_negative_pics); |
768 | prev_num_positive_pics = num_positive_pics; |
769 | num_positive_pics = bv.get_expGolomb(); |
770 | DEBUG_PRINT(num_positive_pics); |
771 | unsigned k; |
772 | for (k = 0; k < num_negative_pics; ++k) { |
773 | (void)bv.get_expGolomb(); // delta_poc_s0_minus1[k] |
774 | bv.skipBits(1); // used_by_curr_pic_s0_flag[k] |
775 | } |
776 | for (k = 0; k < num_positive_pics; ++k) { |
777 | (void)bv.get_expGolomb(); // delta_poc_s1_minus1[k] |
778 | bv.skipBits(1); // used_by_curr_pic_s1_flag[k] |
779 | } |
780 | } |
781 | } |
782 | Boolean long_term_ref_pics_present_flag = bv.get1BitBoolean(); |
783 | DEBUG_PRINT(long_term_ref_pics_present_flag); |
784 | if (long_term_ref_pics_present_flag) { |
785 | DEBUG_TAB; |
786 | unsigned num_long_term_ref_pics_sps = bv.get_expGolomb(); |
787 | DEBUG_PRINT(num_long_term_ref_pics_sps); |
788 | for (i = 0; i < num_long_term_ref_pics_sps; ++i) { |
789 | bv.skipBits(log2_max_pic_order_cnt_lsb_minus4); // lt_ref_pic_poc_lsb_sps[i] |
	bv.skipBits(1); // used_by_curr_pic_lt_sps_flag[i]
791 | } |
792 | } |
793 | bv.skipBits(2); // sps_temporal_mvp_enabled_flag, strong_intra_smoothing_enabled_flag |
794 | Boolean vui_parameters_present_flag = bv.get1BitBoolean(); |
795 | DEBUG_PRINT(vui_parameters_present_flag); |
796 | if (vui_parameters_present_flag) { |
797 | DEBUG_TAB; |
798 | analyze_vui_parameters(bv, num_units_in_tick, time_scale); |
799 | } |
800 | Boolean sps_extension_flag = bv.get1BitBoolean(); |
801 | DEBUG_PRINT(sps_extension_flag); |
802 | } |
803 | } |
804 | |
805 | #define SEI_MAX_SIZE 5000 // larger than the largest possible SEI NAL unit |
806 | |
807 | #ifdef DEBUG |
808 | #define MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264 46 |
809 | char const* sei_payloadType_description_h264[MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264+1] = { |
810 | "buffering_period" , //0 |
811 | "pic_timing" , //1 |
812 | "pan_scan_rect" , //2 |
813 | "filler_payload" , //3 |
814 | "user_data_registered_itu_t_t35" , //4 |
815 | "user_data_unregistered" , //5 |
816 | "recovery_point" , //6 |
817 | "dec_ref_pic_marking_repetition" , //7 |
818 | "spare_pic" , //8 |
819 | "scene_info" , //9 |
820 | "sub_seq_info" , //10 |
821 | "sub_seq_layer_characteristics" , //11 |
822 | "sub_seq_characteristics" , //12 |
823 | "full_frame_freeze" , //13 |
824 | "full_frame_freeze_release" , //14 |
825 | "full_frame_snapshot" , //15 |
826 | "progressive_refinement_segment_start" , //16 |
827 | "progressive_refinement_segment_end" , //17 |
828 | "motion_constrained_slice_group_set" , //18 |
829 | "film_grain_characteristics" , //19 |
830 | "deblocking_filter_display_preference" , //20 |
831 | "stereo_video_info" , //21 |
832 | "post_filter_hint" , //22 |
833 | "tone_mapping_info" , //23 |
834 | "scalability_info" , //24 |
835 | "sub_pic_scalable_layer" , //25 |
836 | "non_required_layer_rep" , //26 |
837 | "priority_layer_info" , //27 |
838 | "layers_not_present" , //28 |
839 | "layer_dependency_change" , //29 |
840 | "scalable_nesting" , //30 |
841 | "base_layer_temporal_hrd" , //31 |
842 | "quality_layer_integrity_check" , //32 |
843 | "redundant_pic_property" , //33 |
844 | "tl0_dep_rep_index" , //34 |
845 | "tl_switching_point" , //35 |
846 | "parallel_decoding_info" , //36 |
847 | "mvc_scalable_nesting" , //37 |
848 | "view_scalability_info" , //38 |
849 | "multiview_scene_info" , //39 |
850 | "multiview_acquisition_info" , //40 |
851 | "non_required_view_component" , //41 |
852 | "view_dependency_change" , //42 |
853 | "operation_points_not_present" , //43 |
854 | "base_view_temporal_hrd" , //44 |
855 | "frame_packing_arrangement" , //45 |
856 | "reserved_sei_message" // 46 or higher |
857 | }; |
858 | #endif |
859 | |
860 | void H264or5VideoStreamParser::analyze_sei_data(u_int8_t nal_unit_type) { |
861 | // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes: |
862 | u_int8_t sei[SEI_MAX_SIZE]; |
863 | unsigned seiSize; |
864 | removeEmulationBytes(sei, sizeof sei, seiSize); |
865 | |
866 | unsigned j = 1; // skip the initial byte (forbidden_zero_bit; nal_ref_idc; nal_unit_type); we've already seen it |
867 | while (j < seiSize) { |
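    // "payloadType" and "payloadSize" are each coded as a run of 0xFF bytes (each contributing 255 to the value),
    // terminated by a final byte whose value is less than 255: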
868 | unsigned payloadType = 0; |
869 | do { |
870 | payloadType += sei[j]; |
871 | } while (sei[j++] == 255 && j < seiSize); |
872 | if (j >= seiSize) break; |
873 | |
874 | unsigned payloadSize = 0; |
875 | do { |
876 | payloadSize += sei[j]; |
877 | } while (sei[j++] == 255 && j < seiSize); |
878 | if (j >= seiSize) break; |
879 | |
880 | #ifdef DEBUG |
881 | char const* description; |
882 | if (fHNumber == 264) { |
883 | unsigned descriptionNum = payloadType <= MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264 |
884 | ? payloadType : MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264; |
885 | description = sei_payloadType_description_h264[descriptionNum]; |
886 | } else { // 265 |
887 | description = |
888 | payloadType == 3 ? "filler_payload" : |
889 | payloadType == 4 ? "user_data_registered_itu_t_t35" : |
890 | payloadType == 5 ? "user_data_unregistered" : |
891 | payloadType == 17 ? "progressive_refinement_segment_end" : |
892 | payloadType == 22 ? "post_filter_hint" : |
893 | (payloadType == 132 && nal_unit_type == SUFFIX_SEI_NUT) ? "decoded_picture_hash" : |
894 | nal_unit_type == SUFFIX_SEI_NUT ? "reserved_sei_message" : |
895 | payloadType == 0 ? "buffering_period" : |
896 | payloadType == 1 ? "pic_timing" : |
897 | payloadType == 2 ? "pan_scan_rect" : |
898 | payloadType == 6 ? "recovery_point" : |
899 | payloadType == 9 ? "scene_info" : |
900 | payloadType == 15 ? "picture_snapshot" : |
901 | payloadType == 16 ? "progressive_refinement_segment_start" : |
902 | payloadType == 19 ? "film_grain_characteristics" : |
903 | payloadType == 23 ? "tone_mapping_info" : |
904 | payloadType == 45 ? "frame_packing_arrangement" : |
905 | payloadType == 47 ? "display_orientation" : |
906 | payloadType == 128 ? "structure_of_pictures_info" : |
907 | payloadType == 129 ? "active_parameter_sets" : |
908 | payloadType == 130 ? "decoding_unit_info" : |
909 | payloadType == 131 ? "temporal_sub_layer_zero_index" : |
910 | payloadType == 133 ? "scalable_nesting" : |
911 | payloadType == 134 ? "region_refresh_info" : "reserved_sei_message" ; |
912 | } |
913 | fprintf(stderr, "\tpayloadType %d (\"%s\"); payloadSize %d\n" , payloadType, description, payloadSize); |
914 | #endif |
915 | |
916 | analyze_sei_payload(payloadType, payloadSize, &sei[j]); |
917 | j += payloadSize; |
918 | } |
919 | } |
920 | |
921 | void H264or5VideoStreamParser |
922 | ::analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload) { |
923 | if (payloadType == 1/* pic_timing, for both H.264 and H.265 */) { |
924 | BitVector bv(payload, 0, 8*payloadSize); |
925 | |
926 | DEBUG_TAB; |
927 | if (CpbDpbDelaysPresentFlag) { |
928 | unsigned cpb_removal_delay = bv.getBits(cpb_removal_delay_length_minus1 + 1); |
929 | DEBUG_PRINT(cpb_removal_delay); |
930 | unsigned dpb_output_delay = bv.getBits(dpb_output_delay_length_minus1 + 1); |
931 | DEBUG_PRINT(dpb_output_delay); |
932 | } |
933 | double prevDeltaTfiDivisor = DeltaTfiDivisor; |
934 | if (pic_struct_present_flag) { |
935 | unsigned pic_struct = bv.getBits(4); |
936 | DEBUG_PRINT(pic_struct); |
937 | // Use this to set "DeltaTfiDivisor" (which is used to compute the frame rate): |
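      // (Roughly speaking, "DeltaTfiDivisor" is the number of clock ticks that the picture occupies: in H.264
      //  a tick is a field period, so a normal frame counts as 2, a single field as 1, and repeated fields or
      //  frame doubling/tripling count for more.)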
938 | if (fHNumber == 264) { |
939 | DeltaTfiDivisor = |
940 | pic_struct == 0 ? 2.0 : |
941 | pic_struct <= 2 ? 1.0 : |
942 | pic_struct <= 4 ? 2.0 : |
943 | pic_struct <= 6 ? 3.0 : |
944 | pic_struct == 7 ? 4.0 : |
945 | pic_struct == 8 ? 6.0 : |
946 | 2.0; |
947 | } else { // H.265 |
948 | DeltaTfiDivisor = |
949 | pic_struct == 0 ? 2.0 : |
950 | pic_struct <= 2 ? 1.0 : |
951 | pic_struct <= 4 ? 2.0 : |
952 | pic_struct <= 6 ? 3.0 : |
953 | pic_struct == 7 ? 2.0 : |
954 | pic_struct == 8 ? 3.0 : |
955 | pic_struct <= 12 ? 1.0 : |
956 | 2.0; |
957 | } |
958 | } else { |
959 | if (fHNumber == 264) { |
960 | // Need to get field_pic_flag from slice_header to set this properly! ##### |
961 | } else { // H.265 |
962 | DeltaTfiDivisor = 1.0; |
963 | } |
964 | } |
965 | // If "DeltaTfiDivisor" has changed, and we've already computed the frame rate, then |
966 | // adjust it, based on the new value of "DeltaTfiDivisor": |
967 | if (DeltaTfiDivisor != prevDeltaTfiDivisor && fParsedFrameRate != 0.0) { |
968 | usingSource()->fFrameRate = fParsedFrameRate |
969 | = fParsedFrameRate*(prevDeltaTfiDivisor/DeltaTfiDivisor); |
970 | #ifdef DEBUG |
971 | fprintf(stderr, "Changed frame rate to %f fps\n" , usingSource()->fFrameRate); |
972 | #endif |
973 | } |
974 | // Ignore the rest of the payload (timestamps) for now... ##### |
975 | } |
976 | } |
977 | |
978 | void H264or5VideoStreamParser::flushInput() { |
979 | fHaveSeenFirstStartCode = False; |
980 | fHaveSeenFirstByteOfNALUnit = False; |
981 | |
982 | StreamParser::flushInput(); |
983 | } |
984 | |
985 | unsigned H264or5VideoStreamParser::parse() { |
986 | try { |
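    // Each successful call to this routine saves one complete NAL unit (preceded by a 0x00000001 start code
    // iff "fOutputStartCodeSize" > 0) to the output buffer, and returns its size: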
987 | // The stream must start with a 0x00000001: |
988 | if (!fHaveSeenFirstStartCode) { |
989 | // Skip over any input bytes that precede the first 0x00000001: |
990 | u_int32_t first4Bytes; |
991 | while ((first4Bytes = test4Bytes()) != 0x00000001) { |
992 | get1Byte(); setParseState(); // ensures that we progress over bad data |
993 | } |
994 | skipBytes(4); // skip this initial code |
995 | |
996 | setParseState(); |
997 | fHaveSeenFirstStartCode = True; // from now on |
998 | } |
999 | |
1000 | if (fOutputStartCodeSize > 0 && curFrameSize() == 0 && !haveSeenEOF()) { |
1001 | // Include a start code in the output: |
1002 | save4Bytes(0x00000001); |
1003 | } |
1004 | |
1005 | // Then save everything up until the next 0x00000001 (4 bytes) or 0x000001 (3 bytes), or we hit EOF. |
1006 | // Also make note of the first byte, because it contains the "nal_unit_type": |
1007 | if (haveSeenEOF()) { |
1008 | // We hit EOF the last time that we tried to parse this data, so we know that any remaining unparsed data |
1009 | // forms a complete NAL unit, and that there's no 'start code' at the end: |
1010 | unsigned remainingDataSize = totNumValidBytes() - curOffset(); |
1011 | #ifdef DEBUG |
1012 | unsigned const trailingNALUnitSize = remainingDataSize; |
1013 | #endif |
1014 | while (remainingDataSize > 0) { |
1015 | u_int8_t nextByte = get1Byte(); |
1016 | if (!fHaveSeenFirstByteOfNALUnit) { |
1017 | fFirstByteOfNALUnit = nextByte; |
1018 | fHaveSeenFirstByteOfNALUnit = True; |
1019 | } |
1020 | saveByte(nextByte); |
1021 | --remainingDataSize; |
1022 | } |
1023 | |
1024 | #ifdef DEBUG |
1025 | if (fHNumber == 264) { |
1026 | u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5; |
1027 | u_int8_t nal_unit_type = fFirstByteOfNALUnit&0x1F; |
1028 | fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n" , |
1029 | trailingNALUnitSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]); |
1030 | } else { // 265 |
1031 | u_int8_t nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1; |
1032 | fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n" , |
1033 | trailingNALUnitSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]); |
1034 | } |
1035 | #endif |
1036 | |
1037 | (void)get1Byte(); // forces another read, which will cause EOF to get handled for real this time |
1038 | return 0; |
1039 | } else { |
1040 | u_int32_t next4Bytes = test4Bytes(); |
1041 | if (!fHaveSeenFirstByteOfNALUnit) { |
1042 | fFirstByteOfNALUnit = next4Bytes>>24; |
1043 | fHaveSeenFirstByteOfNALUnit = True; |
1044 | } |
1045 | while (next4Bytes != 0x00000001 && (next4Bytes&0xFFFFFF00) != 0x00000100) { |
1046 | // We save at least some of "next4Bytes". |
1047 | if ((unsigned)(next4Bytes&0xFF) > 1) { |
1048 | // Common case: 0x00000001 or 0x000001 definitely doesn't begin anywhere in "next4Bytes", so we save all of it: |
1049 | save4Bytes(next4Bytes); |
1050 | skipBytes(4); |
1051 | } else { |
1052 | // Save the first byte, and continue testing the rest: |
1053 | saveByte(next4Bytes>>24); |
1054 | skipBytes(1); |
1055 | } |
1056 | setParseState(); // ensures forward progress |
1057 | next4Bytes = test4Bytes(); |
1058 | } |
1059 | // Assert: next4Bytes starts with 0x00000001 or 0x000001, and we've saved all previous bytes (forming a complete NAL unit). |
1060 | // Skip over these remaining bytes, up until the start of the next NAL unit: |
1061 | if (next4Bytes == 0x00000001) { |
1062 | skipBytes(4); |
1063 | } else { |
1064 | skipBytes(3); |
1065 | } |
1066 | } |
1067 | |
1068 | fHaveSeenFirstByteOfNALUnit = False; // for the next NAL unit that we'll parse |
1069 | u_int8_t nal_unit_type; |
1070 | if (fHNumber == 264) { |
1071 | nal_unit_type = fFirstByteOfNALUnit&0x1F; |
1072 | #ifdef DEBUG |
1073 | u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5; |
1074 | fprintf(stderr, "Parsed %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n" , |
1075 | curFrameSize()-fOutputStartCodeSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]); |
1076 | #endif |
1077 | } else { // 265 |
1078 | nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1; |
1079 | #ifdef DEBUG |
1080 | fprintf(stderr, "Parsed %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n" , |
1081 | curFrameSize()-fOutputStartCodeSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]); |
1082 | #endif |
1083 | } |
1084 | |
1085 | // Now that we have found (& copied) a NAL unit, process it if it's of special interest to us: |
1086 | if (isVPS(nal_unit_type)) { // Video parameter set |
1087 | // First, save a copy of this NAL unit, in case the downstream object wants to see it: |
1088 | usingSource()->saveCopyOfVPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize); |
1089 | |
1090 | if (fParsedFrameRate == 0.0) { |
1091 | // We haven't yet parsed a frame rate from the stream. |
1092 | // So parse this NAL unit to check whether frame rate information is present: |
1093 | unsigned num_units_in_tick, time_scale; |
1094 | analyze_video_parameter_set_data(num_units_in_tick, time_scale); |
1095 | if (time_scale > 0 && num_units_in_tick > 0) { |
1096 | usingSource()->fFrameRate = fParsedFrameRate |
1097 | = time_scale/(DeltaTfiDivisor*num_units_in_tick); |
1098 | #ifdef DEBUG |
1099 | fprintf(stderr, "Set frame rate to %f fps\n" , usingSource()->fFrameRate); |
1100 | #endif |
1101 | } else { |
1102 | #ifdef DEBUG |
1103 | fprintf(stderr, "\tThis \"Video Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n" , usingSource()->fFrameRate); |
1104 | #endif |
1105 | } |
1106 | } |
1107 | } else if (isSPS(nal_unit_type)) { // Sequence parameter set |
1108 | // First, save a copy of this NAL unit, in case the downstream object wants to see it: |
1109 | usingSource()->saveCopyOfSPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize); |
1110 | |
1111 | if (fParsedFrameRate == 0.0) { |
1112 | // We haven't yet parsed a frame rate from the stream. |
1113 | // So parse this NAL unit to check whether frame rate information is present: |
1114 | unsigned num_units_in_tick, time_scale; |
1115 | analyze_seq_parameter_set_data(num_units_in_tick, time_scale); |
1116 | if (time_scale > 0 && num_units_in_tick > 0) { |
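	  // For example, an H.264 SPS with "time_scale" 50000 and "num_units_in_tick" 1000 - with the default
	  // "DeltaTfiDivisor" of 2.0 - yields 50000/(2.0*1000) = 25 frames per second.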
1117 | usingSource()->fFrameRate = fParsedFrameRate |
1118 | = time_scale/(DeltaTfiDivisor*num_units_in_tick); |
1119 | #ifdef DEBUG |
1120 | fprintf(stderr, "Set frame rate to %f fps\n" , usingSource()->fFrameRate); |
1121 | #endif |
1122 | } else { |
1123 | #ifdef DEBUG |
1124 | fprintf(stderr, "\tThis \"Sequence Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n" , usingSource()->fFrameRate); |
1125 | #endif |
1126 | } |
1127 | } |
1128 | } else if (isPPS(nal_unit_type)) { // Picture parameter set |
1129 | // Save a copy of this NAL unit, in case the downstream object wants to see it: |
1130 | usingSource()->saveCopyOfPPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize); |
1131 | } else if (isSEI(nal_unit_type)) { // Supplemental enhancement information (SEI) |
1132 | analyze_sei_data(nal_unit_type); |
1133 | // Later, perhaps adjust "fPresentationTime" if we saw a "pic_timing" SEI payload??? ##### |
1134 | } |
1135 | |
1136 | usingSource()->setPresentationTime(); |
1137 | #ifdef DEBUG |
1138 | unsigned long secs = (unsigned long)usingSource()->fPresentationTime.tv_sec; |
1139 | unsigned uSecs = (unsigned)usingSource()->fPresentationTime.tv_usec; |
1140 | fprintf(stderr, "\tPresentation time: %lu.%06u\n" , secs, uSecs); |
1141 | #endif |
1142 | |
1143 | // Now, check whether this NAL unit ends an 'access unit'. |
1144 | // (RTP streamers need to know this in order to figure out whether or not to set the "M" bit.) |
1145 | Boolean thisNALUnitEndsAccessUnit; |
1146 | if (haveSeenEOF() || isEOF(nal_unit_type)) { |
1147 | // There is no next NAL unit, so we assume that this one ends the current 'access unit': |
1148 | thisNALUnitEndsAccessUnit = True; |
1149 | } else if (usuallyBeginsAccessUnit(nal_unit_type)) { |
1150 | // These NAL units usually *begin* an access unit, so assume that they don't end one here: |
1151 | thisNALUnitEndsAccessUnit = False; |
1152 | } else { |
1153 | // We need to check the *next* NAL unit to figure out whether |
1154 | // the current NAL unit ends an 'access unit': |
1155 | u_int8_t firstBytesOfNextNALUnit[3]; |
1156 | testBytes(firstBytesOfNextNALUnit, 3); |
1157 | |
1158 | u_int8_t const& next_nal_unit_type = fHNumber == 264 |
1159 | ? (firstBytesOfNextNALUnit[0]&0x1F) : ((firstBytesOfNextNALUnit[0]&0x7E)>>1); |
1160 | if (isVCL(next_nal_unit_type)) { |
1161 | // The high-order bit of the byte after the "nal_unit_header" tells us whether it's |
1162 | // the start of a new 'access unit' (and thus the current NAL unit ends an 'access unit'): |
	u_int8_t const byteAfter_nal_unit_header
	  = fHNumber == 264 ? firstBytesOfNextNALUnit[1] : firstBytesOfNextNALUnit[2];
1165 | thisNALUnitEndsAccessUnit = (byteAfter_nal_unit_header&0x80) != 0; |
1166 | } else if (usuallyBeginsAccessUnit(next_nal_unit_type)) { |
1167 | // The next NAL unit's type is one that usually appears at the start of an 'access unit', |
1168 | // so we assume that the current NAL unit ends an 'access unit': |
1169 | thisNALUnitEndsAccessUnit = True; |
1170 | } else { |
1171 | // The next NAL unit definitely doesn't start a new 'access unit', |
1172 | // which means that the current NAL unit doesn't end one: |
1173 | thisNALUnitEndsAccessUnit = False; |
1174 | } |
1175 | } |
1176 | |
1177 | if (thisNALUnitEndsAccessUnit) { |
1178 | #ifdef DEBUG |
1179 | fprintf(stderr, "*****This NAL unit ends the current access unit*****\n" ); |
1180 | #endif |
1181 | usingSource()->fPictureEndMarker = True; |
1182 | ++usingSource()->fPictureCount; |
1183 | |
1184 | // Note that the presentation time for the next NAL unit will be different: |
1185 | struct timeval& nextPT = usingSource()->fNextPresentationTime; // alias |
1186 | nextPT = usingSource()->fPresentationTime; |
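      // Advance it by one frame period (1/"fFrameRate"), carrying any whole-second overflow from the
      // microseconds field into "tv_sec":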
1187 | double nextFraction = nextPT.tv_usec/1000000.0 + 1/usingSource()->fFrameRate; |
1188 | unsigned nextSecsIncrement = (long)nextFraction; |
1189 | nextPT.tv_sec += (long)nextSecsIncrement; |
1190 | nextPT.tv_usec = (long)((nextFraction - nextSecsIncrement)*1000000); |
1191 | } |
1192 | setParseState(); |
1193 | |
1194 | return curFrameSize(); |
1195 | } catch (int /*e*/) { |
1196 | #ifdef DEBUG |
1197 | fprintf(stderr, "H264or5VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n" ); |
1198 | #endif |
1199 | return 0; // the parsing got interrupted |
1200 | } |
1201 | } |
1202 | |
1203 | unsigned removeH264or5EmulationBytes(u_int8_t* to, unsigned toMaxSize, |
1204 | u_int8_t const* from, unsigned fromSize) { |
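  // Copy "from" to "to" (writing at most "toMaxSize" bytes), replacing each three-byte sequence 0x00 0x00 0x03
  // with just 0x00 0x00 - i.e., removing the 'emulation prevention' byte that the encoder had inserted: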
1205 | unsigned toSize = 0; |
1206 | unsigned i = 0; |
1207 | while (i < fromSize && toSize+1 < toMaxSize) { |
1208 | if (i+2 < fromSize && from[i] == 0 && from[i+1] == 0 && from[i+2] == 3) { |
1209 | to[toSize] = to[toSize+1] = 0; |
1210 | toSize += 2; |
1211 | i += 3; |
1212 | } else { |
1213 | to[toSize] = from[i]; |
1214 | toSize += 1; |
1215 | i += 1; |
1216 | } |
1217 | } |
1218 | |
1219 | return toSize; |
1220 | } |
1221 | |