1 | /********** |
2 | This library is free software; you can redistribute it and/or modify it under |
3 | the terms of the GNU Lesser General Public License as published by the |
4 | Free Software Foundation; either version 3 of the License, or (at your |
5 | option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.) |
6 | |
7 | This library is distributed in the hope that it will be useful, but WITHOUT |
8 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
9 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for |
10 | more details. |
11 | |
12 | You should have received a copy of the GNU Lesser General Public License |
13 | along with this library; if not, write to the Free Software Foundation, Inc., |
14 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
15 | **********/ |
16 | // "liveMedia" |
17 | // Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved. |
18 | // A filter that breaks up an MPEG-4 video elementary stream into |
19 | // frames for: |
20 | // - Visual Object Sequence (VS) Header + Visual Object (VO) Header |
21 | // + Video Object Layer (VOL) Header |
22 | // - Group of VOP (GOV) Header |
23 | // - VOP frame |
24 | // Implementation |
25 | |
#include "MPEG4VideoStreamFramer.hh"
#include "MPEGVideoStreamParser.hh"
#include "MPEG4LATMAudioRTPSource.hh" // for "parseGeneralConfigStr()"
#include <stdio.h>
#include <string.h>
30 | |
31 | ////////// MPEG4VideoStreamParser definition ////////// |
32 | |
33 | // An enum representing the current state of the parser: |
34 | enum MPEGParseState { |
35 | PARSING_VISUAL_OBJECT_SEQUENCE, |
36 | PARSING_VISUAL_OBJECT_SEQUENCE_SEEN_CODE, |
37 | PARSING_VISUAL_OBJECT, |
38 | PARSING_VIDEO_OBJECT_LAYER, |
39 | PARSING_GROUP_OF_VIDEO_OBJECT_PLANE, |
40 | PARSING_VIDEO_OBJECT_PLANE, |
41 | PARSING_VISUAL_OBJECT_SEQUENCE_END_CODE |
42 | }; |
43 | |
// The parser that breaks a MPEG-4 video elementary stream into the frames
// listed at the top of this file, extracting timing information from the
// VOL/GOV/VOP headers as it goes.
class MPEG4VideoStreamParser: public MPEGVideoStreamParser {
public:
  MPEG4VideoStreamParser(MPEG4VideoStreamFramer* usingSource,
			 FramedSource* inputSource);
  virtual ~MPEG4VideoStreamParser();

private: // redefined virtual functions:
  virtual void flushInput();
  virtual unsigned parse();

private:
  // Our "fUsingSource" is known to be a "MPEG4VideoStreamFramer":
  MPEG4VideoStreamFramer* usingSource() {
    return (MPEG4VideoStreamFramer*)fUsingSource;
  }
  void setParseState(MPEGParseState parseState);

  // One parsing routine per state; each returns the size of the parsed frame:
  unsigned parseVisualObjectSequence(Boolean haveSeenStartCode = False);
  unsigned parseVisualObject();
  unsigned parseVideoObjectLayer();
  unsigned parseGroupOfVideoObjectPlane();
  unsigned parseVideoObjectPlane();
  unsigned parseVisualObjectSequenceEndCode();

  // These are used for parsing within an already-read frame:
  Boolean getNextFrameBit(u_int8_t& result);
  Boolean getNextFrameBits(unsigned numBits, u_int32_t& result);

  // Which are used by:
  void analyzeVOLHeader();

private:
  MPEGParseState fCurrentParseState;
  unsigned fNumBitsSeenSoFar; // used by the getNextFrameBit*() routines
  u_int32_t vop_time_increment_resolution; // extracted from the VOL header
  unsigned fNumVTIRBits;
  // # of bits needed to count to "vop_time_increment_resolution"
  u_int8_t fixed_vop_rate; // extracted from the VOL header
  unsigned fixed_vop_time_increment; // used if 'fixed_vop_rate' is set
  unsigned fSecondsSinceLastTimeCode, fTotalTicksSinceLastTimeCode, fPrevNewTotalTicks;
  unsigned fPrevPictureCountDelta;
  Boolean fJustSawTimeCode; // True iff the previous frame was a GOV header
};
86 | |
87 | |
88 | ////////// MPEG4VideoStreamFramer implementation ////////// |
89 | |
90 | MPEG4VideoStreamFramer* |
91 | MPEG4VideoStreamFramer::createNew(UsageEnvironment& env, |
92 | FramedSource* inputSource) { |
93 | // Need to add source type checking here??? ##### |
94 | return new MPEG4VideoStreamFramer(env, inputSource); |
95 | } |
96 | |
97 | unsigned char* MPEG4VideoStreamFramer |
98 | ::getConfigBytes(unsigned& numBytes) const { |
99 | numBytes = fNumConfigBytes; |
100 | return fConfigBytes; |
101 | } |
102 | |
103 | void MPEG4VideoStreamFramer |
104 | ::setConfigInfo(u_int8_t profileAndLevelIndication, char const* configStr) { |
105 | fProfileAndLevelIndication = profileAndLevelIndication; |
106 | |
107 | delete[] fConfigBytes; |
108 | fConfigBytes = parseGeneralConfigStr(configStr, fNumConfigBytes); |
109 | } |
110 | |
MPEG4VideoStreamFramer::MPEG4VideoStreamFramer(UsageEnvironment& env,
					       FramedSource* inputSource,
					       Boolean createParser)
  : MPEGVideoStreamFramer(env, inputSource),
    fProfileAndLevelIndication(0),
    fConfigBytes(NULL), fNumConfigBytes(0),
    fNewConfigBytes(NULL), fNumNewConfigBytes(0) {
  // Create our own parser, unless a subclass asked us not to
  // (by passing "createParser" == False, so it can install its own):
  fParser = createParser
    ? new MPEG4VideoStreamParser(this, inputSource)
    : NULL;
}
122 | |
123 | MPEG4VideoStreamFramer::~MPEG4VideoStreamFramer() { |
124 | delete[] fConfigBytes; delete[] fNewConfigBytes; |
125 | } |
126 | |
127 | void MPEG4VideoStreamFramer::startNewConfig() { |
128 | delete[] fNewConfigBytes; fNewConfigBytes = NULL; |
129 | fNumNewConfigBytes = 0; |
130 | } |
131 | |
132 | void MPEG4VideoStreamFramer |
133 | ::appendToNewConfig(unsigned char* newConfigBytes, unsigned numNewBytes) { |
134 | // Allocate a new block of memory for the new config bytes: |
135 | unsigned char* configNew |
136 | = new unsigned char[fNumNewConfigBytes + numNewBytes]; |
137 | |
138 | // Copy the old, then the new, config bytes there: |
139 | memmove(configNew, fNewConfigBytes, fNumNewConfigBytes); |
140 | memmove(&configNew[fNumNewConfigBytes], newConfigBytes, numNewBytes); |
141 | |
142 | delete[] fNewConfigBytes; fNewConfigBytes = configNew; |
143 | fNumNewConfigBytes += numNewBytes; |
144 | } |
145 | |
146 | void MPEG4VideoStreamFramer::completeNewConfig() { |
147 | delete[] fConfigBytes; fConfigBytes = fNewConfigBytes; |
148 | fNewConfigBytes = NULL; |
149 | fNumConfigBytes = fNumNewConfigBytes; |
150 | fNumNewConfigBytes = 0; |
151 | } |
152 | |
153 | Boolean MPEG4VideoStreamFramer::isMPEG4VideoStreamFramer() const { |
154 | return True; |
155 | } |
156 | |
157 | ////////// MPEG4VideoStreamParser implementation ////////// |
158 | |
MPEG4VideoStreamParser
::MPEG4VideoStreamParser(MPEG4VideoStreamFramer* usingSource,
			 FramedSource* inputSource)
  : MPEGVideoStreamParser(usingSource, inputSource),
    fCurrentParseState(PARSING_VISUAL_OBJECT_SEQUENCE),
    vop_time_increment_resolution(0), fNumVTIRBits(0),
    fixed_vop_rate(0), fixed_vop_time_increment(0),
    fSecondsSinceLastTimeCode(0), fTotalTicksSinceLastTimeCode(0),
    fPrevNewTotalTicks(0), fPrevPictureCountDelta(1), fJustSawTimeCode(False) {
  // We begin by looking for a Visual Object Sequence (VS) header;
  // all timing state starts out zeroed (with a unit picture-count delta).
}
169 | |
MPEG4VideoStreamParser::~MPEG4VideoStreamParser() {
  // Nothing of our own to clean up; the base class handles its state.
}
172 | |
173 | void MPEG4VideoStreamParser::setParseState(MPEGParseState parseState) { |
174 | fCurrentParseState = parseState; |
175 | MPEGVideoStreamParser::setParseState(); |
176 | } |
177 | |
178 | void MPEG4VideoStreamParser::flushInput() { |
179 | fSecondsSinceLastTimeCode = 0; |
180 | fTotalTicksSinceLastTimeCode = 0; |
181 | fPrevNewTotalTicks = 0; |
182 | fPrevPictureCountDelta = 1; |
183 | |
184 | StreamParser::flushInput(); |
185 | if (fCurrentParseState != PARSING_VISUAL_OBJECT_SEQUENCE) { |
186 | setParseState(PARSING_VISUAL_OBJECT_SEQUENCE); // later, change to GOV or VOP? ##### |
187 | } |
188 | } |
189 | |
190 | |
191 | unsigned MPEG4VideoStreamParser::parse() { |
192 | try { |
193 | switch (fCurrentParseState) { |
194 | case PARSING_VISUAL_OBJECT_SEQUENCE: { |
195 | return parseVisualObjectSequence(); |
196 | } |
197 | case PARSING_VISUAL_OBJECT_SEQUENCE_SEEN_CODE: { |
198 | return parseVisualObjectSequence(True); |
199 | } |
200 | case PARSING_VISUAL_OBJECT: { |
201 | return parseVisualObject(); |
202 | } |
203 | case PARSING_VIDEO_OBJECT_LAYER: { |
204 | return parseVideoObjectLayer(); |
205 | } |
206 | case PARSING_GROUP_OF_VIDEO_OBJECT_PLANE: { |
207 | return parseGroupOfVideoObjectPlane(); |
208 | } |
209 | case PARSING_VIDEO_OBJECT_PLANE: { |
210 | return parseVideoObjectPlane(); |
211 | } |
212 | case PARSING_VISUAL_OBJECT_SEQUENCE_END_CODE: { |
213 | return parseVisualObjectSequenceEndCode(); |
214 | } |
215 | default: { |
216 | return 0; // shouldn't happen |
217 | } |
218 | } |
219 | } catch (int /*e*/) { |
220 | #ifdef DEBUG |
221 | fprintf(stderr, "MPEG4VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n" ); |
222 | #endif |
223 | return 0; // the parsing got interrupted |
224 | } |
225 | } |
226 | |
227 | #define VISUAL_OBJECT_SEQUENCE_START_CODE 0x000001B0 |
228 | #define VISUAL_OBJECT_SEQUENCE_END_CODE 0x000001B1 |
229 | #define GROUP_VOP_START_CODE 0x000001B3 |
230 | #define VISUAL_OBJECT_START_CODE 0x000001B5 |
231 | #define VOP_START_CODE 0x000001B6 |
232 | |
unsigned MPEG4VideoStreamParser
::parseVisualObjectSequence(Boolean haveSeenStartCode) {
#ifdef DEBUG
  fprintf(stderr, "parsing VisualObjectSequence\n");
#endif
  // Parse a VS header - the first part of the stream's 'configuration'
  // information - delivering it as one frame.
  usingSource()->startNewConfig(); // this header begins a new 'config' accumulation
  u_int32_t first4Bytes;
  if (!haveSeenStartCode) {
    // Skip forward, byte by byte, until we see a VS start code:
    while ((first4Bytes = test4Bytes()) != VISUAL_OBJECT_SEQUENCE_START_CODE) {
#ifdef DEBUG
      fprintf(stderr, "ignoring non VS header: 0x%08x\n", first4Bytes);
#endif
      get1Byte(); setParseState(PARSING_VISUAL_OBJECT_SEQUENCE);
      // ensures we progress over bad data
    }
    first4Bytes = get4Bytes();
  } else {
    // We've already seen the start code
    first4Bytes = VISUAL_OBJECT_SEQUENCE_START_CODE;
  }
  save4Bytes(first4Bytes);

  // The next byte is the "profile_and_level_indication":
  u_int8_t pali = get1Byte();
#ifdef DEBUG
  fprintf(stderr, "profile_and_level_indication: %02x\n", pali);
#endif
  saveByte(pali);
  usingSource()->fProfileAndLevelIndication = pali; // remembered for later use

  // Now, copy all bytes that we see, up until we reach
  // a VISUAL_OBJECT_START_CODE:
  u_int32_t next4Bytes = get4Bytes();
  while (next4Bytes != VISUAL_OBJECT_START_CODE) {
    saveToNextCode(next4Bytes);
  }

  // We just saw a VISUAL_OBJECT_START_CODE, so parse the VO header next:
  setParseState(PARSING_VISUAL_OBJECT);

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // This header forms part of the 'configuration' information:
  usingSource()->appendToNewConfig(fStartOfFrame, curFrameSize());

  return curFrameSize();
}
280 | |
281 | static inline Boolean isVideoObjectStartCode(u_int32_t code) { |
282 | return code >= 0x00000100 && code <= 0x0000011F; |
283 | } |
284 | |
unsigned MPEG4VideoStreamParser::parseVisualObject() {
#ifdef DEBUG
  fprintf(stderr, "parsing VisualObject\n");
#endif
  // Parse a VO header (part of the 'configuration' information),
  // delivering it as one frame.
  // Note that we've already read the VISUAL_OBJECT_START_CODE
  save4Bytes(VISUAL_OBJECT_START_CODE);

  // Next, extract the "visual_object_type" from the next 1 or 2 bytes:
  u_int8_t nextByte = get1Byte(); saveByte(nextByte);
  Boolean is_visual_object_identifier = (nextByte&0x80) != 0;
  u_int8_t visual_object_type;
  if (is_visual_object_identifier) {
    // The verid (4 bits) and priority (3 bits) fields are present,
    // so the "visual_object_type" is in the following byte:
#ifdef DEBUG
    fprintf(stderr, "visual_object_verid: 0x%x; visual_object_priority: 0x%x\n", (nextByte&0x78)>>3, (nextByte&0x07));
#endif
    nextByte = get1Byte(); saveByte(nextByte);
    visual_object_type = (nextByte&0xF0)>>4;
  } else {
    visual_object_type = (nextByte&0x78)>>3;
  }
#ifdef DEBUG
  fprintf(stderr, "visual_object_type: 0x%x\n", visual_object_type);
#endif
  // At present, we support only the "Video ID" "visual_object_type" (1)
  if (visual_object_type != 1) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVisualObject(): Warning: We don't handle visual_object_type " << visual_object_type << "\n";
  }

  // Now, copy all bytes that we see, up until we reach
  // a video_object_start_code
  u_int32_t next4Bytes = get4Bytes();
  while (!isVideoObjectStartCode(next4Bytes)) {
    saveToNextCode(next4Bytes);
  }
  save4Bytes(next4Bytes); // the video_object_start_code is included in this frame
#ifdef DEBUG
  fprintf(stderr, "saw a video_object_start_code: 0x%08x\n", next4Bytes);
#endif

  // The VOL header comes next:
  setParseState(PARSING_VIDEO_OBJECT_LAYER);

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // This header forms part of the 'configuration' information:
  usingSource()->appendToNewConfig(fStartOfFrame, curFrameSize());

  return curFrameSize();
}
334 | |
335 | static inline Boolean isVideoObjectLayerStartCode(u_int32_t code) { |
336 | return code >= 0x00000120 && code <= 0x0000012F; |
337 | } |
338 | |
339 | Boolean MPEG4VideoStreamParser::getNextFrameBit(u_int8_t& result) { |
340 | if (fNumBitsSeenSoFar/8 >= curFrameSize()) return False; |
341 | |
342 | u_int8_t nextByte = fStartOfFrame[fNumBitsSeenSoFar/8]; |
343 | result = (nextByte>>(7-fNumBitsSeenSoFar%8))&1; |
344 | ++fNumBitsSeenSoFar; |
345 | return True; |
346 | } |
347 | |
348 | Boolean MPEG4VideoStreamParser::getNextFrameBits(unsigned numBits, |
349 | u_int32_t& result) { |
350 | result = 0; |
351 | for (unsigned i = 0; i < numBits; ++i) { |
352 | u_int8_t nextBit; |
353 | if (!getNextFrameBit(nextBit)) return False; |
354 | result = (result<<1)|nextBit; |
355 | } |
356 | return True; |
357 | } |
358 | |
359 | void MPEG4VideoStreamParser::() { |
360 | // Extract timing information (in particular, |
361 | // "vop_time_increment_resolution") from the VOL Header: |
362 | fNumBitsSeenSoFar = 41; |
363 | do { |
364 | u_int8_t is_object_layer_identifier; |
365 | if (!getNextFrameBit(is_object_layer_identifier)) break; |
366 | if (is_object_layer_identifier) fNumBitsSeenSoFar += 7; |
367 | |
368 | u_int32_t aspect_ratio_info; |
369 | if (!getNextFrameBits(4, aspect_ratio_info)) break; |
370 | if (aspect_ratio_info == 15 /*extended_PAR*/) fNumBitsSeenSoFar += 16; |
371 | |
372 | u_int8_t vol_control_parameters; |
373 | if (!getNextFrameBit(vol_control_parameters)) break; |
374 | if (vol_control_parameters) { |
375 | fNumBitsSeenSoFar += 3; // chroma_format; low_delay |
376 | u_int8_t vbw_parameters; |
377 | if (!getNextFrameBit(vbw_parameters)) break; |
378 | if (vbw_parameters) fNumBitsSeenSoFar += 79; |
379 | } |
380 | |
381 | fNumBitsSeenSoFar += 2; // video_object_layer_shape |
382 | u_int8_t marker_bit; |
383 | if (!getNextFrameBit(marker_bit)) break; |
384 | if (marker_bit != 1) { // sanity check |
385 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): marker_bit 1 not set!\n" ; |
386 | break; |
387 | } |
388 | |
389 | if (!getNextFrameBits(16, vop_time_increment_resolution)) break; |
390 | #ifdef DEBUG |
391 | fprintf(stderr, "vop_time_increment_resolution: %d\n" , vop_time_increment_resolution); |
392 | #endif |
393 | if (vop_time_increment_resolution == 0) { |
394 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): vop_time_increment_resolution is zero!\n" ; |
395 | break; |
396 | } |
397 | // Compute how many bits are necessary to represent this: |
398 | fNumVTIRBits = 0; |
399 | for (unsigned test = vop_time_increment_resolution; test>0; test /= 2) { |
400 | ++fNumVTIRBits; |
401 | } |
402 | |
403 | if (!getNextFrameBit(marker_bit)) break; |
404 | if (marker_bit != 1) { // sanity check |
405 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): marker_bit 2 not set!\n" ; |
406 | break; |
407 | } |
408 | |
409 | if (!getNextFrameBit(fixed_vop_rate)) break; |
410 | if (fixed_vop_rate) { |
411 | // Get the following "fixed_vop_time_increment": |
412 | if (!getNextFrameBits(fNumVTIRBits, fixed_vop_time_increment)) break; |
413 | #ifdef DEBUG |
414 | fprintf(stderr, "fixed_vop_time_increment: %d\n" , fixed_vop_time_increment); |
415 | if (fixed_vop_time_increment == 0) { |
416 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): fixed_vop_time_increment is zero!\n" ; |
417 | } |
418 | #endif |
419 | } |
420 | // Use "vop_time_increment_resolution" as the 'frame rate' |
421 | // (really, 'tick rate'): |
422 | usingSource()->fFrameRate = (double)vop_time_increment_resolution; |
423 | #ifdef DEBUG |
424 | fprintf(stderr, "fixed_vop_rate: %d; 'frame' (really tick) rate: %f\n" , fixed_vop_rate, usingSource()->fFrameRate); |
425 | #endif |
426 | |
427 | return; |
428 | } while (0); |
429 | |
430 | if (fNumBitsSeenSoFar/8 >= curFrameSize()) { |
431 | char errMsg[200]; |
432 | sprintf(errMsg, "Not enough bits in VOL header: %d/8 >= %d\n" , fNumBitsSeenSoFar, curFrameSize()); |
433 | usingSource()->envir() << errMsg; |
434 | } |
435 | } |
436 | |
unsigned MPEG4VideoStreamParser::parseVideoObjectLayer() {
#ifdef DEBUG
  fprintf(stderr, "parsing VideoObjectLayer\n");
#endif
  // Parse a VOL header - the final part of the 'configuration' information -
  // delivering it as one frame.
  // The first 4 bytes must be a "video_object_layer_start_code".
  // If not, this is a 'short video header', which we currently
  // don't support:
  u_int32_t next4Bytes = get4Bytes();
  if (!isVideoObjectLayerStartCode(next4Bytes)) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectLayer(): This appears to be a 'short video header', which we currently don't support\n";
  }

  // Now, copy all bytes that we see, up until we reach
  // a GROUP_VOP_START_CODE or a VOP_START_CODE:
  do {
    saveToNextCode(next4Bytes);
  } while (next4Bytes != GROUP_VOP_START_CODE
	   && next4Bytes != VOP_START_CODE);

  // Extract timing information (e.g., "vop_time_increment_resolution")
  // from the header bytes that we just saved:
  analyzeVOLHeader();

  // Parse the GOV header or VOP next, depending on which code ended the copy:
  setParseState((next4Bytes == GROUP_VOP_START_CODE)
		? PARSING_GROUP_OF_VIDEO_OBJECT_PLANE
		: PARSING_VIDEO_OBJECT_PLANE);

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // This header ends the 'configuration' information:
  usingSource()->appendToNewConfig(fStartOfFrame, curFrameSize());
  usingSource()->completeNewConfig();

  return curFrameSize();
}
471 | |
unsigned MPEG4VideoStreamParser::parseGroupOfVideoObjectPlane() {
#ifdef DEBUG
  fprintf(stderr, "parsing GroupOfVideoObjectPlane\n");
#endif
  // Parse a GOV header, delivering it as one frame.
  // Note that we've already read the GROUP_VOP_START_CODE
  save4Bytes(GROUP_VOP_START_CODE);

  // Next, extract the (18-bit) time code from the next 3 bytes:
  u_int8_t next3Bytes[3];
  getBytes(next3Bytes, 3);
  saveByte(next3Bytes[0]);saveByte(next3Bytes[1]);saveByte(next3Bytes[2]);
  unsigned time_code
    = (next3Bytes[0]<<10)|(next3Bytes[1]<<2)|(next3Bytes[2]>>6);
  unsigned time_code_hours = (time_code&0x0003E000)>>13; // 5 bits
  unsigned time_code_minutes = (time_code&0x00001F80)>>7; // 6 bits
#if defined(DEBUG) || defined(DEBUG_TIMESTAMPS)
  Boolean marker_bit = (time_code&0x00000040) != 0; // 1 bit
#endif
  unsigned time_code_seconds = (time_code&0x0000003F); // 6 bits
#if defined(DEBUG) || defined(DEBUG_TIMESTAMPS)
  fprintf(stderr, "time_code: 0x%05x, hours %d, minutes %d, marker_bit %d, seconds %d\n", time_code, time_code_hours, time_code_minutes, marker_bit, time_code_seconds);
#endif
  fJustSawTimeCode = True; // remembered when handling the next VOP's timing

  // Now, copy all bytes that we see, up until we reach a VOP_START_CODE:
  u_int32_t next4Bytes = get4Bytes();
  while (next4Bytes != VOP_START_CODE) {
    saveToNextCode(next4Bytes);
  }

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // Record the time code:
  usingSource()->setTimeCode(time_code_hours, time_code_minutes,
			     time_code_seconds, 0, 0);
  // Note: Because the GOV header can appear anywhere (not just at a 1s point), we
  // don't pass "fTotalTicksSinceLastTimeCode" as the "picturesSinceLastGOP" parameter.
  fSecondsSinceLastTimeCode = 0;
  if (fixed_vop_rate) fTotalTicksSinceLastTimeCode = 0;

  // We just saw a VOP_START_CODE, so parse the VOP next:
  setParseState(PARSING_VIDEO_OBJECT_PLANE);

  return curFrameSize();
}
517 | |
unsigned MPEG4VideoStreamParser::parseVideoObjectPlane() {
#ifdef DEBUG
  fprintf(stderr, "#parsing VideoObjectPlane\n");
#endif
  // Parse a VOP (a single picture), delivering it as one frame, and update
  // our picture count and timing state from the VOP's header fields.
  // Note that we've already read the VOP_START_CODE
  save4Bytes(VOP_START_CODE);

  // Get the "vop_coding_type" from the next byte:
  u_int8_t nextByte = get1Byte(); saveByte(nextByte);
  u_int8_t vop_coding_type = nextByte>>6; // 0:I, 1:P, 2:B, 3:S (see "IPBS" below)

  // Next, get the "modulo_time_base" by counting the '1' bits that follow.
  // We look at the next 32-bits only. This should be enough in most cases.
  u_int32_t next4Bytes = get4Bytes();
  u_int32_t timeInfo = (nextByte<<(32-6))|(next4Bytes>>6); // remaining header bits, left-justified
  unsigned modulo_time_base = 0;
  u_int32_t mask = 0x80000000;
  while ((timeInfo&mask) != 0) {
    ++modulo_time_base;
    mask >>= 1;
  }
  mask >>= 1; // skip over the '0' bit that terminated the count

  // Check the following marker bit:
  if ((timeInfo&mask) == 0) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectPlane(): marker bit not set!\n";
  }
  mask >>= 1;

  // Then, get the "vop_time_increment" (a "fNumVTIRBits"-bit field).
  // First, make sure we have enough bits left for this:
  if ((mask>>(fNumVTIRBits-1)) == 0) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectPlane(): 32-bits are not enough to get \"vop_time_increment\"!\n";
  }
  unsigned vop_time_increment = 0;
  // Collect the field's bits (still in their original positions)...
  for (unsigned i = 0; i < fNumVTIRBits; ++i) {
    vop_time_increment |= timeInfo&mask;
    mask >>= 1;
  }
  // ...then shift them down so that the value is right-justified:
  while (mask != 0) {
    vop_time_increment >>= 1;
    mask >>= 1;
  }
#ifdef DEBUG
  fprintf(stderr, "vop_coding_type: %d(%c), modulo_time_base: %d, vop_time_increment: %d\n", vop_coding_type, "IPBS"[vop_coding_type], modulo_time_base, vop_time_increment);
#endif

  // Now, copy all bytes that we see, up until we reach a code of some sort:
  saveToNextCode(next4Bytes);

  // Update our counters based on the frame timing information that we saw:
  if (fixed_vop_time_increment > 0) {
    // This is a 'fixed_vop_rate' stream. Use 'fixed_vop_time_increment':
    usingSource()->fPictureCount += fixed_vop_time_increment;
    if (vop_time_increment > 0 || modulo_time_base > 0) {
      fTotalTicksSinceLastTimeCode += fixed_vop_time_increment;
      // Note: "fSecondsSinceLastTimeCode" and "fPrevNewTotalTicks" are not used.
    }
  } else {
    // Use 'vop_time_increment':
    unsigned newTotalTicks
      = (fSecondsSinceLastTimeCode + modulo_time_base)*vop_time_increment_resolution
      + vop_time_increment;
    if (newTotalTicks == fPrevNewTotalTicks && fPrevNewTotalTicks > 0) {
      // This is apparently a buggy MPEG-4 video stream, because
      // "vop_time_increment" did not change. Overcome this error,
      // by pretending that it did change.
#ifdef DEBUG
      fprintf(stderr, "Buggy MPEG-4 video stream: \"vop_time_increment\" did not change!\n");
#endif
      // The following assumes that we don't have 'B' frames. If we do, then TARFU!
      usingSource()->fPictureCount += vop_time_increment;
      fTotalTicksSinceLastTimeCode += vop_time_increment;
      fSecondsSinceLastTimeCode += modulo_time_base;
    } else {
      if (newTotalTicks < fPrevNewTotalTicks && vop_coding_type != 2/*B*/
	  && modulo_time_base == 0 && vop_time_increment == 0 && !fJustSawTimeCode) {
	// This is another kind of buggy MPEG-4 video stream, in which
	// "vop_time_increment" wraps around, but without
	// "modulo_time_base" changing (or just having had a new time code).
	// Overcome this by pretending that "vop_time_increment" *did* wrap around:
#ifdef DEBUG
	fprintf(stderr, "Buggy MPEG-4 video stream: \"vop_time_increment\" wrapped around, but without \"modulo_time_base\" changing!\n");
#endif
	++fSecondsSinceLastTimeCode;
	newTotalTicks += vop_time_increment_resolution;
      }
      fPrevNewTotalTicks = newTotalTicks;
      if (vop_coding_type != 2/*B*/) {
	int pictureCountDelta = newTotalTicks - fTotalTicksSinceLastTimeCode;
	if (pictureCountDelta <= 0) pictureCountDelta = fPrevPictureCountDelta;
	// ensures that the picture count is always increasing
	usingSource()->fPictureCount += pictureCountDelta;
	fPrevPictureCountDelta = pictureCountDelta;
	fTotalTicksSinceLastTimeCode = newTotalTicks;
	fSecondsSinceLastTimeCode += modulo_time_base;
      }
    }
  }
  fJustSawTimeCode = False; // for next time

  // The next thing to parse depends on the code that we just saw,
  // but we are assumed to have ended the current picture:
  usingSource()->fPictureEndMarker = True; // HACK #####
  switch (next4Bytes) {
  case VISUAL_OBJECT_SEQUENCE_END_CODE: {
    setParseState(PARSING_VISUAL_OBJECT_SEQUENCE_END_CODE);
    break;
  }
  case VISUAL_OBJECT_SEQUENCE_START_CODE: {
    setParseState(PARSING_VISUAL_OBJECT_SEQUENCE_SEEN_CODE);
    break;
  }
  case VISUAL_OBJECT_START_CODE: {
    setParseState(PARSING_VISUAL_OBJECT);
    break;
  }
  case GROUP_VOP_START_CODE: {
    setParseState(PARSING_GROUP_OF_VIDEO_OBJECT_PLANE);
    break;
  }
  case VOP_START_CODE: {
    setParseState(PARSING_VIDEO_OBJECT_PLANE);
    break;
  }
  default: {
    if (isVideoObjectStartCode(next4Bytes)) {
      setParseState(PARSING_VIDEO_OBJECT_LAYER);
    } else if (isVideoObjectLayerStartCode(next4Bytes)){
      // copy all bytes that we see, up until we reach a VOP_START_CODE:
      u_int32_t next4Bytes = get4Bytes();
      while (next4Bytes != VOP_START_CODE) {
	saveToNextCode(next4Bytes);
      }
      setParseState(PARSING_VIDEO_OBJECT_PLANE);
    } else {
      usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectPlane(): Saw unexpected code "
			     << (void*)next4Bytes << "\n";
      setParseState(PARSING_VIDEO_OBJECT_PLANE); // the safest way to recover...
    }
    break;
  }
  }

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  return curFrameSize();
}
667 | |
668 | unsigned MPEG4VideoStreamParser::parseVisualObjectSequenceEndCode() { |
669 | #ifdef DEBUG |
670 | fprintf(stderr, "parsing VISUAL_OBJECT_SEQUENCE_END_CODE\n" ); |
671 | #endif |
672 | // Note that we've already read the VISUAL_OBJECT_SEQUENCE_END_CODE |
673 | save4Bytes(VISUAL_OBJECT_SEQUENCE_END_CODE); |
674 | |
675 | setParseState(PARSING_VISUAL_OBJECT_SEQUENCE); |
676 | |
677 | // Treat this as if we had ended a picture: |
678 | usingSource()->fPictureEndMarker = True; // HACK ##### |
679 | |
680 | return curFrameSize(); |
681 | } |
682 | |