| 1 | /********** |
| 2 | This library is free software; you can redistribute it and/or modify it under |
| 3 | the terms of the GNU Lesser General Public License as published by the |
| 4 | Free Software Foundation; either version 3 of the License, or (at your |
| 5 | option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.) |
| 6 | |
| 7 | This library is distributed in the hope that it will be useful, but WITHOUT |
| 8 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 9 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for |
| 10 | more details. |
| 11 | |
| 12 | You should have received a copy of the GNU Lesser General Public License |
| 13 | along with this library; if not, write to the Free Software Foundation, Inc., |
| 14 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 15 | **********/ |
| 16 | // "liveMedia" |
| 17 | // Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved. |
| 18 | // A filter that breaks up an MPEG-4 video elementary stream into |
| 19 | // frames for: |
| 20 | // - Visual Object Sequence (VS) Header + Visual Object (VO) Header |
| 21 | // + Video Object Layer (VOL) Header |
| 22 | // - Group of VOP (GOV) Header |
| 23 | // - VOP frame |
| 24 | // Implementation |
| 25 | |
| 26 | #include "MPEG4VideoStreamFramer.hh" |
| 27 | #include "MPEGVideoStreamParser.hh" |
| 28 | #include "MPEG4LATMAudioRTPSource.hh" // for "parseGeneralConfigStr()" |
| 29 | #include <string.h> |
| 30 | |
| 31 | ////////// MPEG4VideoStreamParser definition ////////// |
| 32 | |
// An enum representing the current state of the parser - i.e., the kind of
// object that we expect to see next in the input stream:
enum MPEGParseState {
  PARSING_VISUAL_OBJECT_SEQUENCE, // looking for a VS header (the initial state)
  PARSING_VISUAL_OBJECT_SEQUENCE_SEEN_CODE, // ditto, but the start code has already been consumed
  PARSING_VISUAL_OBJECT, // looking for a Visual Object (VO) header
  PARSING_VIDEO_OBJECT_LAYER, // looking for a Video Object Layer (VOL) header
  PARSING_GROUP_OF_VIDEO_OBJECT_PLANE, // looking for a Group of VOP (GOV) header
  PARSING_VIDEO_OBJECT_PLANE, // looking for a VOP frame
  PARSING_VISUAL_OBJECT_SEQUENCE_END_CODE // looking for a VS end code
};
| 43 | |
// The parser itself.  One instance is created per "MPEG4VideoStreamFramer"
// (unless a subclass chooses to supply its own parser); each call to
// "parse()" delivers one complete 'frame' (a header or a VOP) to the framer.
class MPEG4VideoStreamParser: public MPEGVideoStreamParser {
public:
  MPEG4VideoStreamParser(MPEG4VideoStreamFramer* usingSource,
			 FramedSource* inputSource);
  virtual ~MPEG4VideoStreamParser();

private: // redefined virtual functions:
  virtual void flushInput();
  virtual unsigned parse();
      // Returns the size of the frame that was parsed, or 0 if parsing
      // was interrupted (because more input data is needed).

private:
  // Our "fUsingSource" is known to be a "MPEG4VideoStreamFramer":
  MPEG4VideoStreamFramer* usingSource() {
    return (MPEG4VideoStreamFramer*)fUsingSource;
  }
  void setParseState(MPEGParseState parseState);

  // One parsing routine per "MPEGParseState"; each returns the frame size:
  unsigned parseVisualObjectSequence(Boolean haveSeenStartCode = False);
  unsigned parseVisualObject();
  unsigned parseVideoObjectLayer();
  unsigned parseGroupOfVideoObjectPlane();
  unsigned parseVideoObjectPlane();
  unsigned parseVisualObjectSequenceEndCode();

  // These are used for parsing within an already-read frame:
  Boolean getNextFrameBit(u_int8_t& result);
  Boolean getNextFrameBits(unsigned numBits, u_int32_t& result);

  // Which are used by:
  void analyzeVOLHeader();

private:
  MPEGParseState fCurrentParseState;
  unsigned fNumBitsSeenSoFar; // used by the getNextFrameBit*() routines
  u_int32_t vop_time_increment_resolution; // from the VOL header; also used as the 'tick rate'
  unsigned fNumVTIRBits;
  // # of bits needed to count to "vop_time_increment_resolution"
  u_int8_t fixed_vop_rate; // nonzero iff the VOL header declared a fixed VOP rate
  unsigned fixed_vop_time_increment; // used if 'fixed_vop_rate' is set
  // Timing bookkeeping, updated as GOV headers and VOPs are parsed:
  unsigned fSecondsSinceLastTimeCode, fTotalTicksSinceLastTimeCode, fPrevNewTotalTicks;
  unsigned fPrevPictureCountDelta;
  Boolean fJustSawTimeCode; // True iff a GOV time code was the most recent thing parsed
};
| 86 | |
| 87 | |
| 88 | ////////// MPEG4VideoStreamFramer implementation ////////// |
| 89 | |
| 90 | MPEG4VideoStreamFramer* |
| 91 | MPEG4VideoStreamFramer::createNew(UsageEnvironment& env, |
| 92 | FramedSource* inputSource) { |
| 93 | // Need to add source type checking here??? ##### |
| 94 | return new MPEG4VideoStreamFramer(env, inputSource); |
| 95 | } |
| 96 | |
| 97 | unsigned char* MPEG4VideoStreamFramer |
| 98 | ::getConfigBytes(unsigned& numBytes) const { |
| 99 | numBytes = fNumConfigBytes; |
| 100 | return fConfigBytes; |
| 101 | } |
| 102 | |
| 103 | void MPEG4VideoStreamFramer |
| 104 | ::setConfigInfo(u_int8_t profileAndLevelIndication, char const* configStr) { |
| 105 | fProfileAndLevelIndication = profileAndLevelIndication; |
| 106 | |
| 107 | delete[] fConfigBytes; |
| 108 | fConfigBytes = parseGeneralConfigStr(configStr, fNumConfigBytes); |
| 109 | } |
| 110 | |
| 111 | MPEG4VideoStreamFramer::MPEG4VideoStreamFramer(UsageEnvironment& env, |
| 112 | FramedSource* inputSource, |
| 113 | Boolean createParser) |
| 114 | : MPEGVideoStreamFramer(env, inputSource), |
| 115 | fProfileAndLevelIndication(0), |
| 116 | fConfigBytes(NULL), fNumConfigBytes(0), |
| 117 | fNewConfigBytes(NULL), fNumNewConfigBytes(0) { |
| 118 | fParser = createParser |
| 119 | ? new MPEG4VideoStreamParser(this, inputSource) |
| 120 | : NULL; |
| 121 | } |
| 122 | |
| 123 | MPEG4VideoStreamFramer::~MPEG4VideoStreamFramer() { |
| 124 | delete[] fConfigBytes; delete[] fNewConfigBytes; |
| 125 | } |
| 126 | |
| 127 | void MPEG4VideoStreamFramer::startNewConfig() { |
| 128 | delete[] fNewConfigBytes; fNewConfigBytes = NULL; |
| 129 | fNumNewConfigBytes = 0; |
| 130 | } |
| 131 | |
| 132 | void MPEG4VideoStreamFramer |
| 133 | ::appendToNewConfig(unsigned char* newConfigBytes, unsigned numNewBytes) { |
| 134 | // Allocate a new block of memory for the new config bytes: |
| 135 | unsigned char* configNew |
| 136 | = new unsigned char[fNumNewConfigBytes + numNewBytes]; |
| 137 | |
| 138 | // Copy the old, then the new, config bytes there: |
| 139 | memmove(configNew, fNewConfigBytes, fNumNewConfigBytes); |
| 140 | memmove(&configNew[fNumNewConfigBytes], newConfigBytes, numNewBytes); |
| 141 | |
| 142 | delete[] fNewConfigBytes; fNewConfigBytes = configNew; |
| 143 | fNumNewConfigBytes += numNewBytes; |
| 144 | } |
| 145 | |
| 146 | void MPEG4VideoStreamFramer::completeNewConfig() { |
| 147 | delete[] fConfigBytes; fConfigBytes = fNewConfigBytes; |
| 148 | fNewConfigBytes = NULL; |
| 149 | fNumConfigBytes = fNumNewConfigBytes; |
| 150 | fNumNewConfigBytes = 0; |
| 151 | } |
| 152 | |
| 153 | Boolean MPEG4VideoStreamFramer::isMPEG4VideoStreamFramer() const { |
| 154 | return True; |
| 155 | } |
| 156 | |
| 157 | ////////// MPEG4VideoStreamParser implementation ////////// |
| 158 | |
MPEG4VideoStreamParser
::MPEG4VideoStreamParser(MPEG4VideoStreamFramer* usingSource,
			 FramedSource* inputSource)
  : MPEGVideoStreamParser(usingSource, inputSource),
    fCurrentParseState(PARSING_VISUAL_OBJECT_SEQUENCE),
    vop_time_increment_resolution(0), fNumVTIRBits(0),
    fixed_vop_rate(0), fixed_vop_time_increment(0),
    fSecondsSinceLastTimeCode(0), fTotalTicksSinceLastTimeCode(0),
    fPrevNewTotalTicks(0), fPrevPictureCountDelta(1), fJustSawTimeCode(False) {
  // All state is set up in the member-initializer list above; we begin by
  // expecting a Visual Object Sequence (VS) header.
}
| 169 | |
MPEG4VideoStreamParser::~MPEG4VideoStreamParser() {
  // No dynamically-allocated state of our own to release.
}
| 172 | |
| 173 | void MPEG4VideoStreamParser::setParseState(MPEGParseState parseState) { |
| 174 | fCurrentParseState = parseState; |
| 175 | MPEGVideoStreamParser::setParseState(); |
| 176 | } |
| 177 | |
| 178 | void MPEG4VideoStreamParser::flushInput() { |
| 179 | fSecondsSinceLastTimeCode = 0; |
| 180 | fTotalTicksSinceLastTimeCode = 0; |
| 181 | fPrevNewTotalTicks = 0; |
| 182 | fPrevPictureCountDelta = 1; |
| 183 | |
| 184 | StreamParser::flushInput(); |
| 185 | if (fCurrentParseState != PARSING_VISUAL_OBJECT_SEQUENCE) { |
| 186 | setParseState(PARSING_VISUAL_OBJECT_SEQUENCE); // later, change to GOV or VOP? ##### |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | |
| 191 | unsigned MPEG4VideoStreamParser::parse() { |
| 192 | try { |
| 193 | switch (fCurrentParseState) { |
| 194 | case PARSING_VISUAL_OBJECT_SEQUENCE: { |
| 195 | return parseVisualObjectSequence(); |
| 196 | } |
| 197 | case PARSING_VISUAL_OBJECT_SEQUENCE_SEEN_CODE: { |
| 198 | return parseVisualObjectSequence(True); |
| 199 | } |
| 200 | case PARSING_VISUAL_OBJECT: { |
| 201 | return parseVisualObject(); |
| 202 | } |
| 203 | case PARSING_VIDEO_OBJECT_LAYER: { |
| 204 | return parseVideoObjectLayer(); |
| 205 | } |
| 206 | case PARSING_GROUP_OF_VIDEO_OBJECT_PLANE: { |
| 207 | return parseGroupOfVideoObjectPlane(); |
| 208 | } |
| 209 | case PARSING_VIDEO_OBJECT_PLANE: { |
| 210 | return parseVideoObjectPlane(); |
| 211 | } |
| 212 | case PARSING_VISUAL_OBJECT_SEQUENCE_END_CODE: { |
| 213 | return parseVisualObjectSequenceEndCode(); |
| 214 | } |
| 215 | default: { |
| 216 | return 0; // shouldn't happen |
| 217 | } |
| 218 | } |
| 219 | } catch (int /*e*/) { |
| 220 | #ifdef DEBUG |
| 221 | fprintf(stderr, "MPEG4VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n" ); |
| 222 | #endif |
| 223 | return 0; // the parsing got interrupted |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | #define VISUAL_OBJECT_SEQUENCE_START_CODE 0x000001B0 |
| 228 | #define VISUAL_OBJECT_SEQUENCE_END_CODE 0x000001B1 |
| 229 | #define GROUP_VOP_START_CODE 0x000001B3 |
| 230 | #define VISUAL_OBJECT_START_CODE 0x000001B5 |
| 231 | #define VOP_START_CODE 0x000001B6 |
| 232 | |
unsigned MPEG4VideoStreamParser
::parseVisualObjectSequence(Boolean haveSeenStartCode) {
  // Parses a Visual Object Sequence (VS) header, delivering it as one frame.
  // This begins a new 'configuration' (VS + VO + VOL headers), whose bytes
  // are accumulated via "appendToNewConfig()".  Returns the frame size.
#ifdef DEBUG
  fprintf(stderr, "parsing VisualObjectSequence\n" );
#endif
  usingSource()->startNewConfig();
  u_int32_t first4Bytes;
  if (!haveSeenStartCode) {
    // Discard input bytes until a VISUAL_OBJECT_SEQUENCE_START_CODE appears:
    while ((first4Bytes = test4Bytes()) != VISUAL_OBJECT_SEQUENCE_START_CODE) {
#ifdef DEBUG
      fprintf(stderr, "ignoring non VS header: 0x%08x\n" , first4Bytes);
#endif
      get1Byte(); setParseState(PARSING_VISUAL_OBJECT_SEQUENCE);
          // ensures we progress over bad data
    }
    first4Bytes = get4Bytes();
  } else {
    // We've already seen the start code
    first4Bytes = VISUAL_OBJECT_SEQUENCE_START_CODE;
  }
  save4Bytes(first4Bytes);

  // The next byte is the "profile_and_level_indication":
  u_int8_t pali = get1Byte();
#ifdef DEBUG
  fprintf(stderr, "profile_and_level_indication: %02x\n" , pali);
#endif
  saveByte(pali);
  usingSource()->fProfileAndLevelIndication = pali;

  // Now, copy all bytes that we see, up until we reach
  // a VISUAL_OBJECT_START_CODE:
  u_int32_t next4Bytes = get4Bytes();
  while (next4Bytes != VISUAL_OBJECT_START_CODE) {
    saveToNextCode(next4Bytes);
  }

  setParseState(PARSING_VISUAL_OBJECT);

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // This header forms part of the 'configuration' information:
  usingSource()->appendToNewConfig(fStartOfFrame, curFrameSize());

  return curFrameSize();
}
| 280 | |
| 281 | static inline Boolean isVideoObjectStartCode(u_int32_t code) { |
| 282 | return code >= 0x00000100 && code <= 0x0000011F; |
| 283 | } |
| 284 | |
unsigned MPEG4VideoStreamParser::parseVisualObject() {
  // Parses a Visual Object (VO) header, delivering it as one frame.  Its
  // bytes also form part of the 'configuration' information.
#ifdef DEBUG
  fprintf(stderr, "parsing VisualObject\n" );
#endif
  // Note that we've already read the VISUAL_OBJECT_START_CODE
  save4Bytes(VISUAL_OBJECT_START_CODE);

  // Next, extract the "visual_object_type" from the next 1 or 2 bytes:
  u_int8_t nextByte = get1Byte(); saveByte(nextByte);
  Boolean is_visual_object_identifier = (nextByte&0x80) != 0;
  u_int8_t visual_object_type;
  if (is_visual_object_identifier) {
    // A "visual_object_identifier" (verid + priority) is present, so the
    // type field lives in the following byte instead:
#ifdef DEBUG
    fprintf(stderr, "visual_object_verid: 0x%x; visual_object_priority: 0x%x\n" , (nextByte&0x78)>>3, (nextByte&0x07));
#endif
    nextByte = get1Byte(); saveByte(nextByte);
    visual_object_type = (nextByte&0xF0)>>4;
  } else {
    visual_object_type = (nextByte&0x78)>>3;
  }
#ifdef DEBUG
  fprintf(stderr, "visual_object_type: 0x%x\n" , visual_object_type);
#endif
  // At present, we support only the "Video ID" "visual_object_type" (1)
  if (visual_object_type != 1) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVisualObject(): Warning: We don't handle visual_object_type " << visual_object_type << "\n" ;
  }

  // Now, copy all bytes that we see, up until we reach
  // a video_object_start_code
  u_int32_t next4Bytes = get4Bytes();
  while (!isVideoObjectStartCode(next4Bytes)) {
    saveToNextCode(next4Bytes);
  }
  save4Bytes(next4Bytes);
#ifdef DEBUG
  fprintf(stderr, "saw a video_object_start_code: 0x%08x\n" , next4Bytes);
#endif

  setParseState(PARSING_VIDEO_OBJECT_LAYER);

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // This header forms part of the 'configuration' information:
  usingSource()->appendToNewConfig(fStartOfFrame, curFrameSize());

  return curFrameSize();
}
| 334 | |
| 335 | static inline Boolean isVideoObjectLayerStartCode(u_int32_t code) { |
| 336 | return code >= 0x00000120 && code <= 0x0000012F; |
| 337 | } |
| 338 | |
| 339 | Boolean MPEG4VideoStreamParser::getNextFrameBit(u_int8_t& result) { |
| 340 | if (fNumBitsSeenSoFar/8 >= curFrameSize()) return False; |
| 341 | |
| 342 | u_int8_t nextByte = fStartOfFrame[fNumBitsSeenSoFar/8]; |
| 343 | result = (nextByte>>(7-fNumBitsSeenSoFar%8))&1; |
| 344 | ++fNumBitsSeenSoFar; |
| 345 | return True; |
| 346 | } |
| 347 | |
| 348 | Boolean MPEG4VideoStreamParser::getNextFrameBits(unsigned numBits, |
| 349 | u_int32_t& result) { |
| 350 | result = 0; |
| 351 | for (unsigned i = 0; i < numBits; ++i) { |
| 352 | u_int8_t nextBit; |
| 353 | if (!getNextFrameBit(nextBit)) return False; |
| 354 | result = (result<<1)|nextBit; |
| 355 | } |
| 356 | return True; |
| 357 | } |
| 358 | |
| 359 | void MPEG4VideoStreamParser::() { |
| 360 | // Extract timing information (in particular, |
| 361 | // "vop_time_increment_resolution") from the VOL Header: |
| 362 | fNumBitsSeenSoFar = 41; |
| 363 | do { |
| 364 | u_int8_t is_object_layer_identifier; |
| 365 | if (!getNextFrameBit(is_object_layer_identifier)) break; |
| 366 | if (is_object_layer_identifier) fNumBitsSeenSoFar += 7; |
| 367 | |
| 368 | u_int32_t aspect_ratio_info; |
| 369 | if (!getNextFrameBits(4, aspect_ratio_info)) break; |
| 370 | if (aspect_ratio_info == 15 /*extended_PAR*/) fNumBitsSeenSoFar += 16; |
| 371 | |
| 372 | u_int8_t vol_control_parameters; |
| 373 | if (!getNextFrameBit(vol_control_parameters)) break; |
| 374 | if (vol_control_parameters) { |
| 375 | fNumBitsSeenSoFar += 3; // chroma_format; low_delay |
| 376 | u_int8_t vbw_parameters; |
| 377 | if (!getNextFrameBit(vbw_parameters)) break; |
| 378 | if (vbw_parameters) fNumBitsSeenSoFar += 79; |
| 379 | } |
| 380 | |
| 381 | fNumBitsSeenSoFar += 2; // video_object_layer_shape |
| 382 | u_int8_t marker_bit; |
| 383 | if (!getNextFrameBit(marker_bit)) break; |
| 384 | if (marker_bit != 1) { // sanity check |
| 385 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): marker_bit 1 not set!\n" ; |
| 386 | break; |
| 387 | } |
| 388 | |
| 389 | if (!getNextFrameBits(16, vop_time_increment_resolution)) break; |
| 390 | #ifdef DEBUG |
| 391 | fprintf(stderr, "vop_time_increment_resolution: %d\n" , vop_time_increment_resolution); |
| 392 | #endif |
| 393 | if (vop_time_increment_resolution == 0) { |
| 394 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): vop_time_increment_resolution is zero!\n" ; |
| 395 | break; |
| 396 | } |
| 397 | // Compute how many bits are necessary to represent this: |
| 398 | fNumVTIRBits = 0; |
| 399 | for (unsigned test = vop_time_increment_resolution; test>0; test /= 2) { |
| 400 | ++fNumVTIRBits; |
| 401 | } |
| 402 | |
| 403 | if (!getNextFrameBit(marker_bit)) break; |
| 404 | if (marker_bit != 1) { // sanity check |
| 405 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): marker_bit 2 not set!\n" ; |
| 406 | break; |
| 407 | } |
| 408 | |
| 409 | if (!getNextFrameBit(fixed_vop_rate)) break; |
| 410 | if (fixed_vop_rate) { |
| 411 | // Get the following "fixed_vop_time_increment": |
| 412 | if (!getNextFrameBits(fNumVTIRBits, fixed_vop_time_increment)) break; |
| 413 | #ifdef DEBUG |
| 414 | fprintf(stderr, "fixed_vop_time_increment: %d\n" , fixed_vop_time_increment); |
| 415 | if (fixed_vop_time_increment == 0) { |
| 416 | usingSource()->envir() << "MPEG4VideoStreamParser::analyzeVOLHeader(): fixed_vop_time_increment is zero!\n" ; |
| 417 | } |
| 418 | #endif |
| 419 | } |
| 420 | // Use "vop_time_increment_resolution" as the 'frame rate' |
| 421 | // (really, 'tick rate'): |
| 422 | usingSource()->fFrameRate = (double)vop_time_increment_resolution; |
| 423 | #ifdef DEBUG |
| 424 | fprintf(stderr, "fixed_vop_rate: %d; 'frame' (really tick) rate: %f\n" , fixed_vop_rate, usingSource()->fFrameRate); |
| 425 | #endif |
| 426 | |
| 427 | return; |
| 428 | } while (0); |
| 429 | |
| 430 | if (fNumBitsSeenSoFar/8 >= curFrameSize()) { |
| 431 | char errMsg[200]; |
| 432 | sprintf(errMsg, "Not enough bits in VOL header: %d/8 >= %d\n" , fNumBitsSeenSoFar, curFrameSize()); |
| 433 | usingSource()->envir() << errMsg; |
| 434 | } |
| 435 | } |
| 436 | |
unsigned MPEG4VideoStreamParser::parseVideoObjectLayer() {
  // Parses a Video Object Layer (VOL) header, delivering it as one frame.
  // This completes (and publishes) the 'configuration' information.
#ifdef DEBUG
  fprintf(stderr, "parsing VideoObjectLayer\n" );
#endif
  // The first 4 bytes must be a "video_object_layer_start_code".
  // If not, this is a 'short video header', which we currently
  // don't support:
  u_int32_t next4Bytes = get4Bytes();
  if (!isVideoObjectLayerStartCode(next4Bytes)) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectLayer(): This appears to be a 'short video header', which we currently don't support\n" ;
  }

  // Now, copy all bytes that we see, up until we reach
  // a GROUP_VOP_START_CODE or a VOP_START_CODE:
  do {
    saveToNextCode(next4Bytes);
  } while (next4Bytes != GROUP_VOP_START_CODE
	   && next4Bytes != VOP_START_CODE);

  // Extract timing parameters from the bytes that we just saved:
  analyzeVOLHeader();

  setParseState((next4Bytes == GROUP_VOP_START_CODE)
		? PARSING_GROUP_OF_VIDEO_OBJECT_PLANE
		: PARSING_VIDEO_OBJECT_PLANE);

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // This header ends the 'configuration' information:
  usingSource()->appendToNewConfig(fStartOfFrame, curFrameSize());
  usingSource()->completeNewConfig();

  return curFrameSize();
}
| 471 | |
unsigned MPEG4VideoStreamParser::parseGroupOfVideoObjectPlane() {
  // Parses a Group of VOP (GOV) header, delivering it as one frame, and
  // recording the 18-bit hours/minutes/seconds time code that it carries.
#ifdef DEBUG
  fprintf(stderr, "parsing GroupOfVideoObjectPlane\n" );
#endif
  // Note that we've already read the GROUP_VOP_START_CODE
  save4Bytes(GROUP_VOP_START_CODE);

  // Next, extract the (18-bit) time code from the next 3 bytes:
  u_int8_t next3Bytes[3];
  getBytes(next3Bytes, 3);
  saveByte(next3Bytes[0]);saveByte(next3Bytes[1]);saveByte(next3Bytes[2]);
  unsigned time_code
    = (next3Bytes[0]<<10)|(next3Bytes[1]<<2)|(next3Bytes[2]>>6);
  // Time code layout: 5 bits hours; 6 bits minutes; 1 marker bit; 6 bits seconds:
  unsigned time_code_hours = (time_code&0x0003E000)>>13;
  unsigned time_code_minutes = (time_code&0x00001F80)>>7;
#if defined(DEBUG) || defined(DEBUG_TIMESTAMPS)
  Boolean marker_bit = (time_code&0x00000040) != 0;
#endif
  unsigned time_code_seconds = (time_code&0x0000003F);
#if defined(DEBUG) || defined(DEBUG_TIMESTAMPS)
  fprintf(stderr, "time_code: 0x%05x, hours %d, minutes %d, marker_bit %d, seconds %d\n" , time_code, time_code_hours, time_code_minutes, marker_bit, time_code_seconds);
#endif
  fJustSawTimeCode = True; // checked later, in "parseVideoObjectPlane()"

  // Now, copy all bytes that we see, up until we reach a VOP_START_CODE:
  u_int32_t next4Bytes = get4Bytes();
  while (next4Bytes != VOP_START_CODE) {
    saveToNextCode(next4Bytes);
  }

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  // Record the time code:
  usingSource()->setTimeCode(time_code_hours, time_code_minutes,
			     time_code_seconds, 0, 0);
      // Note: Because the GOV header can appear anywhere (not just at a 1s point), we
      // don't pass "fTotalTicksSinceLastTimeCode" as the "picturesSinceLastGOP" parameter.
  fSecondsSinceLastTimeCode = 0;
  if (fixed_vop_rate) fTotalTicksSinceLastTimeCode = 0;

  setParseState(PARSING_VIDEO_OBJECT_PLANE);

  return curFrameSize();
}
| 517 | |
unsigned MPEG4VideoStreamParser::parseVideoObjectPlane() {
  // Parses a VOP (a coded video frame), delivering it as one frame.
  // Updates our timing state from the VOP's "modulo_time_base" and
  // "vop_time_increment" fields (with workarounds for two kinds of buggy
  // streams), then chooses the next parse state from whichever start code
  // follows the VOP.
#ifdef DEBUG
  fprintf(stderr, "#parsing VideoObjectPlane\n" );
#endif
  // Note that we've already read the VOP_START_CODE
  save4Bytes(VOP_START_CODE);

  // Get the "vop_coding_type" from the next byte:
  u_int8_t nextByte = get1Byte(); saveByte(nextByte);
  u_int8_t vop_coding_type = nextByte>>6; // indexes "IPBS" below

  // Next, get the "modulo_time_base" by counting the '1' bits that follow.
  // We look at the next 32-bits only. This should be enough in most cases.
  u_int32_t next4Bytes = get4Bytes();
  u_int32_t timeInfo = (nextByte<<(32-6))|(next4Bytes>>6);
  unsigned modulo_time_base = 0;
  u_int32_t mask = 0x80000000;
  while ((timeInfo&mask) != 0) {
    ++modulo_time_base;
    mask >>= 1;
  }
  mask >>= 1; // skip over the '0' bit that terminated "modulo_time_base"

  // Check the following marker bit:
  if ((timeInfo&mask) == 0) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectPlane(): marker bit not set!\n" ;
  }
  mask >>= 1;

  // Then, get the "vop_time_increment".
  // First, make sure we have enough bits left for this:
  if ((mask>>(fNumVTIRBits-1)) == 0) {
    usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectPlane(): 32-bits are not enough to get \"vop_time_increment\"!\n" ;
  }
  // Collect the "fNumVTIRBits" bits still left-aligned under "mask"...
  unsigned vop_time_increment = 0;
  for (unsigned i = 0; i < fNumVTIRBits; ++i) {
    vop_time_increment |= timeInfo&mask;
    mask >>= 1;
  }
  // ...then shift them down into the low-order bits:
  while (mask != 0) {
    vop_time_increment >>= 1;
    mask >>= 1;
  }
#ifdef DEBUG
  fprintf(stderr, "vop_coding_type: %d(%c), modulo_time_base: %d, vop_time_increment: %d\n" , vop_coding_type, "IPBS" [vop_coding_type], modulo_time_base, vop_time_increment);
#endif

  // Now, copy all bytes that we see, up until we reach a code of some sort:
  saveToNextCode(next4Bytes);

  // Update our counters based on the frame timing information that we saw:
  if (fixed_vop_time_increment > 0) {
    // This is a 'fixed_vop_rate' stream. Use 'fixed_vop_time_increment':
    usingSource()->fPictureCount += fixed_vop_time_increment;
    if (vop_time_increment > 0 || modulo_time_base > 0) {
      fTotalTicksSinceLastTimeCode += fixed_vop_time_increment;
      // Note: "fSecondsSinceLastTimeCode" and "fPrevNewTotalTicks" are not used.
    }
  } else {
    // Use 'vop_time_increment':
    unsigned newTotalTicks
      = (fSecondsSinceLastTimeCode + modulo_time_base)*vop_time_increment_resolution
      + vop_time_increment;
    if (newTotalTicks == fPrevNewTotalTicks && fPrevNewTotalTicks > 0) {
      // This is apparently a buggy MPEG-4 video stream, because
      // "vop_time_increment" did not change. Overcome this error,
      // by pretending that it did change.
#ifdef DEBUG
      fprintf(stderr, "Buggy MPEG-4 video stream: \"vop_time_increment\" did not change!\n" );
#endif
      // The following assumes that we don't have 'B' frames. If we do, then TARFU!
      usingSource()->fPictureCount += vop_time_increment;
      fTotalTicksSinceLastTimeCode += vop_time_increment;
      fSecondsSinceLastTimeCode += modulo_time_base;
    } else {
      if (newTotalTicks < fPrevNewTotalTicks && vop_coding_type != 2/*B*/
	  && modulo_time_base == 0 && vop_time_increment == 0 && !fJustSawTimeCode) {
	// This is another kind of buggy MPEG-4 video stream, in which
	// "vop_time_increment" wraps around, but without
	// "modulo_time_base" changing (or just having had a new time code).
	// Overcome this by pretending that "vop_time_increment" *did* wrap around:
#ifdef DEBUG
	fprintf(stderr, "Buggy MPEG-4 video stream: \"vop_time_increment\" wrapped around, but without \"modulo_time_base\" changing!\n" );
#endif
	++fSecondsSinceLastTimeCode;
	newTotalTicks += vop_time_increment_resolution;
      }
      fPrevNewTotalTicks = newTotalTicks;
      if (vop_coding_type != 2/*B*/) {
	int pictureCountDelta = newTotalTicks - fTotalTicksSinceLastTimeCode;
	if (pictureCountDelta <= 0) pictureCountDelta = fPrevPictureCountDelta;
	    // ensures that the picture count is always increasing
	usingSource()->fPictureCount += pictureCountDelta;
	fPrevPictureCountDelta = pictureCountDelta;
	fTotalTicksSinceLastTimeCode = newTotalTicks;
	fSecondsSinceLastTimeCode += modulo_time_base;
      }
    }
  }
  fJustSawTimeCode = False; // for next time

  // The next thing to parse depends on the code that we just saw,
  // but we are assumed to have ended the current picture:
  usingSource()->fPictureEndMarker = True; // HACK #####
  switch (next4Bytes) {
  case VISUAL_OBJECT_SEQUENCE_END_CODE: {
    setParseState(PARSING_VISUAL_OBJECT_SEQUENCE_END_CODE);
    break;
  }
  case VISUAL_OBJECT_SEQUENCE_START_CODE: {
    setParseState(PARSING_VISUAL_OBJECT_SEQUENCE_SEEN_CODE);
    break;
  }
  case VISUAL_OBJECT_START_CODE: {
    setParseState(PARSING_VISUAL_OBJECT);
    break;
  }
  case GROUP_VOP_START_CODE: {
    setParseState(PARSING_GROUP_OF_VIDEO_OBJECT_PLANE);
    break;
  }
  case VOP_START_CODE: {
    setParseState(PARSING_VIDEO_OBJECT_PLANE);
    break;
  }
  default: {
    if (isVideoObjectStartCode(next4Bytes)) {
      setParseState(PARSING_VIDEO_OBJECT_LAYER);
    } else if (isVideoObjectLayerStartCode(next4Bytes)){
      // copy all bytes that we see, up until we reach a VOP_START_CODE:
      // (note: this inner "next4Bytes" deliberately shadows the outer one)
      u_int32_t next4Bytes = get4Bytes();
      while (next4Bytes != VOP_START_CODE) {
	saveToNextCode(next4Bytes);
      }
      setParseState(PARSING_VIDEO_OBJECT_PLANE);
    } else {
      usingSource()->envir() << "MPEG4VideoStreamParser::parseVideoObjectPlane(): Saw unexpected code "
			     << (void*)next4Bytes << "\n" ;
      setParseState(PARSING_VIDEO_OBJECT_PLANE); // the safest way to recover...
    }
    break;
  }
  }

  // Compute this frame's presentation time:
  usingSource()->computePresentationTime(fTotalTicksSinceLastTimeCode);

  return curFrameSize();
}
| 667 | |
| 668 | unsigned MPEG4VideoStreamParser::parseVisualObjectSequenceEndCode() { |
| 669 | #ifdef DEBUG |
| 670 | fprintf(stderr, "parsing VISUAL_OBJECT_SEQUENCE_END_CODE\n" ); |
| 671 | #endif |
| 672 | // Note that we've already read the VISUAL_OBJECT_SEQUENCE_END_CODE |
| 673 | save4Bytes(VISUAL_OBJECT_SEQUENCE_END_CODE); |
| 674 | |
| 675 | setParseState(PARSING_VISUAL_OBJECT_SEQUENCE); |
| 676 | |
| 677 | // Treat this as if we had ended a picture: |
| 678 | usingSource()->fPictureEndMarker = True; // HACK ##### |
| 679 | |
| 680 | return curFrameSize(); |
| 681 | } |
| 682 | |