| 1 | /********** |
| 2 | This library is free software; you can redistribute it and/or modify it under |
| 3 | the terms of the GNU Lesser General Public License as published by the |
| 4 | Free Software Foundation; either version 3 of the License, or (at your |
| 5 | option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.) |
| 6 | |
| 7 | This library is distributed in the hope that it will be useful, but WITHOUT |
| 8 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 9 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for |
| 10 | more details. |
| 11 | |
| 12 | You should have received a copy of the GNU Lesser General Public License |
| 13 | along with this library; if not, write to the Free Software Foundation, Inc., |
| 14 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 15 | **********/ |
| 16 | // "liveMedia" |
| 17 | // Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved. |
| 18 | // A simplified version of "MPEG4VideoStreamFramer" that takes only complete, |
| 19 | // discrete frames (rather than an arbitrary byte stream) as input. |
| 20 | // This avoids the parsing and data copying overhead of the full |
| 21 | // "MPEG4VideoStreamFramer". |
| 22 | // Implementation |
| 23 | |
| 24 | #include "MPEG4VideoStreamDiscreteFramer.hh" |
| 25 | |
| 26 | MPEG4VideoStreamDiscreteFramer* |
| 27 | MPEG4VideoStreamDiscreteFramer::createNew(UsageEnvironment& env, |
| 28 | FramedSource* inputSource, Boolean leavePresentationTimesUnmodified) { |
| 29 | // Need to add source type checking here??? ##### |
| 30 | return new MPEG4VideoStreamDiscreteFramer(env, inputSource, leavePresentationTimesUnmodified); |
| 31 | } |
| 32 | |
| 33 | MPEG4VideoStreamDiscreteFramer |
| 34 | ::MPEG4VideoStreamDiscreteFramer(UsageEnvironment& env, |
| 35 | FramedSource* inputSource, Boolean leavePresentationTimesUnmodified) |
| 36 | : MPEG4VideoStreamFramer(env, inputSource, False/*don't create a parser*/), |
| 37 | fLeavePresentationTimesUnmodified(leavePresentationTimesUnmodified), vop_time_increment_resolution(0), fNumVTIRBits(0), |
| 38 | fLastNonBFrameVop_time_increment(0) { |
| 39 | fLastNonBFramePresentationTime.tv_sec = 0; |
| 40 | fLastNonBFramePresentationTime.tv_usec = 0; |
| 41 | } |
| 42 | |
| 43 | MPEG4VideoStreamDiscreteFramer::~MPEG4VideoStreamDiscreteFramer() { |
| 44 | } |
| 45 | |
void MPEG4VideoStreamDiscreteFramer::doGetNextFrame() {
  // Arrange to read data (which should be a complete MPEG-4 video frame)
  // from our data source, directly into the client's input buffer.
  // After reading this, we'll do some parsing on the frame.
  // "afterGettingFrame" (static) is invoked on completion; if the upstream
  // source closes instead, "FramedSource::handleClosure" propagates that.
  fInputSource->getNextFrame(fTo, fMaxSize,
                             afterGettingFrame, this,
                             FramedSource::handleClosure, this);
}
| 54 | |
| 55 | void MPEG4VideoStreamDiscreteFramer |
| 56 | ::afterGettingFrame(void* clientData, unsigned frameSize, |
| 57 | unsigned numTruncatedBytes, |
| 58 | struct timeval presentationTime, |
| 59 | unsigned durationInMicroseconds) { |
| 60 | MPEG4VideoStreamDiscreteFramer* source = (MPEG4VideoStreamDiscreteFramer*)clientData; |
| 61 | source->afterGettingFrame1(frameSize, numTruncatedBytes, |
| 62 | presentationTime, durationInMicroseconds); |
| 63 | } |
| 64 | |
void MPEG4VideoStreamDiscreteFramer
::afterGettingFrame1(unsigned frameSize, unsigned numTruncatedBytes,
		     struct timeval presentationTime,
		     unsigned durationInMicroseconds) {
  // Inspect the complete frame that was just read, in order to:
  //  - save the "profile_and_level_indication" and the stream 'config' bytes
  //    (when the frame begins with a VISUAL_OBJECT_SEQUENCE_START_CODE), and
  //  - recompute "presentationTime" for "B" frames from the VOP timing fields
  //    (unless "fLeavePresentationTimesUnmodified" was requested).
  // Finally, deliver the (unmodified) frame data downstream via "afterGetting()".

  // Check that the first 4 bytes are an MPEG-4 start code (00 00 01 xx):
  if (frameSize >= 4 && fTo[0] == 0 && fTo[1] == 0 && fTo[2] == 1) {
    fPictureEndMarker = True; // Assume that we have a complete 'picture' here
    unsigned i = 3;
    if (fTo[i] == 0xB0) { // VISUAL_OBJECT_SEQUENCE_START_CODE
      // The next byte is the "profile_and_level_indication":
      if (frameSize >= 5) fProfileAndLevelIndication = fTo[4];

      // The start of this frame - up to the first GROUP_VOP_START_CODE
      // or VOP_START_CODE - is stream configuration information. Save this:
      for (i = 7; i < frameSize; ++i) {
	if ((fTo[i] == 0xB3 /*GROUP_VOP_START_CODE*/ ||
	     fTo[i] == 0xB6 /*VOP_START_CODE*/)
	    && fTo[i-1] == 1 && fTo[i-2] == 0 && fTo[i-3] == 0) {
	  break; // The configuration information ends here
	}
      }
      // If a GOV/VOP start code was found, exclude its 00 00 01 prefix from the
      // config bytes (hence "i-3"); otherwise the whole frame is config data:
      fNumConfigBytes = i < frameSize ? i-3 : frameSize;
      delete[] fConfigBytes; fConfigBytes = new unsigned char[fNumConfigBytes];
      for (unsigned j = 0; j < fNumConfigBytes; ++j) fConfigBytes[j] = fTo[j];

      // This information (should) also contain a VOL header, which we need
      // to analyze, to get "vop_time_increment_resolution" (which we need
      // - along with "vop_time_increment" - in order to generate accurate
      // presentation times for "B" frames).
      analyzeVOLHeader();
    }

    if (i < frameSize) {
      u_int8_t nextCode = fTo[i];

      if (nextCode == 0xB3 /*GROUP_VOP_START_CODE*/) {
	// Skip to the following VOP_START_CODE (if any):
	for (i += 4; i < frameSize; ++i) {
	  if (fTo[i] == 0xB6 /*VOP_START_CODE*/
	      && fTo[i-1] == 1 && fTo[i-2] == 0 && fTo[i-3] == 0) {
	    nextCode = fTo[i];
	    break;
	  }
	}
      }

      // "i+5 < frameSize" guarantees the 5 bytes read below are in bounds:
      if (nextCode == 0xB6 /*VOP_START_CODE*/ && i+5 < frameSize) {
	++i;

	// Get the "vop_coding_type" from the next byte:
	u_int8_t nextByte = fTo[i++];
	u_int8_t vop_coding_type = nextByte>>6;

	// Next, get the "modulo_time_base" by counting the '1' bits that
	// follow. We look at the next 32-bits only.
	// This should be enough in most cases.
	u_int32_t next4Bytes
	  = (fTo[i]<<24)|(fTo[i+1]<<16)|(fTo[i+2]<<8)|fTo[i+3];
	i += 4;
	// "timeInfo" = the low 6 bits of "nextByte" followed by the top 26
	// bits of "next4Bytes" (i.e., the bitstream right after vop_coding_type):
	u_int32_t timeInfo = (nextByte<<(32-6))|(next4Bytes>>6);
	unsigned modulo_time_base = 0;
	u_int32_t mask = 0x80000000;
	while ((timeInfo&mask) != 0) {
	  ++modulo_time_base;
	  mask >>= 1;
	}
	// Skip the '0' bit that terminates "modulo_time_base", plus "marker_bit":
	mask >>= 2;

	// Then, get the "vop_time_increment".
	unsigned vop_time_increment = 0;
	// First, make sure we have enough bits left for this:
	// NOTE(review): if "fNumVTIRBits" is 0 (no VOL header was parsed),
	// "mask>>(fNumVTIRBits-1)" shifts by >= 32 (undefined behavior) —
	// confirm fNumVTIRBits > 0 is guaranteed on this path.
	if ((mask>>(fNumVTIRBits-1)) != 0) {
	  // NOTE: this inner loop's "i" shadows the outer byte index "i":
	  for (unsigned i = 0; i < fNumVTIRBits; ++i) {
	    vop_time_increment |= timeInfo&mask;
	    mask >>= 1;
	  }
	  // Right-justify the bits just collected:
	  while (mask != 0) {
	    vop_time_increment >>= 1;
	    mask >>= 1;
	  }
	}

	// If this is a "B" frame, then we have to tweak "presentationTime":
	// (B frames are delivered in decoding order; their presentation time is
	//  derived from the last non-B frame's time, minus the time-increment
	//  delta scaled by "vop_time_increment_resolution".)
	if (!fLeavePresentationTimesUnmodified && vop_coding_type == 2/*B*/
	    && (fLastNonBFramePresentationTime.tv_usec > 0 ||
		fLastNonBFramePresentationTime.tv_sec > 0)) {
	  int timeIncrement
	    = fLastNonBFrameVop_time_increment - vop_time_increment;
	  if (timeIncrement<0) timeIncrement += vop_time_increment_resolution;
	  unsigned const MILLION = 1000000;
	  double usIncrement = vop_time_increment_resolution == 0 ? 0.0
	    : ((double)timeIncrement*MILLION)/vop_time_increment_resolution;
	  unsigned secondsToSubtract = (unsigned)(usIncrement/MILLION);
	  unsigned uSecondsToSubtract = ((unsigned)usIncrement)%MILLION;

	  // Subtract (seconds, microseconds) with manual borrow, clamping at 0:
	  presentationTime = fLastNonBFramePresentationTime;
	  if ((unsigned)presentationTime.tv_usec < uSecondsToSubtract) {
	    presentationTime.tv_usec += MILLION;
	    if (presentationTime.tv_sec > 0) --presentationTime.tv_sec;
	  }
	  presentationTime.tv_usec -= uSecondsToSubtract;
	  if ((unsigned)presentationTime.tv_sec > secondsToSubtract) {
	    presentationTime.tv_sec -= secondsToSubtract;
	  } else {
	    presentationTime.tv_sec = presentationTime.tv_usec = 0;
	  }
	} else {
	  // A non-B frame: remember its timing, for use with subsequent B frames:
	  fLastNonBFramePresentationTime = presentationTime;
	  fLastNonBFrameVop_time_increment = vop_time_increment;
	}
      }
    }
  }

  // Complete delivery to the client:
  fFrameSize = frameSize;
  fNumTruncatedBytes = numTruncatedBytes;
  fPresentationTime = presentationTime;
  fDurationInMicroseconds = durationInMicroseconds;
  afterGetting(this);
}
| 186 | |
| 187 | Boolean MPEG4VideoStreamDiscreteFramer::getNextFrameBit(u_int8_t& result) { |
| 188 | if (fNumBitsSeenSoFar/8 >= fNumConfigBytes) return False; |
| 189 | |
| 190 | u_int8_t nextByte = fConfigBytes[fNumBitsSeenSoFar/8]; |
| 191 | result = (nextByte>>(7-fNumBitsSeenSoFar%8))&1; |
| 192 | ++fNumBitsSeenSoFar; |
| 193 | return True; |
| 194 | } |
| 195 | |
| 196 | Boolean MPEG4VideoStreamDiscreteFramer::getNextFrameBits(unsigned numBits, |
| 197 | u_int32_t& result) { |
| 198 | result = 0; |
| 199 | for (unsigned i = 0; i < numBits; ++i) { |
| 200 | u_int8_t nextBit; |
| 201 | if (!getNextFrameBit(nextBit)) return False; |
| 202 | result = (result<<1)|nextBit; |
| 203 | } |
| 204 | return True; |
| 205 | } |
| 206 | |
| 207 | void MPEG4VideoStreamDiscreteFramer::() { |
| 208 | // Begin by moving to the VOL header: |
| 209 | unsigned i; |
| 210 | for (i = 3; i < fNumConfigBytes; ++i) { |
| 211 | if (fConfigBytes[i] >= 0x20 && fConfigBytes[i] <= 0x2F |
| 212 | && fConfigBytes[i-1] == 1 |
| 213 | && fConfigBytes[i-2] == 0 && fConfigBytes[i-3] == 0) { |
| 214 | ++i; |
| 215 | break; |
| 216 | } |
| 217 | } |
| 218 | |
| 219 | fNumBitsSeenSoFar = 8*i + 9; |
| 220 | do { |
| 221 | u_int8_t is_object_layer_identifier; |
| 222 | if (!getNextFrameBit(is_object_layer_identifier)) break; |
| 223 | if (is_object_layer_identifier) fNumBitsSeenSoFar += 7; |
| 224 | |
| 225 | u_int32_t aspect_ratio_info; |
| 226 | if (!getNextFrameBits(4, aspect_ratio_info)) break; |
| 227 | if (aspect_ratio_info == 15 /*extended_PAR*/) fNumBitsSeenSoFar += 16; |
| 228 | |
| 229 | u_int8_t vol_control_parameters; |
| 230 | if (!getNextFrameBit(vol_control_parameters)) break; |
| 231 | if (vol_control_parameters) { |
| 232 | fNumBitsSeenSoFar += 3; // chroma_format; low_delay |
| 233 | u_int8_t vbw_parameters; |
| 234 | if (!getNextFrameBit(vbw_parameters)) break; |
| 235 | if (vbw_parameters) fNumBitsSeenSoFar += 79; |
| 236 | } |
| 237 | |
| 238 | fNumBitsSeenSoFar += 2; // video_object_layer_shape |
| 239 | u_int8_t marker_bit; |
| 240 | if (!getNextFrameBit(marker_bit)) break; |
| 241 | if (marker_bit != 1) break; // sanity check |
| 242 | |
| 243 | if (!getNextFrameBits(16, vop_time_increment_resolution)) break; |
| 244 | if (vop_time_increment_resolution == 0) break; // shouldn't happen |
| 245 | |
| 246 | // Compute how many bits are necessary to represent this: |
| 247 | fNumVTIRBits = 0; |
| 248 | for (unsigned test = vop_time_increment_resolution; test>0; test /= 2) { |
| 249 | ++fNumVTIRBits; |
| 250 | } |
| 251 | } while (0); |
| 252 | } |
| 253 | |