1/**********
2This library is free software; you can redistribute it and/or modify it under
3the terms of the GNU Lesser General Public License as published by the
4Free Software Foundation; either version 3 of the License, or (at your
5option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.)
6
7This library is distributed in the hope that it will be useful, but WITHOUT
8ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
10more details.
11
12You should have received a copy of the GNU Lesser General Public License
13along with this library; if not, write to the Free Software Foundation, Inc.,
1451 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15**********/
16// "liveMedia"
17// Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved.
18// A parser for a Matroska file.
19// Implementation
20
21#include "MatroskaFileParser.hh"
22#include "MatroskaDemuxedTrack.hh"
23#include <ByteStreamFileSource.hh>
24#include <GroupsockHelper.hh> // for "gettimeofday()
25
26MatroskaFileParser::MatroskaFileParser(MatroskaFile& ourFile, FramedSource* inputSource,
27 FramedSource::onCloseFunc* onEndFunc, void* onEndClientData,
28 MatroskaDemux* ourDemux)
29 : StreamParser(inputSource, onEndFunc, onEndClientData, continueParsing, this),
30 fOurFile(ourFile), fInputSource(inputSource),
31 fOnEndFunc(onEndFunc), fOnEndClientData(onEndClientData),
32 fOurDemux(ourDemux),
33 fCurOffsetInFile(0), fSavedCurOffsetInFile(0), fLimitOffsetInFile(0),
34 fNumHeaderBytesToSkip(0), fClusterTimecode(0), fBlockTimecode(0),
35 fFrameSizesWithinBlock(NULL),
36 fPresentationTimeOffset(0.0) {
37 if (ourDemux == NULL) {
38 // Initialization
39 fCurrentParseState = PARSING_START_OF_FILE;
40
41 continueParsing();
42 } else {
43 fCurrentParseState = LOOKING_FOR_CLUSTER;
44 // In this case, parsing (of track data) doesn't start until a client starts reading from a track.
45 }
46}
47
48MatroskaFileParser::~MatroskaFileParser() {
49 delete[] fFrameSizesWithinBlock;
50 Medium::close(fInputSource);
51}
52
53void MatroskaFileParser::seekToTime(double& seekNPT) {
54#ifdef DEBUG
55 fprintf(stderr, "seekToTime(%f)\n", seekNPT);
56#endif
57 if (seekNPT <= 0.0) {
58#ifdef DEBUG
59 fprintf(stderr, "\t=> start of file\n");
60#endif
61 seekNPT = 0.0;
62 seekToFilePosition(0);
63 } else if (seekNPT >= fOurFile.fileDuration()) {
64#ifdef DEBUG
65 fprintf(stderr, "\t=> end of file\n");
66#endif
67 seekNPT = fOurFile.fileDuration();
68 seekToEndOfFile();
69 } else {
70 u_int64_t clusterOffsetInFile;
71 unsigned blockNumWithinCluster;
72 if (!fOurFile.lookupCuePoint(seekNPT, clusterOffsetInFile, blockNumWithinCluster)) {
73#ifdef DEBUG
74 fprintf(stderr, "\t=> not supported\n");
75#endif
76 return; // seeking not supported
77 }
78
79#ifdef DEBUG
80 fprintf(stderr, "\t=> seek time %f, file position %llu, block number within cluster %d\n", seekNPT, clusterOffsetInFile, blockNumWithinCluster);
81#endif
82 seekToFilePosition(clusterOffsetInFile);
83 fCurrentParseState = LOOKING_FOR_BLOCK;
84 // LATER handle "blockNumWithinCluster"; for now, we assume that it's 0 #####
85 }
86}
87
88void MatroskaFileParser
89::continueParsing(void* clientData, unsigned char* /*ptr*/, unsigned /*size*/, struct timeval /*presentationTime*/) {
90 ((MatroskaFileParser*)clientData)->continueParsing();
91}
92
93void MatroskaFileParser::continueParsing() {
94 if (fInputSource != NULL) {
95 if (!parse()) {
96 // We didn't complete the parsing, because we had to read more data from the source, or because we're waiting for
97 // another read from downstream. Once that happens, we'll get called again.
98 return;
99 }
100 }
101
102 // We successfully parsed the file. Call our 'done' function now:
103 if (fOnEndFunc != NULL) (*fOnEndFunc)(fOnEndClientData);
104}
105
106Boolean MatroskaFileParser::parse() {
107 Boolean areDone = False;
108
109 if (fInputSource->isCurrentlyAwaitingData()) return False;
110 // Our input source is currently being read. Wait until that read completes
111 try {
112 skipRemainingHeaderBytes(True); // if any
113 do {
114 if (fInputSource->isCurrentlyAwaitingData()) return False;
115 // Our input source is currently being read. Wait until that read completes
116
117 switch (fCurrentParseState) {
118 case PARSING_START_OF_FILE: {
119 areDone = parseStartOfFile();
120 break;
121 }
122 case LOOKING_FOR_TRACKS: {
123 lookForNextTrack();
124 break;
125 }
126 case PARSING_TRACK: {
127 areDone = parseTrack();
128 if (areDone && fOurFile.fCuesOffset > 0) {
129 // We've finished parsing the 'Track' information. There are also 'Cues' in the file, so parse those before finishing:
130 // Seek to the specified position in the file. We were already told that the 'Cues' begins there:
131#ifdef DEBUG
132 fprintf(stderr, "Seeking to file position %llu (the previously-reported location of 'Cues')\n", fOurFile.fCuesOffset);
133#endif
134 seekToFilePosition(fOurFile.fCuesOffset);
135 fCurrentParseState = PARSING_CUES;
136 areDone = False;
137 }
138 break;
139 }
140 case PARSING_CUES: {
141 areDone = parseCues();
142 break;
143 }
144 case LOOKING_FOR_CLUSTER: {
145 if (fOurFile.fClusterOffset > 0) {
146 // Optimization: Seek to the specified position in the file. We were already told that the 'Cluster' begins there:
147#ifdef DEBUG
148 fprintf(stderr, "Optimization: Seeking to file position %llu (the previously-reported location of a 'Cluster')\n", fOurFile.fClusterOffset);
149#endif
150 seekToFilePosition(fOurFile.fClusterOffset);
151 }
152 fCurrentParseState = LOOKING_FOR_BLOCK;
153 break;
154 }
155 case LOOKING_FOR_BLOCK: {
156 lookForNextBlock();
157 break;
158 }
159 case PARSING_BLOCK: {
160 parseBlock();
161 break;
162 }
163 case DELIVERING_FRAME_WITHIN_BLOCK: {
164 if (!deliverFrameWithinBlock()) return False;
165 break;
166 }
167 case DELIVERING_FRAME_BYTES: {
168 deliverFrameBytes();
169 return False; // Halt parsing for now. A new 'read' from downstream will cause parsing to resume.
170 break;
171 }
172 }
173 } while (!areDone);
174
175 return True;
176 } catch (int /*e*/) {
177#ifdef DEBUG
178 fprintf(stderr, "MatroskaFileParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n");
179#endif
180 return False; // the parsing got interrupted
181 }
182}
183
184Boolean MatroskaFileParser::parseStartOfFile() {
185#ifdef DEBUG
186 fprintf(stderr, "parsing start of file\n");
187#endif
188 EBMLId id;
189 EBMLDataSize size;
190
191 // The file must begin with the standard EBML header (which we skip):
192 if (!parseEBMLIdAndSize(id, size) || id != MATROSKA_ID_EBML) {
193 fOurFile.envir() << "ERROR: File does not begin with an EBML header\n";
194 return True; // We're done with the file, because it's not valid
195 }
196#ifdef DEBUG
197 fprintf(stderr, "MatroskaFileParser::parseStartOfFile(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val());
198#endif
199
200 fCurrentParseState = LOOKING_FOR_TRACKS;
201 skipHeader(size);
202
203 return False; // because we have more parsing to do - inside the 'Track' header
204}
205
206void MatroskaFileParser::lookForNextTrack() {
207#ifdef DEBUG
208 fprintf(stderr, "looking for Track\n");
209#endif
210 EBMLId id;
211 EBMLDataSize size;
212
213 // Read and skip over (or enter) each Matroska header, until we get to a 'Track'.
214 while (fCurrentParseState == LOOKING_FOR_TRACKS) {
215 while (!parseEBMLIdAndSize(id, size)) {}
216#ifdef DEBUG
217 fprintf(stderr, "MatroskaFileParser::lookForNextTrack(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val());
218#endif
219 switch (id.val()) {
220 case MATROSKA_ID_SEGMENT: { // 'Segment' header: enter this
221 // Remember the position, within the file, of the start of Segment data, because Seek Positions are relative to this:
222 fOurFile.fSegmentDataOffset = fCurOffsetInFile;
223 break;
224 }
225 case MATROSKA_ID_SEEK_HEAD: { // 'Seek Head' header: enter this
226 break;
227 }
228 case MATROSKA_ID_SEEK: { // 'Seek' header: enter this
229 break;
230 }
231 case MATROSKA_ID_SEEK_ID: { // 'Seek ID' header: get this value
232 if (parseEBMLNumber(fLastSeekId)) {
233#ifdef DEBUG
234 fprintf(stderr, "\tSeek ID 0x%s:\t%s\n", fLastSeekId.hexString(), fLastSeekId.stringName());
235#endif
236 }
237 break;
238 }
239 case MATROSKA_ID_SEEK_POSITION: { // 'Seek Position' header: get this value
240 u_int64_t seekPosition;
241 if (parseEBMLVal_unsigned64(size, seekPosition)) {
242 u_int64_t offsetInFile = fOurFile.fSegmentDataOffset + seekPosition;
243#ifdef DEBUG
244 fprintf(stderr, "\tSeek Position %llu (=> offset within the file: %llu (0x%llx))\n", seekPosition, offsetInFile, offsetInFile);
245#endif
246 // The only 'Seek Position's that we care about are for 'Cluster' and 'Cues':
247 if (fLastSeekId == MATROSKA_ID_CLUSTER) {
248 fOurFile.fClusterOffset = offsetInFile;
249 } else if (fLastSeekId == MATROSKA_ID_CUES) {
250 fOurFile.fCuesOffset = offsetInFile;
251 }
252 }
253 break;
254 }
255 case MATROSKA_ID_INFO: { // 'Segment Info' header: enter this
256 break;
257 }
258 case MATROSKA_ID_TIMECODE_SCALE: { // 'Timecode Scale' header: get this value
259 unsigned timecodeScale;
260 if (parseEBMLVal_unsigned(size, timecodeScale) && timecodeScale > 0) {
261 fOurFile.fTimecodeScale = timecodeScale;
262#ifdef DEBUG
263 fprintf(stderr, "\tTimecode Scale %u ns (=> Segment Duration == %f seconds)\n",
264 fOurFile.timecodeScale(), fOurFile.segmentDuration()*(fOurFile.fTimecodeScale/1000000000.0f));
265#endif
266 }
267 break;
268 }
269 case MATROSKA_ID_DURATION: { // 'Segment Duration' header: get this value
270 if (parseEBMLVal_float(size, fOurFile.fSegmentDuration)) {
271#ifdef DEBUG
272 fprintf(stderr, "\tSegment Duration %f (== %f seconds)\n",
273 fOurFile.segmentDuration(), fOurFile.segmentDuration()*(fOurFile.fTimecodeScale/1000000000.0f));
274#endif
275 }
276 break;
277 }
278#ifdef DEBUG
279 case MATROSKA_ID_TITLE: { // 'Segment Title': display this value
280 char* title;
281 if (parseEBMLVal_string(size, title)) {
282#ifdef DEBUG
283 fprintf(stderr, "\tTitle: %s\n", title);
284#endif
285 delete[] title;
286 }
287 break;
288 }
289#endif
290 case MATROSKA_ID_TRACKS: { // enter this, and move on to parsing 'Tracks'
291 fLimitOffsetInFile = fCurOffsetInFile + size.val(); // Make sure we don't read past the end of this header
292 fCurrentParseState = PARSING_TRACK;
293 break;
294 }
295 default: { // skip over this header
296 skipHeader(size);
297 break;
298 }
299 }
300 setParseState();
301 }
302}
303
304Boolean MatroskaFileParser::parseTrack() {
305#ifdef DEBUG
306 fprintf(stderr, "parsing Track\n");
307#endif
308 // Read and process each Matroska header, until we get to the end of the Track:
309 MatroskaTrack* track = NULL;
310 EBMLId id;
311 EBMLDataSize size;
312 while (fCurOffsetInFile < fLimitOffsetInFile) {
313 while (!parseEBMLIdAndSize(id, size)) {}
314#ifdef DEBUG
315 if (id == MATROSKA_ID_TRACK_ENTRY) fprintf(stderr, "\n"); // makes debugging output easier to read
316 fprintf(stderr, "MatroskaFileParser::parseTrack(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val());
317#endif
318 switch (id.val()) {
319 case MATROSKA_ID_TRACK_ENTRY: { // 'Track Entry' header: enter this
320 // Create a new "MatroskaTrack" object for this entry:
321 if (track != NULL && track->trackNumber == 0) delete track; // We had a previous "MatroskaTrack" object that was never used
322 track = new MatroskaTrack;
323 break;
324 }
325 case MATROSKA_ID_TRACK_NUMBER: {
326 unsigned trackNumber;
327 if (parseEBMLVal_unsigned(size, trackNumber)) {
328#ifdef DEBUG
329 fprintf(stderr, "\tTrack Number %d\n", trackNumber);
330#endif
331 if (track != NULL && trackNumber != 0) {
332 track->trackNumber = trackNumber;
333 fOurFile.addTrack(track, trackNumber);
334 }
335 }
336 break;
337 }
338 case MATROSKA_ID_TRACK_TYPE: {
339 unsigned trackType;
340 if (parseEBMLVal_unsigned(size, trackType) && track != NULL) {
341 // We convert the Matroska 'track type' code into our own code (which we can use as a bitmap):
342 track->trackType
343 = trackType == 1 ? MATROSKA_TRACK_TYPE_VIDEO : trackType == 2 ? MATROSKA_TRACK_TYPE_AUDIO
344 : trackType == 0x11 ? MATROSKA_TRACK_TYPE_SUBTITLE : MATROSKA_TRACK_TYPE_OTHER;
345#ifdef DEBUG
346 fprintf(stderr, "\tTrack Type 0x%02x (%s)\n", trackType,
347 track->trackType == MATROSKA_TRACK_TYPE_VIDEO ? "video" :
348 track->trackType == MATROSKA_TRACK_TYPE_AUDIO ? "audio" :
349 track->trackType == MATROSKA_TRACK_TYPE_SUBTITLE ? "subtitle" :
350 "<other>");
351#endif
352 }
353 break;
354 }
355 case MATROSKA_ID_FLAG_ENABLED: {
356 unsigned flagEnabled;
357 if (parseEBMLVal_unsigned(size, flagEnabled)) {
358#ifdef DEBUG
359 fprintf(stderr, "\tTrack is Enabled: %d\n", flagEnabled);
360#endif
361 if (track != NULL) track->isEnabled = flagEnabled != 0;
362 }
363 break;
364 }
365 case MATROSKA_ID_FLAG_DEFAULT: {
366 unsigned flagDefault;
367 if (parseEBMLVal_unsigned(size, flagDefault)) {
368#ifdef DEBUG
369 fprintf(stderr, "\tTrack is Default: %d\n", flagDefault);
370#endif
371 if (track != NULL) track->isDefault = flagDefault != 0;
372 }
373 break;
374 }
375 case MATROSKA_ID_FLAG_FORCED: {
376 unsigned flagForced;
377 if (parseEBMLVal_unsigned(size, flagForced)) {
378#ifdef DEBUG
379 fprintf(stderr, "\tTrack is Forced: %d\n", flagForced);
380#endif
381 if (track != NULL) track->isForced = flagForced != 0;
382 }
383 break;
384 }
385 case MATROSKA_ID_DEFAULT_DURATION: {
386 unsigned defaultDuration;
387 if (parseEBMLVal_unsigned(size, defaultDuration)) {
388#ifdef DEBUG
389 fprintf(stderr, "\tDefault duration %f ms\n", defaultDuration/1000000.0);
390#endif
391 if (track != NULL) track->defaultDuration = defaultDuration;
392 }
393 break;
394 }
395 case MATROSKA_ID_MAX_BLOCK_ADDITION_ID: {
396 unsigned maxBlockAdditionID;
397 if (parseEBMLVal_unsigned(size, maxBlockAdditionID)) {
398#ifdef DEBUG
399 fprintf(stderr, "\tMax Block Addition ID: %u\n", maxBlockAdditionID);
400#endif
401 }
402 break;
403 }
404 case MATROSKA_ID_NAME: {
405 char* name;
406 if (parseEBMLVal_string(size, name)) {
407#ifdef DEBUG
408 fprintf(stderr, "\tName: %s\n", name);
409#endif
410 if (track != NULL) {
411 delete[] track->name; track->name = name;
412 } else {
413 delete[] name;
414 }
415 }
416 break;
417 }
418 case MATROSKA_ID_LANGUAGE: {
419 char* language;
420 if (parseEBMLVal_string(size, language)) {
421#ifdef DEBUG
422 fprintf(stderr, "\tLanguage: %s\n", language);
423#endif
424 if (track != NULL) {
425 delete[] track->language; track->language = language;
426 } else {
427 delete[] language;
428 }
429 }
430 break;
431 }
432 case MATROSKA_ID_CODEC: {
433 char* codecID;
434 if (parseEBMLVal_string(size, codecID)) {
435#ifdef DEBUG
436 fprintf(stderr, "\tCodec ID: %s\n", codecID);
437#endif
438 if (track != NULL) {
439 delete[] track->codecID; track->codecID = codecID;
440
441 // Also set the track's "mimeType" field, if we can deduce it from the "codecID":
442 if (strcmp(codecID, "A_PCM/INT/BIG") == 0) {
443 track->mimeType = "audio/L16";
444 } else if (strncmp(codecID, "A_MPEG", 6) == 0) {
445 track->mimeType = "audio/MPEG";
446 } else if (strncmp(codecID, "A_AAC", 5) == 0) {
447 track->mimeType = "audio/AAC";
448 } else if (strncmp(codecID, "A_AC3", 5) == 0) {
449 track->mimeType = "audio/AC3";
450 } else if (strncmp(codecID, "A_VORBIS", 8) == 0) {
451 track->mimeType = "audio/VORBIS";
452 } else if (strcmp(codecID, "A_OPUS") == 0) {
453 track->mimeType = "audio/OPUS";
454 track->codecIsOpus = True;
455 } else if (strcmp(codecID, "V_MPEG4/ISO/AVC") == 0) {
456 track->mimeType = "video/H264";
457 } else if (strcmp(codecID, "V_MPEGH/ISO/HEVC") == 0) {
458 track->mimeType = "video/H265";
459 } else if (strncmp(codecID, "V_VP8", 5) == 0) {
460 track->mimeType = "video/VP8";
461 } else if (strncmp(codecID, "V_VP9", 5) == 0) {
462 track->mimeType = "video/VP9";
463 } else if (strncmp(codecID, "V_THEORA", 8) == 0) {
464 track->mimeType = "video/THEORA";
465 } else if (strncmp(codecID, "S_TEXT", 6) == 0) {
466 track->mimeType = "text/T140";
467 } else if (strncmp(codecID, "V_MJPEG", 7) == 0) {
468 track->mimeType = "video/JPEG";
469 } else if (strncmp(codecID, "V_UNCOMPRESSED", 14) == 0) {
470 track->mimeType = "video/RAW";
471 }
472 } else {
473 delete[] codecID;
474 }
475 }
476 break;
477 }
478 case MATROSKA_ID_CODEC_PRIVATE: {
479 u_int8_t* codecPrivate;
480 unsigned codecPrivateSize;
481 if (parseEBMLVal_binary(size, codecPrivate)) {
482 codecPrivateSize = (unsigned)size.val();
483#ifdef DEBUG
484 fprintf(stderr, "\tCodec Private: ");
485 for (unsigned i = 0; i < codecPrivateSize; ++i) fprintf(stderr, "%02x:", codecPrivate[i]);
486 fprintf(stderr, "\n");
487#endif
488 if (track != NULL) {
489 delete[] track->codecPrivate; track->codecPrivate = codecPrivate;
490 track->codecPrivateSize = codecPrivateSize;
491
492 // Hack for H.264 and H.265: The 'codec private' data contains
493 // the size of NAL unit lengths:
494 if (track->codecID != NULL) {
495 if (strcmp(track->codecID, "V_MPEG4/ISO/AVC") == 0) { // H.264
496 // Byte 4 of the 'codec private' data contains 'lengthSizeMinusOne':
497 if (codecPrivateSize >= 5) track->subframeSizeSize = (codecPrivate[4]&0x3) + 1;
498 } else if (strcmp(track->codecID, "V_MPEGH/ISO/HEVC") == 0) { // H.265
499 // H.265 'codec private' data is *supposed* to use the format that's described in
500 // http://lists.matroska.org/pipermail/matroska-devel/2013-September/004567.html
501 // However, some Matroska files use the same format that was used for H.264.
502 // We check for this here, by checking various fields that are supposed to be
503 // 'all-1' in the 'correct' format:
504 if (codecPrivateSize < 23 || (codecPrivate[13]&0xF0) != 0xF0 ||
505 (codecPrivate[15]&0xFC) != 0xFC || (codecPrivate[16]&0xFC) != 0xFC ||
506 (codecPrivate[17]&0xF8) != 0xF8 || (codecPrivate[18]&0xF8) != 0xF8) {
507 // The 'correct' format isn't being used, so assume the H.264 format instead:
508 track->codecPrivateUsesH264FormatForH265 = True;
509
510 // Byte 4 of the 'codec private' data contains 'lengthSizeMinusOne':
511 if (codecPrivateSize >= 5) track->subframeSizeSize = (codecPrivate[4]&0x3) + 1;
512 } else {
513 // This looks like the 'correct' format:
514 track->codecPrivateUsesH264FormatForH265 = False;
515
516 // Byte 21 of the 'codec private' data contains 'lengthSizeMinusOne':
517 track->subframeSizeSize = (codecPrivate[21]&0x3) + 1;
518 }
519 }
520 }
521 } else {
522 delete[] codecPrivate;
523 }
524 }
525 break;
526 }
527 case MATROSKA_ID_VIDEO: { // 'Video settings' header: enter this
528 break;
529 }
530 case MATROSKA_ID_PIXEL_WIDTH: {
531 unsigned pixelWidth;
532 if (parseEBMLVal_unsigned(size, pixelWidth)) {
533#ifdef DEBUG
534 fprintf(stderr, "\tPixel Width %d\n", pixelWidth);
535#endif
536 if (track != NULL) track->pixelWidth = pixelWidth;
537 }
538 break;
539 }
540 case MATROSKA_ID_PIXEL_HEIGHT: {
541 unsigned pixelHeight;
542 if (parseEBMLVal_unsigned(size, pixelHeight)) {
543#ifdef DEBUG
544 fprintf(stderr, "\tPixel Height %d\n", pixelHeight);
545#endif
546 if (track != NULL) track->pixelHeight = pixelHeight;
547 }
548 break;
549 }
550 case MATROSKA_ID_DISPLAY_WIDTH: {
551 unsigned displayWidth;
552 if (parseEBMLVal_unsigned(size, displayWidth)) {
553#ifdef DEBUG
554 fprintf(stderr, "\tDisplay Width %d\n", displayWidth);
555#endif
556 }
557 break;
558 }
559 case MATROSKA_ID_DISPLAY_HEIGHT: {
560 unsigned displayHeight;
561 if (parseEBMLVal_unsigned(size, displayHeight)) {
562#ifdef DEBUG
563 fprintf(stderr, "\tDisplay Height %d\n", displayHeight);
564#endif
565 }
566 break;
567 }
568 case MATROSKA_ID_DISPLAY_UNIT: {
569 unsigned displayUnit;
570 if (parseEBMLVal_unsigned(size, displayUnit)) {
571#ifdef DEBUG
572 fprintf(stderr, "\tDisplay Unit %d\n", displayUnit);
573#endif
574 }
575 break;
576 }
577 case MATROSKA_ID_AUDIO: { // 'Audio settings' header: enter this
578 break;
579 }
580 case MATROSKA_ID_SAMPLING_FREQUENCY: {
581 float samplingFrequency;
582 if (parseEBMLVal_float(size, samplingFrequency)) {
583 if (track != NULL) {
584 track->samplingFrequency = (unsigned)samplingFrequency;
585#ifdef DEBUG
586 fprintf(stderr, "\tSampling frequency %f (->%d)\n", samplingFrequency, track->samplingFrequency);
587#endif
588 }
589 }
590 break;
591 }
592 case MATROSKA_ID_OUTPUT_SAMPLING_FREQUENCY: {
593 float outputSamplingFrequency;
594 if (parseEBMLVal_float(size, outputSamplingFrequency)) {
595#ifdef DEBUG
596 fprintf(stderr, "\tOutput sampling frequency %f\n", outputSamplingFrequency);
597#endif
598 }
599 break;
600 }
601 case MATROSKA_ID_CHANNELS: {
602 unsigned numChannels;
603 if (parseEBMLVal_unsigned(size, numChannels)) {
604#ifdef DEBUG
605 fprintf(stderr, "\tChannels %d\n", numChannels);
606#endif
607 if (track != NULL) track->numChannels = numChannels;
608 }
609 break;
610 }
611 case MATROSKA_ID_BIT_DEPTH: {
612 unsigned bitDepth;
613 if (parseEBMLVal_unsigned(size, bitDepth)) {
614#ifdef DEBUG
615 fprintf(stderr, "\tBit Depth %d\n", bitDepth);
616#endif
617 if (track != NULL) track->bitDepth = bitDepth;
618 }
619 break;
620 }
621 case MATROSKA_ID_CONTENT_ENCODINGS:
622 case MATROSKA_ID_CONTENT_ENCODING: { // 'Content Encodings' or 'Content Encoding' header: enter this
623 break;
624 }
625 case MATROSKA_ID_CONTENT_COMPRESSION: { // 'Content Compression' header: enter this
626 // Note: We currently support only 'Header Stripping' compression, not 'zlib' compression (the default algorithm).
627 // Therefore, we disable this track, unless/until we later see that 'Header Stripping' is supported:
628 if (track != NULL) track->isEnabled = False;
629 break;
630 }
631 case MATROSKA_ID_CONTENT_COMP_ALGO: {
632 unsigned contentCompAlgo;
633 if (parseEBMLVal_unsigned(size, contentCompAlgo)) {
634#ifdef DEBUG
635 fprintf(stderr, "\tContent Compression Algorithm %d (%s)\n", contentCompAlgo,
636 contentCompAlgo == 0 ? "zlib" : contentCompAlgo == 3 ? "Header Stripping" : "<unknown>");
637#endif
638 // The only compression algorithm that we support is #3: Header Stripping; disable the track otherwise
639 if (track != NULL) track->isEnabled = contentCompAlgo == 3;
640 }
641 break;
642 }
643 case MATROSKA_ID_CONTENT_COMP_SETTINGS: {
644 u_int8_t* headerStrippedBytes;
645 unsigned headerStrippedBytesSize;
646 if (parseEBMLVal_binary(size, headerStrippedBytes)) {
647 headerStrippedBytesSize = (unsigned)size.val();
648#ifdef DEBUG
649 fprintf(stderr, "\tHeader Stripped Bytes: ");
650 for (unsigned i = 0; i < headerStrippedBytesSize; ++i) fprintf(stderr, "%02x:", headerStrippedBytes[i]);
651 fprintf(stderr, "\n");
652#endif
653 if (track != NULL) {
654 delete[] track->headerStrippedBytes; track->headerStrippedBytes = headerStrippedBytes;
655 track->headerStrippedBytesSize = headerStrippedBytesSize;
656 } else {
657 delete[] headerStrippedBytes;
658 }
659 }
660 break;
661 }
662 case MATROSKA_ID_CONTENT_ENCRYPTION: { // 'Content Encrpytion' header: skip this
663 // Note: We don't currently support encryption at all. Therefore, we disable this track:
664 if (track != NULL) track->isEnabled = False;
665 // Fall through to...
666 }
667 case MATROSKA_ID_COLOR_SPACE: {
668 u_int8_t* colourSpace;
669 unsigned colourSpaceSize;
670 if (parseEBMLVal_binary(size, colourSpace)) {
671 colourSpaceSize = (unsigned)size.val();
672#ifdef DEBUG
673 fprintf(stderr, "\tColor space : %02x %02x %02x %02x\n", colourSpace[0], colourSpace[1], colourSpace[2], colourSpace[3]);
674#endif
675 if ((track != NULL) && (colourSpaceSize == 4)) {
676 //convert to sampling value (rfc 4175)
677 if ((strncmp((const char*)colourSpace, "I420", 4) == 0) || (strncmp((const char*)colourSpace, "IYUV", 4) == 0)){
678 track->colorSampling = "YCbCr-4:2:0";
679 }
680 else if ((strncmp((const char*)colourSpace, "YUY2", 4) == 0) || (strncmp((const char*)colourSpace, "UYVY", 4) == 0)){
681 track->colorSampling = "YCbCr-4:2:2";
682 }
683 else if (strncmp((const char*)colourSpace, "AYUV", 4) == 0) {
684 track->colorSampling = "YCbCr-4:4:4";
685 }
686 else if ((strncmp((const char*)colourSpace, "Y41P", 4) == 0) || (strncmp((const char*)colourSpace, "Y41T", 4) == 0)) {
687 track->colorSampling = "YCbCr-4:1:1";
688 }
689 else if (strncmp((const char*)colourSpace, "RGBA", 4) == 0) {
690 track->colorSampling = "RGBA";
691 }
692 else if (strncmp((const char*)colourSpace, "BGRA", 4) == 0) {
693 track->colorSampling = "BGRA";
694 }
695 } else {
696 delete[] colourSpace;
697 }
698 }
699 break;
700 }
701 case MATROSKA_ID_PRIMARIES: {
702 unsigned primaries;
703 if (parseEBMLVal_unsigned(size, primaries)) {
704#ifdef DEBUG
705 fprintf(stderr, "\tPrimaries %u\n", primaries);
706#endif
707 if (track != NULL) {
708 switch (primaries) {
709 case 1: //ITU-R BT.709
710 track->colorimetry = "BT709-2";
711 break;
712 case 7: //SMPTE 240M
713 track->colorimetry = "SMPTE240M";
714 break;
715 case 2: //Unspecified
716 case 3: //Reserved
717 case 4: //ITU-R BT.470M
718 case 5: //ITU-R BT.470BG
719 case 6: //SMPTE 170M
720 case 8: //FILM
721 case 9: //ITU-R BT.2020
722 default:
723#ifdef DEBUG
724 fprintf(stderr, "\tUnsupported color primaries %u\n", primaries);
725#endif
726 break;
727 }
728 }
729 }
730 }
731 default: { // We don't process this header, so just skip over it:
732 skipHeader(size);
733 break;
734 }
735 }
736 setParseState();
737 }
738
739 fLimitOffsetInFile = 0; // reset
740 if (track != NULL && track->trackNumber == 0) delete track; // We had a previous "MatroskaTrack" object that was never used
741 return True; // we're done parsing track entries
742}
743
744void MatroskaFileParser::lookForNextBlock() {
745#ifdef DEBUG
746 fprintf(stderr, "looking for Block\n");
747#endif
748 // Read and skip over each Matroska header, until we get to a 'Cluster':
749 EBMLId id;
750 EBMLDataSize size;
751 while (fCurrentParseState == LOOKING_FOR_BLOCK) {
752 while (!parseEBMLIdAndSize(id, size)) {}
753#ifdef DEBUG
754 fprintf(stderr, "MatroskaFileParser::lookForNextBlock(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val());
755#endif
756 switch (id.val()) {
757 case MATROSKA_ID_SEGMENT: { // 'Segment' header: enter this
758 break;
759 }
760 case MATROSKA_ID_CLUSTER: { // 'Cluster' header: enter this
761 break;
762 }
763 case MATROSKA_ID_TIMECODE: { // 'Timecode' header: get this value
764 unsigned timecode;
765 if (parseEBMLVal_unsigned(size, timecode)) {
766 fClusterTimecode = timecode;
767#ifdef DEBUG
768 fprintf(stderr, "\tCluster timecode: %d (== %f seconds)\n", fClusterTimecode, fClusterTimecode*(fOurFile.fTimecodeScale/1000000000.0));
769#endif
770 }
771 break;
772 }
773 case MATROSKA_ID_BLOCK_GROUP: { // 'Block Group' header: enter this
774 break;
775 }
776 case MATROSKA_ID_SIMPLEBLOCK:
777 case MATROSKA_ID_BLOCK: { // 'SimpleBlock' or 'Block' header: enter this (and we're done)
778 fBlockSize = (unsigned)size.val();
779 fCurrentParseState = PARSING_BLOCK;
780 break;
781 }
782 case MATROSKA_ID_BLOCK_DURATION: { // 'Block Duration' header: get this value (but we currently don't do anything with it)
783 unsigned blockDuration;
784 if (parseEBMLVal_unsigned(size, blockDuration)) {
785#ifdef DEBUG
786 fprintf(stderr, "\tblock duration: %d (== %f ms)\n", blockDuration, (float)(blockDuration*fOurFile.fTimecodeScale/1000000.0));
787#endif
788 }
789 break;
790 }
791 // Attachments are parsed only if we're in DEBUG mode (otherwise we just skip over them):
792#ifdef DEBUG
793 case MATROSKA_ID_ATTACHMENTS: { // 'Attachments': enter this
794 break;
795 }
796 case MATROSKA_ID_ATTACHED_FILE: { // 'Attached File': enter this
797 break;
798 }
799 case MATROSKA_ID_FILE_DESCRIPTION: { // 'File Description': get this value
800 char* fileDescription;
801 if (parseEBMLVal_string(size, fileDescription)) {
802#ifdef DEBUG
803 fprintf(stderr, "\tFile Description: %s\n", fileDescription);
804#endif
805 delete[] fileDescription;
806 }
807 break;
808 }
809 case MATROSKA_ID_FILE_NAME: { // 'File Name': get this value
810 char* fileName;
811 if (parseEBMLVal_string(size, fileName)) {
812#ifdef DEBUG
813 fprintf(stderr, "\tFile Name: %s\n", fileName);
814#endif
815 delete[] fileName;
816 }
817 break;
818 }
819 case MATROSKA_ID_FILE_MIME_TYPE: { // 'File MIME Type': get this value
820 char* fileMIMEType;
821 if (parseEBMLVal_string(size, fileMIMEType)) {
822#ifdef DEBUG
823 fprintf(stderr, "\tFile MIME Type: %s\n", fileMIMEType);
824#endif
825 delete[] fileMIMEType;
826 }
827 break;
828 }
829 case MATROSKA_ID_FILE_UID: { // 'File UID': get this value
830 unsigned fileUID;
831 if (parseEBMLVal_unsigned(size, fileUID)) {
832#ifdef DEBUG
833 fprintf(stderr, "\tFile UID: 0x%x\n", fileUID);
834#endif
835 }
836 break;
837 }
838#endif
839 default: { // skip over this header
840 skipHeader(size);
841 break;
842 }
843 }
844 setParseState();
845 }
846}
847
848Boolean MatroskaFileParser::parseCues() {
849#if defined(DEBUG) || defined(DEBUG_CUES)
850 fprintf(stderr, "parsing Cues\n");
851#endif
852 EBMLId id;
853 EBMLDataSize size;
854
855 // Read the next header, which should be MATROSKA_ID_CUES:
856 if (!parseEBMLIdAndSize(id, size) || id != MATROSKA_ID_CUES) return True; // The header wasn't what we expected, so we're done
857 fLimitOffsetInFile = fCurOffsetInFile + size.val(); // Make sure we don't read past the end of this header
858
859 double currentCueTime = 0.0;
860 u_int64_t currentClusterOffsetInFile = 0;
861
862 while (fCurOffsetInFile < fLimitOffsetInFile) {
863 while (!parseEBMLIdAndSize(id, size)) {}
864#ifdef DEBUG_CUES
865 if (id == MATROSKA_ID_CUE_POINT) fprintf(stderr, "\n"); // makes debugging output easier to read
866 fprintf(stderr, "MatroskaFileParser::parseCues(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val());
867#endif
868 switch (id.val()) {
869 case MATROSKA_ID_CUE_POINT: { // 'Cue Point' header: enter this
870 break;
871 }
872 case MATROSKA_ID_CUE_TIME: { // 'Cue Time' header: get this value
873 unsigned cueTime;
874 if (parseEBMLVal_unsigned(size, cueTime)) {
875 currentCueTime = cueTime*(fOurFile.fTimecodeScale/1000000000.0);
876#ifdef DEBUG_CUES
877 fprintf(stderr, "\tCue Time %d (== %f seconds)\n", cueTime, currentCueTime);
878#endif
879 }
880 break;
881 }
882 case MATROSKA_ID_CUE_TRACK_POSITIONS: { // 'Cue Track Positions' header: enter this
883 break;
884 }
885 case MATROSKA_ID_CUE_TRACK: { // 'Cue Track' header: get this value (but only for debugging; we don't do anything with it)
886 unsigned cueTrack;
887 if (parseEBMLVal_unsigned(size, cueTrack)) {
888#ifdef DEBUG_CUES
889 fprintf(stderr, "\tCue Track %d\n", cueTrack);
890#endif
891 }
892 break;
893 }
894 case MATROSKA_ID_CUE_CLUSTER_POSITION: { // 'Cue Cluster Position' header: get this value
895 u_int64_t cueClusterPosition;
896 if (parseEBMLVal_unsigned64(size, cueClusterPosition)) {
897 currentClusterOffsetInFile = fOurFile.fSegmentDataOffset + cueClusterPosition;
898#ifdef DEBUG_CUES
899 fprintf(stderr, "\tCue Cluster Position %llu (=> offset within the file: %llu (0x%llx))\n", cueClusterPosition, currentClusterOffsetInFile, currentClusterOffsetInFile);
900#endif
901 // Record this cue point:
902 fOurFile.addCuePoint(currentCueTime, currentClusterOffsetInFile, 1/*default block number within cluster*/);
903 }
904 break;
905 }
906 case MATROSKA_ID_CUE_BLOCK_NUMBER: { // 'Cue Block Number' header: get this value
907 unsigned cueBlockNumber;
908 if (parseEBMLVal_unsigned(size, cueBlockNumber) && cueBlockNumber != 0) {
909#ifdef DEBUG_CUES
910 fprintf(stderr, "\tCue Block Number %d\n", cueBlockNumber);
911#endif
912 // Record this cue point (overwriting any existing entry for this cue time):
913 fOurFile.addCuePoint(currentCueTime, currentClusterOffsetInFile, cueBlockNumber);
914 }
915 break;
916 }
917 default: { // We don't process this header, so just skip over it:
918 skipHeader(size);
919 break;
920 }
921 }
922 setParseState();
923 }
924
925 fLimitOffsetInFile = 0; // reset
926#if defined(DEBUG) || defined(DEBUG_CUES)
927 fprintf(stderr, "done parsing Cues\n");
928#endif
929#ifdef DEBUG_CUES
930 fprintf(stderr, "Cue Point tree: ");
931 fOurFile.printCuePoints(stderr);
932 fprintf(stderr, "\n");
933#endif
934 return True; // we're done parsing Cues
935}
936
// The ways in which several frames can be packed ('laced') into a single Matroska block:
enum MatroskaLacingType {
  NoLacing,
  XiphLacing,
  FixedSizeLacing,
  EBMLLacing
};
938
939void MatroskaFileParser::parseBlock() {
940#ifdef DEBUG
941 fprintf(stderr, "parsing SimpleBlock or Block\n");
942#endif
943 do {
944 unsigned blockStartPos = curOffset();
945
946 // The block begins with the track number:
947 EBMLNumber trackNumber;
948 if (!parseEBMLNumber(trackNumber)) break;
949 fBlockTrackNumber = (unsigned)trackNumber.val();
950
951 // If this track is not being read, then skip the rest of this block, and look for another one:
952 if (fOurDemux->lookupDemuxedTrack(fBlockTrackNumber) == NULL) {
953 unsigned headerBytesSeen = curOffset() - blockStartPos;
954 if (headerBytesSeen < fBlockSize) {
955 skipBytes(fBlockSize - headerBytesSeen);
956 }
957#ifdef DEBUG
958 fprintf(stderr, "\tSkipped block for unused track number %d\n", fBlockTrackNumber);
959#endif
960 fCurrentParseState = LOOKING_FOR_BLOCK;
961 setParseState();
962 return;
963 }
964
965 MatroskaTrack* track = fOurFile.lookup(fBlockTrackNumber);
966 if (track == NULL) break; // shouldn't happen
967
968 // The next two bytes are the block's timecode (relative to the cluster timecode)
969 fBlockTimecode = (get1Byte()<<8)|get1Byte();
970
971 // The next byte indicates the type of 'lacing' used:
972 u_int8_t c = get1Byte();
973 c &= 0x6; // we're interested in bits 5-6 only
974 MatroskaLacingType lacingType = (c==0x0)?NoLacing : (c==0x02)?XiphLacing : (c==0x04)?FixedSizeLacing : EBMLLacing;
975#ifdef DEBUG
976 fprintf(stderr, "\ttrack number %d, timecode %d (=> %f seconds), %s lacing\n", fBlockTrackNumber, fBlockTimecode, (fClusterTimecode+fBlockTimecode)*(fOurFile.fTimecodeScale/1000000000.0), (lacingType==NoLacing)?"no" : (lacingType==XiphLacing)?"Xiph" : (lacingType==FixedSizeLacing)?"fixed-size" : "EBML");
977#endif
978
979 if (lacingType == NoLacing) {
980 fNumFramesInBlock = 1;
981 } else {
982 // The next byte tells us how many frames are present in this block
983 fNumFramesInBlock = get1Byte() + 1;
984 }
985 delete[] fFrameSizesWithinBlock; fFrameSizesWithinBlock = new unsigned[fNumFramesInBlock];
986 if (fFrameSizesWithinBlock == NULL) break;
987
988 if (lacingType == NoLacing) {
989 unsigned headerBytesSeen = curOffset() - blockStartPos;
990 if (headerBytesSeen > fBlockSize) break;
991
992 fFrameSizesWithinBlock[0] = fBlockSize - headerBytesSeen;
993 } else if (lacingType == FixedSizeLacing) {
994 unsigned headerBytesSeen = curOffset() - blockStartPos;
995 if (headerBytesSeen > fBlockSize) break;
996
997 unsigned frameBytesAvailable = fBlockSize - headerBytesSeen;
998 unsigned constantFrameSize = frameBytesAvailable/fNumFramesInBlock;
999
1000 for (unsigned i = 0; i < fNumFramesInBlock; ++i) {
1001 fFrameSizesWithinBlock[i] = constantFrameSize;
1002 }
1003 // If there are any bytes left over, assign them to the last frame:
1004 fFrameSizesWithinBlock[fNumFramesInBlock-1] += frameBytesAvailable%fNumFramesInBlock;
1005 } else { // EBML or Xiph lacing
1006 unsigned curFrameSize = 0;
1007 unsigned frameSizesTotal = 0;
1008 unsigned i;
1009
1010 for (i = 0; i < fNumFramesInBlock-1; ++i) {
1011 if (lacingType == EBMLLacing) {
1012 EBMLNumber frameSize;
1013 if (!parseEBMLNumber(frameSize)) break;
1014 unsigned fsv = (unsigned)frameSize.val();
1015
1016 if (i == 0) {
1017 curFrameSize = fsv;
1018 } else {
1019 // The value we read is a signed value, that's added to the previous frame size, to get the current frame size:
1020 unsigned toSubtract = (fsv>0xFFFFFF)?0x07FFFFFF : (fsv>0xFFFF)?0x0FFFFF : (fsv>0xFF)?0x1FFF : 0x3F;
1021 int fsv_signed = fsv - toSubtract;
1022 curFrameSize += fsv_signed;
1023 if ((int)curFrameSize < 0) break;
1024 }
1025 } else { // Xiph lacing
1026 curFrameSize = 0;
1027 u_int8_t c;
1028 do {
1029 c = get1Byte();
1030 curFrameSize += c;
1031 } while (c == 0xFF);
1032 }
1033 fFrameSizesWithinBlock[i] = curFrameSize;
1034 frameSizesTotal += curFrameSize;
1035 }
1036 if (i != fNumFramesInBlock-1) break; // an error occurred within the "for" loop
1037
1038 // Compute the size of the final frame within the block (from the block's size, and the frame sizes already computed):)
1039 unsigned headerBytesSeen = curOffset() - blockStartPos;
1040 if (headerBytesSeen + frameSizesTotal > fBlockSize) break;
1041 fFrameSizesWithinBlock[i] = fBlockSize - (headerBytesSeen + frameSizesTotal);
1042 }
1043
1044 // We're done parsing headers within the block, and (as a result) we now know the sizes of all frames within the block.
1045 // If we have 'stripped bytes' that are common to (the front of) all frames, then count them now:
1046 if (track->headerStrippedBytesSize != 0) {
1047 for (unsigned i = 0; i < fNumFramesInBlock; ++i) fFrameSizesWithinBlock[i] += track->headerStrippedBytesSize;
1048 }
1049#ifdef DEBUG
1050 fprintf(stderr, "\tThis block contains %d frame(s); size(s):", fNumFramesInBlock);
1051 unsigned frameSizesTotal = 0;
1052 for (unsigned i = 0; i < fNumFramesInBlock; ++i) {
1053 fprintf(stderr, " %d", fFrameSizesWithinBlock[i]);
1054 frameSizesTotal += fFrameSizesWithinBlock[i];
1055 }
1056 if (fNumFramesInBlock > 1) fprintf(stderr, " (total: %u)", frameSizesTotal);
1057 fprintf(stderr, " bytes\n");
1058#endif
1059 // Next, start delivering these frames:
1060 fCurrentParseState = DELIVERING_FRAME_WITHIN_BLOCK;
1061 fCurOffsetWithinFrame = fNextFrameNumberToDeliver = 0;
1062 setParseState();
1063 return;
1064 } while (0);
1065
1066 // An error occurred. Try to recover:
1067#ifdef DEBUG
1068 fprintf(stderr, "parseBlock(): Error parsing data; trying to recover...\n");
1069#endif
1070 fCurrentParseState = LOOKING_FOR_BLOCK;
1071}
1072
1073Boolean MatroskaFileParser::deliverFrameWithinBlock() {
1074#ifdef DEBUG
1075 fprintf(stderr, "delivering frame within SimpleBlock or Block\n");
1076#endif
1077 do {
1078 MatroskaTrack* track = fOurFile.lookup(fBlockTrackNumber);
1079 if (track == NULL) break; // shouldn't happen
1080
1081 MatroskaDemuxedTrack* demuxedTrack = fOurDemux->lookupDemuxedTrack(fBlockTrackNumber);
1082 if (demuxedTrack == NULL) break; // shouldn't happen
1083 if (!demuxedTrack->isCurrentlyAwaitingData()) {
1084 // Someone has been reading this stream, but isn't right now.
1085 // We can't deliver this frame until he asks for it, so punt for now.
1086 // The next time he asks for a frame, he'll get it.
1087#ifdef DEBUG
1088 fprintf(stderr, "\tdeferring delivery of frame #%d (%d bytes)", fNextFrameNumberToDeliver, fFrameSizesWithinBlock[fNextFrameNumberToDeliver]);
1089 if (track->haveSubframes()) fprintf(stderr, "[offset %d]", fCurOffsetWithinFrame);
1090 fprintf(stderr, "\n");
1091#endif
1092 restoreSavedParserState(); // so we read from the beginning next time
1093 return False;
1094 }
1095
1096 unsigned frameSize;
1097 u_int8_t const* specialFrameSource = NULL;
1098 u_int8_t const opusCommentHeader[16]
1099 = {'O','p','u','s','T','a','g','s', 0, 0, 0, 0, 0, 0, 0, 0};
1100 if (track->codecIsOpus && demuxedTrack->fOpusTrackNumber < 2) {
1101 // Special case for Opus audio. The first frame (the 'configuration' header) comes from
1102 // the 'private data'. The second frame (the 'comment' header) comes is synthesized by
1103 // us here:
1104 if (demuxedTrack->fOpusTrackNumber == 0) {
1105 specialFrameSource = track->codecPrivate;
1106 frameSize = track->codecPrivateSize;
1107 } else { // demuxedTrack->fOpusTrackNumber == 1
1108 specialFrameSource = opusCommentHeader;
1109 frameSize = sizeof opusCommentHeader;
1110 }
1111 ++demuxedTrack->fOpusTrackNumber;
1112 } else {
1113 frameSize = fFrameSizesWithinBlock[fNextFrameNumberToDeliver];
1114 if (track->haveSubframes()) {
1115 // The next "track->subframeSizeSize" bytes contain the length of a 'subframe':
1116 if (fCurOffsetWithinFrame + track->subframeSizeSize > frameSize) break; // sanity check
1117 unsigned subframeSize = 0;
1118 for (unsigned i = 0; i < track->subframeSizeSize; ++i) {
1119 u_int8_t c;
1120 getCommonFrameBytes(track, &c, 1, 0);
1121 if (fCurFrameNumBytesToGet > 0) { // it'll be 1
1122 c = get1Byte();
1123 ++fCurOffsetWithinFrame;
1124 }
1125 subframeSize = subframeSize*256 + c;
1126 }
1127 if (subframeSize == 0 || fCurOffsetWithinFrame + subframeSize > frameSize) break; // sanity check
1128 frameSize = subframeSize;
1129 }
1130 }
1131
1132 // Compute the presentation time of this frame (from the cluster timecode, the block timecode, and the default duration):
1133 double pt = (fClusterTimecode+fBlockTimecode)*(fOurFile.fTimecodeScale/1000000000.0)
1134 + fNextFrameNumberToDeliver*(track->defaultDuration/1000000000.0);
1135 if (fPresentationTimeOffset == 0.0) {
1136 // This is the first time we've computed a presentation time. Compute an offset to make the presentation times aligned
1137 // with 'wall clock' time:
1138 struct timeval timeNow;
1139 gettimeofday(&timeNow, NULL);
1140 double ptNow = timeNow.tv_sec + timeNow.tv_usec/1000000.0;
1141 fPresentationTimeOffset = ptNow - pt;
1142 }
1143 pt += fPresentationTimeOffset;
1144 struct timeval presentationTime;
1145 presentationTime.tv_sec = (unsigned)pt;
1146 presentationTime.tv_usec = (unsigned)((pt - presentationTime.tv_sec)*1000000);
1147 unsigned durationInMicroseconds;
1148 if (specialFrameSource != NULL) {
1149 durationInMicroseconds = 0;
1150 } else { // normal case
1151 durationInMicroseconds = track->defaultDuration/1000;
1152 if (track->haveSubframes()) {
1153 // If this is a 'subframe', use a duration of 0 instead (unless it's the last 'subframe'):
1154 if (fCurOffsetWithinFrame + frameSize + track->subframeSizeSize < fFrameSizesWithinBlock[fNextFrameNumberToDeliver]) {
1155 // There's room for at least one more subframe after this, so give this subframe a duration of 0
1156 durationInMicroseconds = 0;
1157 }
1158 }
1159 }
1160
1161 if (track->defaultDuration == 0) {
1162 // Adjust the frame duration to keep the sum of frame durations aligned with presentation times.
1163 if (demuxedTrack->prevPresentationTime().tv_sec != 0) { // not the first time for this track
1164 demuxedTrack->durationImbalance()
1165 += (presentationTime.tv_sec - demuxedTrack->prevPresentationTime().tv_sec)*1000000
1166 + (presentationTime.tv_usec - demuxedTrack->prevPresentationTime().tv_usec);
1167 }
1168 int adjustment = 0;
1169 if (demuxedTrack->durationImbalance() > 0) {
1170 // The duration needs to be increased.
1171 int const adjustmentThreshold = 100000; // don't increase the duration by more than this amount (in case there's a mistake)
1172 adjustment = demuxedTrack->durationImbalance() > adjustmentThreshold
1173 ? adjustmentThreshold : demuxedTrack->durationImbalance();
1174 } else if (demuxedTrack->durationImbalance() < 0) {
1175 // The duration needs to be decreased.
1176 adjustment = (unsigned)(-demuxedTrack->durationImbalance()) < durationInMicroseconds
1177 ? demuxedTrack->durationImbalance() : -(int)durationInMicroseconds;
1178 }
1179 durationInMicroseconds += adjustment;
1180 demuxedTrack->durationImbalance() -= durationInMicroseconds; // for next time
1181 demuxedTrack->prevPresentationTime() = presentationTime; // for next time
1182 }
1183
1184 demuxedTrack->presentationTime() = presentationTime;
1185 demuxedTrack->durationInMicroseconds() = durationInMicroseconds;
1186
1187 // Deliver the next block now:
1188 if (frameSize > demuxedTrack->maxSize()) {
1189 demuxedTrack->numTruncatedBytes() = frameSize - demuxedTrack->maxSize();
1190 demuxedTrack->frameSize() = demuxedTrack->maxSize();
1191 } else { // normal case
1192 demuxedTrack->numTruncatedBytes() = 0;
1193 demuxedTrack->frameSize() = frameSize;
1194 }
1195 getCommonFrameBytes(track, demuxedTrack->to(), demuxedTrack->frameSize(), demuxedTrack->numTruncatedBytes());
1196
1197 // Next, deliver (and/or skip) bytes from the input file:
1198 if (specialFrameSource != NULL) {
1199 memmove(demuxedTrack->to(), specialFrameSource, demuxedTrack->frameSize());
1200#ifdef DEBUG
1201 fprintf(stderr, "\tdelivered special frame: %d bytes", demuxedTrack->frameSize());
1202 if (demuxedTrack->numTruncatedBytes() > 0) fprintf(stderr, " (%d bytes truncated)", demuxedTrack->numTruncatedBytes());
1203 fprintf(stderr, " @%u.%06u (%.06f from start); duration %u us\n", demuxedTrack->presentationTime().tv_sec, demuxedTrack->presentationTime().tv_usec, demuxedTrack->presentationTime().tv_sec+demuxedTrack->presentationTime().tv_usec/1000000.0-fPresentationTimeOffset, demuxedTrack->durationInMicroseconds());
1204#endif
1205 setParseState();
1206 FramedSource::afterGetting(demuxedTrack); // completes delivery
1207 } else { // normal case
1208 fCurrentParseState = DELIVERING_FRAME_BYTES;
1209 setParseState();
1210 }
1211 return True;
1212 } while (0);
1213
1214 // An error occurred. Try to recover:
1215#ifdef DEBUG
1216 fprintf(stderr, "deliverFrameWithinBlock(): Error parsing data; trying to recover...\n");
1217#endif
1218 fCurrentParseState = LOOKING_FOR_BLOCK;
1219 return True;
1220}
1221
1222void MatroskaFileParser::deliverFrameBytes() {
1223 do {
1224 MatroskaTrack* track = fOurFile.lookup(fBlockTrackNumber);
1225 if (track == NULL) break; // shouldn't happen
1226
1227 MatroskaDemuxedTrack* demuxedTrack = fOurDemux->lookupDemuxedTrack(fBlockTrackNumber);
1228 if (demuxedTrack == NULL) break; // shouldn't happen
1229
1230 unsigned const BANK_SIZE = bankSize();
1231 while (fCurFrameNumBytesToGet > 0) {
1232 // Hack: We can get no more than BANK_SIZE bytes at a time:
1233 unsigned numBytesToGet = fCurFrameNumBytesToGet > BANK_SIZE ? BANK_SIZE : fCurFrameNumBytesToGet;
1234 getBytes(fCurFrameTo, numBytesToGet);
1235 fCurFrameTo += numBytesToGet;
1236 fCurFrameNumBytesToGet -= numBytesToGet;
1237 fCurOffsetWithinFrame += numBytesToGet;
1238 setParseState();
1239 }
1240 while (fCurFrameNumBytesToSkip > 0) {
1241 // Hack: We can skip no more than BANK_SIZE bytes at a time:
1242 unsigned numBytesToSkip = fCurFrameNumBytesToSkip > BANK_SIZE ? BANK_SIZE : fCurFrameNumBytesToSkip;
1243 skipBytes(numBytesToSkip);
1244 fCurFrameNumBytesToSkip -= numBytesToSkip;
1245 fCurOffsetWithinFrame += numBytesToSkip;
1246 setParseState();
1247 }
1248#ifdef DEBUG
1249 fprintf(stderr, "\tdelivered frame #%d: %d bytes", fNextFrameNumberToDeliver, demuxedTrack->frameSize());
1250 if (track->haveSubframes()) fprintf(stderr, "[offset %d]", fCurOffsetWithinFrame - track->subframeSizeSize - demuxedTrack->frameSize() - demuxedTrack->numTruncatedBytes());
1251 if (demuxedTrack->numTruncatedBytes() > 0) fprintf(stderr, " (%d bytes truncated)", demuxedTrack->numTruncatedBytes());
1252 fprintf(stderr, " @%u.%06u (%.06f from start); duration %u us\n", demuxedTrack->presentationTime().tv_sec, demuxedTrack->presentationTime().tv_usec, demuxedTrack->presentationTime().tv_sec+demuxedTrack->presentationTime().tv_usec/1000000.0-fPresentationTimeOffset, demuxedTrack->durationInMicroseconds());
1253#endif
1254
1255 if (!track->haveSubframes()
1256 || fCurOffsetWithinFrame + track->subframeSizeSize >= fFrameSizesWithinBlock[fNextFrameNumberToDeliver]) {
1257 // Either we don't have subframes, or there's no more room for another subframe => We're completely done with this frame now:
1258 ++fNextFrameNumberToDeliver;
1259 fCurOffsetWithinFrame = 0;
1260 }
1261 if (fNextFrameNumberToDeliver == fNumFramesInBlock) {
1262 // We've delivered all of the frames from this block. Look for another block next:
1263 fCurrentParseState = LOOKING_FOR_BLOCK;
1264 } else {
1265 fCurrentParseState = DELIVERING_FRAME_WITHIN_BLOCK;
1266 }
1267
1268 setParseState();
1269 FramedSource::afterGetting(demuxedTrack); // completes delivery
1270 return;
1271 } while (0);
1272
1273 // An error occurred. Try to recover:
1274#ifdef DEBUG
1275 fprintf(stderr, "deliverFrameBytes(): Error parsing data; trying to recover...\n");
1276#endif
1277 fCurrentParseState = LOOKING_FOR_BLOCK;
1278}
1279
1280void MatroskaFileParser
1281::getCommonFrameBytes(MatroskaTrack* track, u_int8_t* to, unsigned numBytesToGet, unsigned numBytesToSkip) {
1282 if (track->headerStrippedBytesSize > fCurOffsetWithinFrame) {
1283 // We have some common 'header stripped' bytes that remain to be prepended to the frame. Use these first:
1284 unsigned numRemainingHeaderStrippedBytes = track->headerStrippedBytesSize - fCurOffsetWithinFrame;
1285 unsigned numHeaderStrippedBytesToGet;
1286 if (numBytesToGet <= numRemainingHeaderStrippedBytes) {
1287 numHeaderStrippedBytesToGet = numBytesToGet;
1288 numBytesToGet = 0;
1289 if (numBytesToGet + numBytesToSkip <= numRemainingHeaderStrippedBytes) {
1290 numBytesToSkip = 0;
1291 } else {
1292 numBytesToSkip = numBytesToGet + numBytesToSkip - numRemainingHeaderStrippedBytes;
1293 }
1294 } else {
1295 numHeaderStrippedBytesToGet = numRemainingHeaderStrippedBytes;
1296 numBytesToGet = numBytesToGet - numRemainingHeaderStrippedBytes;
1297 }
1298
1299 if (numHeaderStrippedBytesToGet > 0) {
1300 memmove(to, &track->headerStrippedBytes[fCurOffsetWithinFrame], numHeaderStrippedBytesToGet);
1301 to += numHeaderStrippedBytesToGet;
1302 fCurOffsetWithinFrame += numHeaderStrippedBytesToGet;
1303 }
1304 }
1305
1306 fCurFrameTo = to;
1307 fCurFrameNumBytesToGet = numBytesToGet;
1308 fCurFrameNumBytesToSkip = numBytesToSkip;
1309}
1310
1311Boolean MatroskaFileParser::parseEBMLNumber(EBMLNumber& num) {
1312 unsigned i;
1313 u_int8_t bitmask = 0x80;
1314 for (i = 0; i < EBML_NUMBER_MAX_LEN; ++i) {
1315 while (1) {
1316 if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) return False; // We've hit our pre-set limit
1317 num.data[i] = get1Byte();
1318 ++fCurOffsetInFile;
1319
1320 // If we're looking for an id, skip any leading bytes that don't contain a '1' in the first 4 bits:
1321 if (i == 0/*we're a leading byte*/ && !num.stripLeading1/*we're looking for an id*/ && (num.data[i]&0xF0) == 0) {
1322 setParseState(); // ensures that we make forward progress if the parsing gets interrupted
1323 continue;
1324 }
1325 break;
1326 }
1327 if ((num.data[0]&bitmask) != 0) {
1328 // num[i] is the last byte of the id
1329 if (num.stripLeading1) num.data[0] &=~ bitmask;
1330 break;
1331 }
1332 bitmask >>= 1;
1333 }
1334 if (i == EBML_NUMBER_MAX_LEN) return False;
1335
1336 num.len = i+1;
1337 return True;
1338}
1339
1340Boolean MatroskaFileParser::parseEBMLIdAndSize(EBMLId& id, EBMLDataSize& size) {
1341 return parseEBMLNumber(id) && parseEBMLNumber(size);
1342}
1343
1344Boolean MatroskaFileParser::parseEBMLVal_unsigned64(EBMLDataSize& size, u_int64_t& result) {
1345 u_int64_t sv = size.val();
1346 if (sv > 8) return False; // size too large
1347
1348 result = 0; // initially
1349 for (unsigned i = (unsigned)sv; i > 0; --i) {
1350 if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) return False; // We've hit our pre-set limit
1351
1352 u_int8_t c = get1Byte();
1353 ++fCurOffsetInFile;
1354
1355 result = result*256 + c;
1356 }
1357
1358 return True;
1359}
1360
1361Boolean MatroskaFileParser::parseEBMLVal_unsigned(EBMLDataSize& size, unsigned& result) {
1362 if (size.val() > 4) return False; // size too large
1363
1364 u_int64_t result64;
1365 if (!parseEBMLVal_unsigned64(size, result64)) return False;
1366
1367 result = (unsigned)result64;
1368
1369 return True;
1370}
1371
1372Boolean MatroskaFileParser::parseEBMLVal_float(EBMLDataSize& size, float& result) {
1373 if (size.val() == 4) {
1374 // Normal case. Read the value as if it were a 4-byte integer, then copy it to the 'float' result:
1375 unsigned resultAsUnsigned;
1376 if (!parseEBMLVal_unsigned(size, resultAsUnsigned)) return False;
1377
1378 if (sizeof result != sizeof resultAsUnsigned) return False;
1379 memcpy(&result, &resultAsUnsigned, sizeof result);
1380 return True;
1381 } else if (size.val() == 8) {
1382 // Read the value as if it were an 8-byte integer, then copy it to a 'double', the convert that to the 'float' result:
1383 u_int64_t resultAsUnsigned64;
1384 if (!parseEBMLVal_unsigned64(size, resultAsUnsigned64)) return False;
1385
1386 double resultDouble;
1387 if (sizeof resultDouble != sizeof resultAsUnsigned64) return False;
1388 memcpy(&resultDouble, &resultAsUnsigned64, sizeof resultDouble);
1389
1390 result = (float)resultDouble;
1391 return True;
1392 } else {
1393 // Unworkable size
1394 return False;
1395 }
1396}
1397
1398Boolean MatroskaFileParser::parseEBMLVal_string(EBMLDataSize& size, char*& result) {
1399 unsigned resultLength = (unsigned)size.val();
1400 result = new char[resultLength + 1]; // allow for the trailing '\0'
1401 if (result == NULL) return False;
1402
1403 char* p = result;
1404 unsigned i;
1405 for (i = 0; i < resultLength; ++i) {
1406 if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) break; // We've hit our pre-set limit
1407
1408 u_int8_t c = get1Byte();
1409 ++fCurOffsetInFile;
1410
1411 *p++ = c;
1412 }
1413 if (i < resultLength) { // an error occurred
1414 delete[] result;
1415 result = NULL;
1416 return False;
1417 }
1418 *p = '\0';
1419
1420 return True;
1421}
1422
1423Boolean MatroskaFileParser::parseEBMLVal_binary(EBMLDataSize& size, u_int8_t*& result) {
1424 unsigned resultLength = (unsigned)size.val();
1425 result = new u_int8_t[resultLength];
1426 if (result == NULL) return False;
1427
1428 u_int8_t* p = result;
1429 unsigned i;
1430 for (i = 0; i < resultLength; ++i) {
1431 if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) break; // We've hit our pre-set limit
1432
1433 u_int8_t c = get1Byte();
1434 ++fCurOffsetInFile;
1435
1436 *p++ = c;
1437 }
1438 if (i < resultLength) { // an error occurred
1439 delete[] result;
1440 result = NULL;
1441 return False;
1442 }
1443
1444 return True;
1445}
1446
1447void MatroskaFileParser::skipHeader(EBMLDataSize const& size) {
1448 u_int64_t sv = (unsigned)size.val();
1449#ifdef DEBUG
1450 fprintf(stderr, "\tskipping %llu bytes\n", sv);
1451#endif
1452
1453 fNumHeaderBytesToSkip = sv;
1454 skipRemainingHeaderBytes(False);
1455}
1456
1457void MatroskaFileParser::skipRemainingHeaderBytes(Boolean isContinuation) {
1458 if (fNumHeaderBytesToSkip == 0) return; // common case
1459
1460 // Hack: To avoid tripping into a parser 'internal error' if we try to skip an excessively large
1461 // distance, break up the skipping into manageable chunks, to ensure forward progress:
1462 unsigned const maxBytesToSkip = bankSize();
1463 while (fNumHeaderBytesToSkip > 0) {
1464 unsigned numBytesToSkipNow
1465 = fNumHeaderBytesToSkip < maxBytesToSkip ? (unsigned)fNumHeaderBytesToSkip : maxBytesToSkip;
1466 setParseState();
1467 skipBytes(numBytesToSkipNow);
1468#ifdef DEBUG
1469 if (isContinuation || numBytesToSkipNow < fNumHeaderBytesToSkip) {
1470 fprintf(stderr, "\t\t(skipped %u bytes; %llu bytes remaining)\n",
1471 numBytesToSkipNow, fNumHeaderBytesToSkip - numBytesToSkipNow);
1472 }
1473#endif
1474 fCurOffsetInFile += numBytesToSkipNow;
1475 fNumHeaderBytesToSkip -= numBytesToSkipNow;
1476 }
1477}
1478
1479void MatroskaFileParser::setParseState() {
1480 fSavedCurOffsetInFile = fCurOffsetInFile;
1481 fSavedCurOffsetWithinFrame = fCurOffsetWithinFrame;
1482 saveParserState();
1483}
1484
1485void MatroskaFileParser::restoreSavedParserState() {
1486 StreamParser::restoreSavedParserState();
1487 fCurOffsetInFile = fSavedCurOffsetInFile;
1488 fCurOffsetWithinFrame = fSavedCurOffsetWithinFrame;
1489}
1490
1491void MatroskaFileParser::seekToFilePosition(u_int64_t offsetInFile) {
1492 ByteStreamFileSource* fileSource = (ByteStreamFileSource*)fInputSource; // we know it's a "ByteStreamFileSource"
1493 if (fileSource != NULL) {
1494 fileSource->seekToByteAbsolute(offsetInFile);
1495 resetStateAfterSeeking();
1496 }
1497}
1498
1499void MatroskaFileParser::seekToEndOfFile() {
1500 ByteStreamFileSource* fileSource = (ByteStreamFileSource*)fInputSource; // we know it's a "ByteStreamFileSource"
1501 if (fileSource != NULL) {
1502 fileSource->seekToEnd();
1503 resetStateAfterSeeking();
1504 }
1505}
1506
1507void MatroskaFileParser::resetStateAfterSeeking() {
1508 // Because we're resuming parsing after seeking to a new position in the file, reset the parser state:
1509 fCurOffsetInFile = fSavedCurOffsetInFile = 0;
1510 fCurOffsetWithinFrame = fSavedCurOffsetWithinFrame = 0;
1511 flushInput();
1512}
1513