Skip to content

Commit c3a0e1d

Browse files
ufleisch and Tolriq authored
Matroska: Use seek head for faster element lookup (#1321)
Limit the scan for the Matroska seek head to 512 KB in ReadStyle::Fast.

Co-authored-by: tolriq <git@leetzone.org>
1 parent 13751f5 commit c3a0e1d

7 files changed

Lines changed: 101 additions & 17 deletions

File tree

examples/tagreader.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,4 +117,3 @@ int main(int argc, char *argv[])
117117
}
118118
return 0;
119119
}
120-

taglib/fileref.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,7 @@ namespace
225225
#endif
226226
#ifdef TAGLIB_WITH_MATROSKA
227227
else if(ext == "MKA" || ext == "MKV" || ext == "WEBM")
228-
file = new Matroska::File(stream, readAudioProperties);
228+
file = new Matroska::File(stream, readAudioProperties, audioPropertiesStyle);
229229
#endif
230230

231231
// if file is not valid, leave it to content-based detection.

taglib/matroska/ebml/ebmlmksegment.cpp

Lines changed: 79 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,32 @@
3030

3131
using namespace TagLib;
3232

33+
namespace {
34+
35+
template <EBML::Element::Id Id, typename ElementType>
36+
std::unique_ptr<ElementType> readElementAt(File &file,
37+
offset_t offset,
38+
offset_t maxOffset)
39+
{
40+
if(offset < 0 || offset >= maxOffset) {
41+
return nullptr;
42+
}
43+
44+
file.seek(offset);
45+
auto element = EBML::Element::factory(file);
46+
if(!element || element->getId() != Id) {
47+
return nullptr;
48+
}
49+
50+
auto typed = EBML::element_cast<Id>(std::move(element));
51+
if(!typed || !typed->read(file)) {
52+
return nullptr;
53+
}
54+
return typed;
55+
}
56+
57+
} // namespace
58+
3359
EBML::MkSegment::MkSegment(int sizeLength, offset_t dataSize, offset_t offset):
3460
MasterElement(Id::MkSegment, sizeLength, dataSize, offset)
3561
{
@@ -49,16 +75,64 @@ offset_t EBML::MkSegment::segmentDataOffset() const
4975

5076
bool EBML::MkSegment::read(File &file)
5177
{
52-
const offset_t maxOffset = file.tell() + dataSize;
78+
return readLimited(file, dataSize);
79+
}
80+
81+
bool EBML::MkSegment::readLimited(File &file, offset_t scanLimit)
82+
{
83+
const offset_t filePos = file.tell();
84+
const offset_t maxOffset = filePos + dataSize;
85+
const offset_t maxScanOffset = filePos + std::min(scanLimit, dataSize);
5386
std::unique_ptr<Element> element;
54-
int i = 0;
55-
int seekHeadIndex = -1;
56-
while((element = findNextElement(file, maxOffset))) {
87+
while((element = findNextElement(file, maxScanOffset))) {
5788
if(const Id id = element->getId(); id == Id::MkSeekHead) {
58-
seekHeadIndex = i;
5989
seekHead = element_cast<Id::MkSeekHead>(std::move(element));
6090
if(!seekHead->read(file))
6191
return false;
92+
// We have a seek head, let's use it for faster access to the other elements
93+
if(const auto elementAfterSeekHead = findNextElement(file, maxScanOffset);
94+
elementAfterSeekHead && elementAfterSeekHead->getId() == Id::VoidElement)
95+
seekHead->setPadding(elementAfterSeekHead->getSize());
96+
const offset_t segDataOffset = segmentDataOffset();
97+
const auto matroskaSeekHead = parseSeekHead();
98+
for(const auto &[idValue, relativeOffset] : matroskaSeekHead->entryList()) {
99+
const offset_t absoluteOffset = segDataOffset + relativeOffset;
100+
switch(static_cast<Id>(idValue)) {
101+
case Id::MkCues:
102+
if(!((cues = readElementAt<Id::MkCues, MkCues>(
103+
file, absoluteOffset, maxOffset))))
104+
return false;
105+
break;
106+
case Id::MkInfo:
107+
if(!((info = readElementAt<Id::MkInfo, MkInfo>(
108+
file, absoluteOffset, maxOffset))))
109+
return false;
110+
break;
111+
case Id::MkTracks:
112+
if(!((tracks = readElementAt<Id::MkTracks, MkTracks>(
113+
file, absoluteOffset, maxOffset))))
114+
return false;
115+
break;
116+
case Id::MkTags:
117+
if(!((tags = readElementAt<Id::MkTags, MkTags>(
118+
file, absoluteOffset, maxOffset))))
119+
return false;
120+
break;
121+
case Id::MkAttachments:
122+
if(!((attachments = readElementAt<Id::MkAttachments, MkAttachments>(
123+
file, absoluteOffset, maxOffset))))
124+
return false;
125+
break;
126+
case Id::MkChapters:
127+
if(!((chapters = readElementAt<Id::MkChapters, MkChapters>(
128+
file, absoluteOffset, maxOffset))))
129+
return false;
130+
break;
131+
default:
132+
break;
133+
}
134+
}
135+
return true;
62136
}
63137
else if(id == Id::MkCues) {
64138
cues = element_cast<Id::MkCues>(std::move(element));
@@ -91,14 +165,8 @@ bool EBML::MkSegment::read(File &file)
91165
return false;
92166
}
93167
else {
94-
if(id == Id::VoidElement
95-
&& seekHead
96-
&& seekHeadIndex == i - 1)
97-
seekHead->setPadding(element->getSize());
98-
99168
element->skipData(file);
100169
}
101-
i++;
102170
}
103171
return true;
104172
}

taglib/matroska/ebml/ebmlmksegment.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ namespace TagLib {
5151

5252
offset_t segmentDataOffset() const;
5353
bool read(File &file) override;
54+
bool readLimited(File &file, offset_t scanLimit);
5455
std::unique_ptr<Matroska::Tag> parseTag() const;
5556
std::unique_ptr<Matroska::Attachments> parseAttachments() const;
5657
std::unique_ptr<Matroska::Chapters> parseChapters() const;

taglib/matroska/matroskafile.cpp

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,8 @@ PropertyMap Matroska::File::setProperties(const PropertyMap &properties)
144144

145145
namespace {
146146

147+
constexpr offset_t FAST_SCAN_LIMIT = static_cast<offset_t>(512 * 1024);
148+
147149
String keyForAttachedFile(const Matroska::AttachedFile &attachedFile)
148150
{
149151
if(attachedFile.mediaType().startsWith("image/")) {
@@ -376,10 +378,15 @@ void Matroska::File::read(bool readProperties, Properties::ReadStyle readStyle)
376378
head->skipData(*this);
377379
}
378380

381+
offset_t maxOffset = fileLength - tell();
382+
if (readStyle == Properties::ReadStyle::Fast && maxOffset > FAST_SCAN_LIMIT) {
383+
maxOffset = FAST_SCAN_LIMIT;
384+
}
385+
379386
// Find the Matroska segment in the file
380387
const std::unique_ptr<EBML::MkSegment> segment(
381388
EBML::element_cast<EBML::Element::Id::MkSegment>(
382-
EBML::findElement(*this, EBML::Element::Id::MkSegment, fileLength - tell())
389+
EBML::findElement(*this, EBML::Element::Id::MkSegment, maxOffset)
383390
)
384391
);
385392
if(!segment) {
@@ -389,14 +396,18 @@ void Matroska::File::read(bool readProperties, Properties::ReadStyle readStyle)
389396
}
390397

391398
// Read the segment into memory from file
392-
if(!segment->read(*this)) {
399+
d->segment = segment->parseSegment();
400+
maxOffset = segment->getDataSize();
401+
if (readStyle == Properties::ReadStyle::Fast && maxOffset > FAST_SCAN_LIMIT) {
402+
maxOffset = FAST_SCAN_LIMIT;
403+
}
404+
if(!segment->readLimited(*this, maxOffset)) {
393405
debug("Failed to read segment");
394406
setValid(false);
395407
return;
396408
}
397409

398410
// Parse the elements
399-
d->segment = segment->parseSegment();
400411
d->seekHead = segment->parseSeekHead();
401412
d->cues = segment->parseCues();
402413
d->tag = segment->parseTag();

taglib/matroska/matroskaseekhead.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,6 @@ bool Matroska::SeekHead::isValid(TagLib::File &file) const
5454
void Matroska::SeekHead::addEntry(const Element &element)
5555
{
5656
entries.append({element.id(), element.offset()});
57-
debug("adding to seekhead");
5857
setNeedsRender(true);
5958
}
6059

@@ -64,6 +63,11 @@ void Matroska::SeekHead::addEntry(ID id, offset_t offset)
6463
setNeedsRender(true);
6564
}
6665

66+
const List<std::pair<unsigned int, offset_t>> &Matroska::SeekHead::entryList() const
67+
{
68+
return entries;
69+
}
70+
6771
ByteVector Matroska::SeekHead::renderInternal()
6872
{
6973
const auto beforeSize = sizeRenderedOrWritten();

taglib/matroska/matroskaseekhead.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ namespace TagLib {
3939
bool isValid(TagLib::File &file) const;
4040
void addEntry(const Element &element);
4141
void addEntry(ID id, offset_t offset);
42+
const List<std::pair<unsigned int, offset_t>> &entryList() const;
4243
void write(TagLib::File &file) override;
4344
void sort();
4445
bool sizeChanged(Element &caller, offset_t delta) override;

0 commit comments

Comments
 (0)