LibVideo: Scaffold LibVideo and implement simplistic Matroska parser

This commit initializes the LibVideo library and implements parsing
basic Matroska container files. Currently, it will only parse audio
and video tracks.
This commit is contained in:
FalseHonesty 2021-06-05 16:06:55 -04:00 committed by Andreas Kling
parent 6a15bd06cb
commit 403bb07443
7 changed files with 840 additions and 0 deletions

View file

@ -258,6 +258,14 @@
#cmakedefine01 MARKDOWN_DEBUG
#endif
#ifndef MATROSKA_DEBUG
#cmakedefine01 MATROSKA_DEBUG
#endif
#ifndef MATROSKA_TRACE_DEBUG
#cmakedefine01 MATROSKA_TRACE_DEBUG
#endif
#ifndef MEMORY_DEBUG
#cmakedefine01 MEMORY_DEBUG
#endif

View file

@ -101,6 +101,8 @@ set(LOCK_TRACE_DEBUG ON)
set(LOOKUPSERVER_DEBUG ON)
set(MALLOC_DEBUG ON)
set(MARKDOWN_DEBUG ON)
set(MATROSKA_DEBUG ON)
set(MATROSKA_TRACE_DEBUG ON)
set(MASTERPTY_DEBUG ON)
set(MBR_DEBUG ON)
set(MEMORY_DEBUG ON)

View file

@ -41,6 +41,7 @@ add_subdirectory(LibTextCodec)
add_subdirectory(LibThreading)
add_subdirectory(LibTLS)
add_subdirectory(LibTTF)
add_subdirectory(LibVideo)
add_subdirectory(LibVT)
add_subdirectory(LibWasm)
add_subdirectory(LibWeb)

View file

@ -0,0 +1,7 @@
# Files listed as the sources of the LibVideo target.
set(SOURCES
MatroskaDocument.h
MatroskaReader.cpp
)
# Declare the library through the project's serenity_lib() helper
# (defined elsewhere in the build system).
serenity_lib(LibVideo video)
# Libraries that LibVideo is linked against.
target_link_libraries(LibVideo LibAudio LibCore LibIPC)

View file

@ -0,0 +1,195 @@
/*
* Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/ByteBuffer.h>
#include <AK/FlyString.h>
#include <AK/HashMap.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/OwnPtr.h>
#include <AK/String.h>
#include <AK/Utf8View.h>
namespace Video {
// Values collected from the EBML header that precedes the Matroska segment.
struct EBMLHeader {
    // Contents of the DocType element.
    String doc_type;
    // Contents of the DocTypeVersion element. Starts out as 1, the default the
    // EBML specification assigns to this element, so the field never holds an
    // indeterminate value when the element is missing from the file.
    u32 doc_type_version { 1 };
};
class SegmentInformation {
public:
u64 timestamp_scale() const { return m_timestamp_scale; }
void set_timestamp_scale(u64 timestamp_scale) { m_timestamp_scale = timestamp_scale; }
Utf8View muxing_app() const { return Utf8View(m_muxing_app); }
void set_muxing_app(String muxing_app) { m_muxing_app = move(muxing_app); }
Utf8View writing_app() const { return Utf8View(m_writing_app); }
void set_writing_app(String writing_app) { m_writing_app = move(writing_app); }
private:
u64 m_timestamp_scale { 1'000'000 };
String m_muxing_app;
String m_writing_app;
};
// Description of a single track: its identifiers, type, language, codec and
// the type-specific video or audio settings.
class TrackEntry {
public:
    // Numeric track type as stored in the file.
    enum TrackType : u8 {
        Invalid = 0,
        Video = 1,
        Audio = 2,
        Complex = 3,
        Logo = 16,
        Subtitle = 17,
        Buttons = 18,
        Control = 32,
        Metadata = 33,
    };

    // Settings that only apply to video tracks.
    struct VideoTrack {
        u64 pixel_width;
        u64 pixel_height;
    };

    // Settings that only apply to audio tracks.
    struct AudioTrack {
        u64 channels;
        u64 bit_depth;
    };

    u64 track_number() const { return m_track_number; }
    void set_track_number(u64 track_number) { m_track_number = track_number; }

    u64 track_uid() const { return m_track_uid; }
    void set_track_uid(u64 track_uid) { m_track_uid = track_uid; }

    TrackType track_type() const { return m_track_type; }
    void set_track_type(TrackType track_type) { m_track_type = track_type; }

    FlyString language() const { return m_language; }
    void set_language(const FlyString& language) { m_language = language; }

    FlyString codec_id() const { return m_codec_id; }
    void set_codec_id(const FlyString& codec_id) { m_codec_id = codec_id; }

    // Returns the video settings, or an empty Optional when this is not a video track.
    Optional<VideoTrack> video_track() const
    {
        if (track_type() != Video)
            return {};
        return m_video_track;
    }
    void set_video_track(VideoTrack video_track) { m_video_track = video_track; }

    // Returns the audio settings, or an empty Optional when this is not an audio track.
    Optional<AudioTrack> audio_track() const
    {
        if (track_type() != Audio)
            return {};
        return m_audio_track;
    }
    void set_audio_track(AudioTrack audio_track) { m_audio_track = audio_track; }

private:
    u64 m_track_number { 0 };
    u64 m_track_uid { 0 };
    TrackType m_track_type { Invalid };
    FlyString m_language = "eng";
    FlyString m_codec_id;

    // Only one of the two settings structs is meaningful for a given track, so
    // they share storage. The storage is zero-initialised so that the getters
    // above never hand out indeterminate values when the track type was set
    // but the matching settings were never provided.
    union {
        VideoTrack m_video_track {};
        AudioTrack m_audio_track;
    };
};
class Block {
public:
enum Lacing : u8 {
None = 0b00,
XIPH = 0b01,
FixedSize = 0b10,
EBML = 0b11,
};
Block() = default;
u64 track_number() const { return m_track_number; }
void set_track_number(u64 track_number) { m_track_number = track_number; }
i16 timestamp() const { return m_timestamp; }
void set_timestamp(i16 timestamp) { m_timestamp = timestamp; }
bool only_keyframes() const { return m_only_keyframes; }
void set_only_keyframes(bool only_keyframes) { m_only_keyframes = only_keyframes; }
bool invisible() const { return m_invisible; }
void set_invisible(bool invisible) { m_invisible = invisible; }
Lacing lacing() const { return m_lacing; }
void set_lacing(Lacing lacing) { m_lacing = lacing; }
bool discardable() const { return m_discardable; }
void set_discardable(bool discardable) { m_discardable = discardable; }
u64 frame_count() const { return m_frames.size(); }
const ByteBuffer& frame(size_t index) const { return m_frames.at(index); }
void add_frame(const ByteBuffer& frame) { m_frames.append(move(frame)); }
private:
u64 m_track_number { 0 };
i16 m_timestamp { 0 };
bool m_only_keyframes { false };
bool m_invisible { false };
Lacing m_lacing { None };
bool m_discardable { true };
Vector<ByteBuffer> m_frames;
};
class Cluster {
public:
u64 timestamp() const { return m_timestamp; }
void set_timestamp(u64 timestamp) { m_timestamp = timestamp; }
NonnullOwnPtrVector<Block>& blocks() { return m_blocks; }
const NonnullOwnPtrVector<Block>& blocks() const { return m_blocks; }
private:
u64 m_timestamp { 0 };
NonnullOwnPtrVector<Block> m_blocks;
};
class MatroskaDocument {
public:
explicit MatroskaDocument(EBMLHeader m_header)
: m_header(move(m_header))
{
}
const EBMLHeader& header() const { return m_header; }
Optional<SegmentInformation> segment_information() const
{
if (!m_segment_information)
return {};
return *m_segment_information;
}
void set_segment_information(OwnPtr<SegmentInformation> segment_information) { m_segment_information = move(segment_information); }
const HashMap<u64, NonnullOwnPtr<TrackEntry>>& tracks() const { return m_tracks; }
Optional<TrackEntry> track_for_track_number(u64 track_number) const
{
auto track = m_tracks.get(track_number);
if (!track.has_value())
return {};
return *track.value();
}
Optional<TrackEntry> track_for_track_type(TrackEntry::TrackType type) const
{
for (auto& track_entry : m_tracks) {
if (track_entry.value->track_type() == type)
return *track_entry.value;
}
return {};
}
void add_track(u64 track_number, NonnullOwnPtr<TrackEntry> track)
{
m_tracks.set(track_number, move(track));
}
NonnullOwnPtrVector<Cluster>& clusters() { return m_clusters; }
private:
EBMLHeader m_header;
OwnPtr<SegmentInformation> m_segment_information;
HashMap<u64, NonnullOwnPtr<TrackEntry>> m_tracks;
NonnullOwnPtrVector<Cluster> m_clusters;
};
}

View file

@ -0,0 +1,458 @@
/*
* Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "MatroskaReader.h"
#include <AK/Function.h>
#include <AK/MappedFile.h>
#include <AK/Optional.h>
#include <AK/Utf8View.h>
namespace Video {
// Returns `false` from the enclosing function or lambda when `x` (anything
// with a has_value() member) is empty. Wrapped in do { } while (0) so that the
// macro behaves as a single statement: a bare `if` would capture a following
// `else` when the macro is used as the body of an unbraced if/else.
#define CHECK_HAS_VALUE(x)    \
    do {                      \
        if (!(x).has_value()) \
            return false;     \
    } while (0)
// Element IDs the reader compares against. IDs are read with their length
// marker bits kept (read_variable_size_integer(false)), so the values below
// include those bits.

// Top-level elements checked by parse().
constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
// Children handled by parse_ebml_header().
constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
// Children handled by parse_segment_elements().
constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
// Children handled by parse_information().
constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
constexpr u32 MUXING_APP_ID = 0x4D80;
constexpr u32 WRITING_APP_ID = 0x5741;
// Child handled by parse_tracks().
constexpr u32 TRACK_ENTRY_ID = 0xAE;
// Children handled by parse_track_entry().
constexpr u32 TRACK_NUMBER_ID = 0xD7;
constexpr u32 TRACK_UID_ID = 0x73C5;
constexpr u32 TRACK_TYPE_ID = 0x83;
constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
constexpr u32 TRACK_CODEC_ID = 0x86;
constexpr u32 TRACK_VIDEO_ID = 0xE0;
constexpr u32 TRACK_AUDIO_ID = 0xE1;
// Children handled by parse_video_track_information().
constexpr u32 PIXEL_WIDTH_ID = 0xB0;
constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
// Children handled by parse_audio_track_information().
constexpr u32 CHANNELS_ID = 0x9F;
constexpr u32 BIT_DEPTH_ID = 0x6264;
// Children handled by parse_cluster().
constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
constexpr u32 TIMESTAMP_ID = 0xE7;
// Maps the file at `path` and parses the mapped bytes.
// Returns a null pointer when the file cannot be mapped or parsing fails.
OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_file(const StringView& path)
{
    auto mapping_or_error = MappedFile::map(path);
    if (mapping_or_error.is_error())
        return {};

    // Keep the mapping alive in a local for the duration of the parse.
    auto mapping = mapping_or_error.release_value();
    auto const* bytes = static_cast<const u8*>(mapping->data());
    return parse_matroska_from_data(bytes, mapping->size());
}
// Parses `size` bytes starting at `data` with a temporary reader.
// Returns a null pointer when parsing fails.
OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_data(const u8* data, size_t size)
{
    return MatroskaReader(data, size).parse();
}
// Parses the whole input: an EBML header element followed by a segment element.
// Returns a null pointer if either element is missing or fails to parse.
OwnPtr<MatroskaDocument> MatroskaReader::parse()
{
    auto first_element_id = m_streamer.read_variable_size_integer(false);
    // Check for a value before logging it: calling value() on an empty
    // Optional would trip an assertion in trace builds instead of failing
    // the parse gracefully.
    if (!first_element_id.has_value())
        return {};
    dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id.value());
    if (first_element_id.value() != EBML_MASTER_ELEMENT_ID)
        return {};

    auto header = parse_ebml_header();
    if (!header.has_value())
        return {};

    dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");

    auto root_element_id = m_streamer.read_variable_size_integer(false);
    if (!root_element_id.has_value() || root_element_id.value() != SEGMENT_ELEMENT_ID)
        return {};

    // The header is not needed locally any more, so hand it over instead of copying it.
    auto matroska_document = make<MatroskaDocument>(header.release_value());

    auto segment_parse_success = parse_segment_elements(*matroska_document);
    if (!segment_parse_success)
        return {};

    return matroska_document;
}
// Reads the size of a master element and then feeds the ID of every child
// element to `element_consumer` until the declared number of octets has been
// consumed. The consumer is expected to read the child's size and payload
// itself and to return false on failure.
//
// `element_name` is only used for debug output.
// Returns false if the size or a child ID cannot be read, if the declared size
// exceeds the data that is left, or if the consumer fails.
bool MatroskaReader::parse_master_element([[maybe_unused]] const StringView& element_name, Function<bool(u64)> element_consumer)
{
    auto element_data_size = m_streamer.read_variable_size_integer();
    CHECK_HAS_VALUE(element_data_size);
    dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size.value());

    // A declared size larger than the remaining data can never be satisfied;
    // failing here avoids walking off the end of the input while looking for
    // children that are not there.
    if (element_data_size.value() > m_streamer.remaining()) {
        dbgln_if(MATROSKA_DEBUG, "{} declares {} octets but only {} remain.", element_name, element_data_size.value(), m_streamer.remaining());
        return false;
    }

    // Start a fresh octet counter for this element; it is popped again below
    // on every path so the counter stack stays balanced even when parsing fails.
    m_streamer.push_octets_read();
    bool success = true;
    while (m_streamer.octets_read() < element_data_size.value()) {
        dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
        auto optional_element_id = m_streamer.read_variable_size_integer(false);
        if (!optional_element_id.has_value()) {
            success = false;
            break;
        }

        auto element_id = optional_element_id.value();
        dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}\n", element_name, element_id);

        if (!element_consumer(element_id)) {
            dbgln_if(MATROSKA_DEBUG, "{:s} consumer failed on ID {:#010x}\n", element_name, element_id);
            success = false;
            break;
        }

        dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", m_streamer.octets_read(), element_name);
    }
    m_streamer.pop_octets_read();

    return success;
}
// Parses the children of the EBML header element, keeping the DocType and
// DocTypeVersion values and skipping every other child.
// Returns an empty Optional if any child fails to parse.
Optional<EBMLHeader> MatroskaReader::parse_ebml_header()
{
    EBMLHeader header;
    bool const parsed = parse_master_element("Header", [&](u64 id) -> bool {
        switch (id) {
        case DOCTYPE_ELEMENT_ID: {
            auto doc_type = read_string_element();
            if (!doc_type.has_value())
                return false;
            header.doc_type = doc_type.value();
            dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", doc_type.value());
            return true;
        }
        case DOCTYPE_VERSION_ELEMENT_ID: {
            auto doc_type_version = read_u64_element();
            if (!doc_type_version.has_value())
                return false;
            header.doc_type_version = doc_type_version.value();
            dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", doc_type_version.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (parsed)
        return header;
    return {};
}
// Parses the children of the segment element into `matroska_document`:
// segment information, tracks and clusters. Other children are skipped.
// Returns false as soon as any child fails to parse.
bool MatroskaReader::parse_segment_elements(MatroskaDocument& matroska_document)
{
    dbgln_if(MATROSKA_DEBUG, "Parsing segment elements");
    auto success = parse_master_element("Segment", [&](u64 element_id) {
        if (element_id == SEGMENT_INFORMATION_ELEMENT_ID) {
            auto segment_information = parse_information();
            if (!segment_information)
                return false;
            matroska_document.set_segment_information(move(segment_information));
        } else if (element_id == TRACK_ELEMENT_ID) {
            return parse_tracks(matroska_document);
        } else if (element_id == CLUSTER_ELEMENT_ID) {
            auto cluster = parse_cluster();
            if (!cluster)
                return false;
            matroska_document.clusters().append(cluster.release_nonnull());
        } else {
            return read_unknown_element();
        }

        return true;
    });

    // Gated behind the debug flag like every other message in this file, so
    // regular builds do not log on each parsed document.
    dbgln_if(MATROSKA_DEBUG, "Success {}", success);
    return success;
}
// Parses the segment information element, keeping the timestamp scale and the
// muxing/writing application names and skipping every other child.
// Returns a null pointer if any child fails to parse.
OwnPtr<SegmentInformation> MatroskaReader::parse_information()
{
    auto segment_information = make<SegmentInformation>();
    bool const parsed = parse_master_element("Segment Information", [&](u64 id) -> bool {
        switch (id) {
        case TIMESTAMP_SCALE_ID: {
            auto timestamp_scale = read_u64_element();
            if (!timestamp_scale.has_value())
                return false;
            segment_information->set_timestamp_scale(timestamp_scale.value());
            dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", timestamp_scale.value());
            return true;
        }
        case MUXING_APP_ID: {
            auto muxing_app = read_string_element();
            if (!muxing_app.has_value())
                return false;
            segment_information->set_muxing_app(muxing_app.value());
            dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", muxing_app.value());
            return true;
        }
        case WRITING_APP_ID: {
            auto writing_app = read_string_element();
            if (!writing_app.has_value())
                return false;
            segment_information->set_writing_app(writing_app.value());
            dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", writing_app.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (parsed)
        return segment_information;
    return {};
}
// Parses the tracks element: every track entry child is parsed and added to
// `matroska_document` under its track number; other children are skipped.
// Returns false if any child fails to parse.
bool MatroskaReader::parse_tracks(MatroskaDocument& matroska_document)
{
    return parse_master_element("Tracks", [&](u64 id) -> bool {
        // Anything that is not a track entry is skipped.
        if (id != TRACK_ENTRY_ID)
            return read_unknown_element();

        dbgln_if(MATROSKA_DEBUG, "Parsing track");
        auto entry = parse_track_entry();
        if (!entry)
            return false;

        // Read the number first; the entry is given away on the next line.
        auto number = entry->track_number();
        matroska_document.add_track(number, entry.release_nonnull());
        dbgln_if(MATROSKA_DEBUG, "Track {} added to document", number);
        return true;
    });
}
// Parses one track entry: number, UID, type, language, codec ID and the nested
// video/audio settings. Every other child is skipped.
// Returns a null pointer if any child fails to parse.
OwnPtr<TrackEntry> MatroskaReader::parse_track_entry()
{
    auto track_entry = make<TrackEntry>();
    bool const parsed = parse_master_element("Track", [&](u64 id) -> bool {
        switch (id) {
        case TRACK_NUMBER_ID: {
            auto track_number = read_u64_element();
            if (!track_number.has_value())
                return false;
            track_entry->set_track_number(track_number.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_number.value());
            return true;
        }
        case TRACK_UID_ID: {
            auto track_uid = read_u64_element();
            if (!track_uid.has_value())
                return false;
            track_entry->set_track_uid(track_uid.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_uid.value());
            return true;
        }
        case TRACK_TYPE_ID: {
            auto track_type = read_u64_element();
            if (!track_type.has_value())
                return false;
            // The raw number is converted to the enum without validation.
            track_entry->set_track_type(static_cast<TrackEntry::TrackType>(track_type.value()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", track_type.value());
            return true;
        }
        case TRACK_LANGUAGE_ID: {
            auto language = read_string_element();
            if (!language.has_value())
                return false;
            track_entry->set_language(language.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", language.value());
            return true;
        }
        case TRACK_CODEC_ID: {
            auto codec_id = read_string_element();
            if (!codec_id.has_value())
                return false;
            track_entry->set_codec_id(codec_id.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", codec_id.value());
            return true;
        }
        case TRACK_VIDEO_ID: {
            auto video_track = parse_video_track_information();
            if (!video_track.has_value())
                return false;
            track_entry->set_video_track(video_track.value());
            return true;
        }
        case TRACK_AUDIO_ID: {
            auto audio_track = parse_audio_track_information();
            if (!audio_track.has_value())
                return false;
            track_entry->set_audio_track(audio_track.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (parsed)
        return track_entry;
    return {};
}
// Parses a track's video settings, keeping the pixel width and height and
// skipping every other child. Fields that do not appear stay zero.
// Returns an empty Optional if any child fails to parse.
Optional<TrackEntry::VideoTrack> MatroskaReader::parse_video_track_information()
{
    TrackEntry::VideoTrack video_track {};
    bool const parsed = parse_master_element("VideoTrack", [&](u64 id) -> bool {
        switch (id) {
        case PIXEL_WIDTH_ID: {
            auto pixel_width = read_u64_element();
            if (!pixel_width.has_value())
                return false;
            video_track.pixel_width = pixel_width.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", pixel_width.value());
            return true;
        }
        case PIXEL_HEIGHT_ID: {
            auto pixel_height = read_u64_element();
            if (!pixel_height.has_value())
                return false;
            video_track.pixel_height = pixel_height.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", pixel_height.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (parsed)
        return video_track;
    return {};
}
// Parses a track's audio settings, keeping the channel count and bit depth and
// skipping every other child. Fields that do not appear stay zero.
// Returns an empty Optional if any child fails to parse.
Optional<TrackEntry::AudioTrack> MatroskaReader::parse_audio_track_information()
{
    TrackEntry::AudioTrack audio_track {};
    bool const parsed = parse_master_element("AudioTrack", [&](u64 id) -> bool {
        switch (id) {
        case CHANNELS_ID: {
            auto channels = read_u64_element();
            if (!channels.has_value())
                return false;
            audio_track.channels = channels.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", channels.value());
            return true;
        }
        case BIT_DEPTH_ID: {
            auto bit_depth = read_u64_element();
            if (!bit_depth.has_value())
                return false;
            audio_track.bit_depth = bit_depth.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", bit_depth.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (parsed)
        return audio_track;
    return {};
}
// Parses a cluster: its timestamp and every simple block it contains. Other
// children are skipped.
// Returns a null pointer if any child fails to parse.
OwnPtr<Cluster> MatroskaReader::parse_cluster()
{
    auto cluster = make<Cluster>();
    bool const parsed = parse_master_element("Cluster", [&](u64 id) -> bool {
        switch (id) {
        case SIMPLE_BLOCK_ID: {
            auto simple_block = parse_simple_block();
            if (!simple_block)
                return false;
            cluster->blocks().append(simple_block.release_nonnull());
            return true;
        }
        case TIMESTAMP_ID: {
            auto timestamp = read_u64_element();
            if (!timestamp.has_value())
                return false;
            cluster->set_timestamp(timestamp.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (parsed)
        return cluster;
    return {};
}
// Parses the payload of a simple block: track number, 16-bit timestamp, flags
// octet and the frame data, which is split according to the lacing mode.
//
// All sizes come from the file and are treated as untrusted: every size is
// checked against the data that is actually left before anything is copied.
// Returns a null pointer when the block is truncated or inconsistent.
OwnPtr<Block> MatroskaReader::parse_simple_block()
{
    auto block = make<Block>();

    auto content_size = m_streamer.read_variable_size_integer();
    if (!content_size.has_value())
        return {};
    // The whole block payload has to be present in the input.
    if (content_size.value() > m_streamer.remaining())
        return {};

    auto octets_read_before_track_number = m_streamer.octets_read();
    if (!m_streamer.has_octet())
        return {};
    auto track_number = m_streamer.read_variable_size_integer();
    if (!track_number.has_value())
        return {};
    block->set_track_number(track_number.value());

    // Two timestamp octets plus one flags octet follow the track number.
    if (m_streamer.remaining() < 3)
        return {};
    block->set_timestamp(m_streamer.read_i16());

    auto flags = m_streamer.read_octet();
    block->set_only_keyframes(flags & (1u << 7u));
    block->set_invisible(flags & (1u << 3u));
    block->set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
    block->set_discardable(flags & 1u);

    // Whatever is left of the declared size after the block header is frame
    // data (plus the lacing header, if any). Guard the subtraction so that a
    // bogus size cannot wrap around.
    u64 header_size = m_streamer.octets_read() - octets_read_before_track_number;
    if (header_size > content_size.value())
        return {};
    u64 total_frame_content_size = content_size.value() - header_size;

    // Copies one frame out of the stream, refusing to read past the end of the input.
    auto read_frame = [&](u64 frame_size) -> bool {
        if (frame_size > m_streamer.remaining())
            return false;
        block->add_frame(ByteBuffer::copy(m_streamer.data(), frame_size));
        m_streamer.drop_octets(frame_size);
        return true;
    };

    if (block->lacing() == Block::Lacing::EBML) {
        auto octets_read_before_frame_sizes = m_streamer.octets_read();
        if (!m_streamer.has_octet())
            return {};
        auto frame_count = m_streamer.read_octet() + 1;
        Vector<u64> frame_sizes;
        frame_sizes.ensure_capacity(frame_count);

        u64 frame_size_sum = 0;
        u64 previous_frame_size = 0;

        // Records a coded frame size, rejecting sizes that cannot fit into the
        // block. This keeps frame_size_sum <= total_frame_content_size.
        auto record_frame_size = [&](u64 frame_size) -> bool {
            if (frame_size > total_frame_content_size - frame_size_sum)
                return false;
            frame_sizes.append(frame_size);
            frame_size_sum += frame_size;
            previous_frame_size = frame_size;
            return true;
        };

        // The size of every frame except the last one is coded: the first as
        // an unsigned integer, the following ones as signed differences to
        // their predecessor. A lace with a single frame has no coded sizes.
        if (frame_count > 1) {
            if (!m_streamer.has_octet())
                return {};
            auto first_frame_size = m_streamer.read_variable_size_integer();
            if (!first_frame_size.has_value())
                return {};
            if (!record_frame_size(first_frame_size.value()))
                return {};
        }

        for (int i = 0; i < frame_count - 2; i++) {
            if (!m_streamer.has_octet())
                return {};
            auto frame_size_difference = m_streamer.read_variable_sized_signed_integer();
            if (!frame_size_difference.has_value())
                return {};

            u64 frame_size;
            if (frame_size_difference.value() < 0) {
                u64 shrink = static_cast<u64>(-frame_size_difference.value());
                // A frame cannot shrink below zero octets.
                if (shrink > previous_frame_size)
                    return {};
                frame_size = previous_frame_size - shrink;
            } else {
                frame_size = previous_frame_size + static_cast<u64>(frame_size_difference.value());
            }
            if (!record_frame_size(frame_size))
                return {};
        }

        // The last frame takes whatever remains after the lacing header and
        // the coded frames.
        u64 lacing_header_size = m_streamer.octets_read() - octets_read_before_frame_sizes;
        if (lacing_header_size > total_frame_content_size - frame_size_sum)
            return {};
        frame_sizes.append(total_frame_content_size - frame_size_sum - lacing_header_size);

        for (auto frame_size : frame_sizes) {
            if (!read_frame(frame_size))
                return {};
        }
    } else if (block->lacing() == Block::Lacing::FixedSize) {
        // The frame-count octet is part of the payload but not of the frame
        // data, so it must not be included when the data is divided up.
        if (total_frame_content_size < 1 || !m_streamer.has_octet())
            return {};
        auto frame_count = m_streamer.read_octet() + 1;
        u64 frame_data_size = total_frame_content_size - 1;
        u64 individual_frame_size = frame_data_size / frame_count;
        for (int i = 0; i < frame_count; i++) {
            if (!read_frame(individual_frame_size))
                return {};
        }
        // Skip octets an uneven division leaves over, so that the stream stays
        // aligned with the declared block size.
        u64 leftover = frame_data_size - individual_frame_size * frame_count;
        if (leftover > m_streamer.remaining())
            return {};
        m_streamer.drop_octets(leftover);
    } else {
        // No lacing: the whole payload is one frame.
        // NOTE: Xiph lacing is not implemented and also ends up here, so such
        // a block is stored as one undivided frame that still contains its
        // lacing header.
        if (!read_frame(total_frame_content_size))
            return {};
    }

    return block;
}
// Reads an element's size followed by that many octets as a string.
// Returns an empty Optional if the size cannot be read or exceeds the remaining data.
Optional<String> MatroskaReader::read_string_element()
{
    auto length = m_streamer.read_variable_size_integer();
    if (!length.has_value())
        return {};
    if (length.value() > m_streamer.remaining())
        return {};

    String text(m_streamer.data_as_chars(), length.value());
    m_streamer.drop_octets(length.value());
    return text;
}
// Reads an element's size followed by that many octets as a big-endian
// unsigned integer.
// Returns an empty Optional if the size cannot be read, is larger than eight
// octets, or exceeds the remaining data.
Optional<u64> MatroskaReader::read_u64_element()
{
    auto integer_length = m_streamer.read_variable_size_integer();
    if (!integer_length.has_value())
        return {};
    // More than eight octets do not fit into the result; accepting them would
    // silently shift the leading octets out and return a wrong value.
    if (integer_length.value() > sizeof(u64))
        return {};
    if (m_streamer.remaining() < integer_length.value())
        return {};

    // The availability of all octets was established above, so the loop can
    // read unconditionally.
    u64 result = 0;
    for (size_t i = 0; i < integer_length.value(); i++)
        result = (result << 8u) + m_streamer.read_octet();
    return result;
}
// Skips an element the reader does not interpret: reads its size and drops
// that many octets.
// Returns false if the size cannot be read or exceeds the remaining data.
bool MatroskaReader::read_unknown_element()
{
    auto length = m_streamer.read_variable_size_integer();
    if (!length.has_value())
        return false;
    if (length.value() > m_streamer.remaining())
        return false;

    m_streamer.drop_octets(length.value());
    return true;
}
}

View file

@ -0,0 +1,169 @@
/*
* Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include "MatroskaDocument.h"
#include <AK/Debug.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/Optional.h>
#include <AK/OwnPtr.h>
#include <math.h>
namespace Video {
// Parser that turns a Matroska byte stream into a MatroskaDocument.
// Use the static parse_matroska_from_* helpers, or construct a reader over a
// buffer and call parse(). The reader does not take ownership of the buffer.
class MatroskaReader {
public:
    MatroskaReader(const u8* data, size_t size)
        : m_streamer(data, size)
    {
    }

    static OwnPtr<MatroskaDocument> parse_matroska_from_file(const StringView& path);
    static OwnPtr<MatroskaDocument> parse_matroska_from_data(const u8*, size_t);

    OwnPtr<MatroskaDocument> parse();

private:
    // Cursor over the input buffer. Besides the read position it keeps a stack
    // of "octets read" counters so that nested elements can each track how
    // much of their own payload has been consumed.
    class Streamer {
    public:
        Streamer(const u8* data, size_t size)
            : m_data_ptr(data)
            , m_size_remaining(size)
        {
        }

        const u8* data() { return m_data_ptr; }

        const char* data_as_chars() { return reinterpret_cast<const char*>(m_data_ptr); }

        // Reads one octet. The caller must make sure one is available.
        u8 read_octet()
        {
            VERIFY(m_size_remaining >= 1);
            m_size_remaining--;
            m_octets_read.last()++;
            return *(m_data_ptr++);
        }

        // Reads a big-endian 16-bit value. The caller must make sure two
        // octets are available.
        i16 read_i16()
        {
            // The reads are separate statements on purpose: the operands of
            // `|` may be evaluated in either order, which would make the byte
            // order of a single-expression version compiler-dependent.
            auto high_octet = read_octet();
            auto low_octet = read_octet();
            return (high_octet << 8) | low_octet;
        }

        // Number of octets consumed since the innermost push_octets_read().
        size_t octets_read() { return m_octets_read.last(); }

        void push_octets_read() { m_octets_read.append(0); }

        // Drops the innermost counter and adds its total to the enclosing one.
        void pop_octets_read()
        {
            auto popped = m_octets_read.take_last();
            if (!m_octets_read.is_empty())
                m_octets_read.last() += popped;
        }

        // Reads a variable-size integer. With `mask_length` set, the length
        // marker bit is cleared from the result (used for sizes); otherwise it
        // is kept (used for element IDs).
        // Returns an empty Optional when the data runs out or the first octet is 0.
        Optional<u64> read_variable_size_integer(bool mask_length = true)
        {
            // read_octet() asserts on an empty stream, so truncated input has
            // to be detected before the first octet is read.
            if (!has_octet()) {
                dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
                return {};
            }

            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", m_data_ptr);
            auto length_descriptor = read_octet();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor);
            if (length_descriptor == 0)
                return {};
            size_t length = vint_length(length_descriptor);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);

            u64 result;
            if (mask_length)
                result = length_descriptor & ~(1u << (8 - length));
            else
                result = length_descriptor;
            dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);
            for (size_t i = 1; i < length; i++) {
                if (!has_octet()) {
                    dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
                    return {};
                }
                u8 next_octet = read_octet();
                dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
                result = (result << 8u) | next_octet;
                dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
            }
            return result;
        }

        // Reads a variable-size integer and converts it to a signed value by
        // subtracting the bias 2^(7 * length - 1) - 1.
        // Returns an empty Optional when the data runs out or the first octet is 0.
        Optional<i64> read_variable_sized_signed_integer()
        {
            if (!has_octet())
                return {};

            auto length_descriptor = read_octet();
            if (length_descriptor == 0)
                return {};
            size_t length = vint_length(length_descriptor);

            i64 result = length_descriptor & ~(1u << (8 - length));
            for (size_t i = 1; i < length; i++) {
                if (!has_octet()) {
                    return {};
                }
                u8 next_octet = read_octet();
                result = (result << 8u) | next_octet;
            }
            // Integer arithmetic keeps the bias exact; a floating-point pow()
            // cannot represent 2^55 - 1, the bias of an eight-octet value.
            result -= (static_cast<i64>(1) << (length * 7 - 1)) - 1;
            return result;
        }

        // Skips `num_octets` octets. The caller must make sure they are available.
        void drop_octets(size_t num_octets)
        {
            VERIFY(m_size_remaining >= num_octets);
            m_size_remaining -= num_octets;
            m_octets_read.last() += num_octets;
            m_data_ptr += num_octets;
        }

        bool at_end() const { return !m_size_remaining; }
        bool has_octet() const { return m_size_remaining >= 1; }

        size_t remaining() const { return m_size_remaining; }
        void set_remaining(size_t remaining) { m_size_remaining = remaining; }

    private:
        // Total length in octets (1 to 8) of a variable-size integer, given
        // its first octet: the position of the highest set bit, counted from
        // the top. `length_descriptor` must not be 0.
        static size_t vint_length(u8 length_descriptor)
        {
            size_t length = 0;
            while (length < 8) {
                if (length_descriptor & (1u << (8 - length)))
                    break;
                length++;
            }
            return length;
        }

        const u8* m_data_ptr { nullptr };
        size_t m_size_remaining { 0 };
        Vector<size_t> m_octets_read { 0 };
    };

    // Runs `element_consumer` on the ID of each child of a master element.
    bool parse_master_element(const StringView& element_name, Function<bool(u64 element_id)> element_consumer);
    Optional<EBMLHeader> parse_ebml_header();

    bool parse_segment_elements(MatroskaDocument&);
    OwnPtr<SegmentInformation> parse_information();

    bool parse_tracks(MatroskaDocument&);
    OwnPtr<TrackEntry> parse_track_entry();
    Optional<TrackEntry::VideoTrack> parse_video_track_information();
    Optional<TrackEntry::AudioTrack> parse_audio_track_information();
    OwnPtr<Cluster> parse_cluster();
    OwnPtr<Block> parse_simple_block();

    // Readers for leaf elements; each one starts by reading the element's size.
    Optional<String> read_string_element();
    Optional<u64> read_u64_element();
    bool read_unknown_element();

    Streamer m_streamer;
};
}