LibVideo: Abstract media container format demuxing

This creates an abstract Demuxer class to allow multiple container formats to be easily used by video playback systems.
2024-07-22 10:36:24 +00:00 · 2022-10-29 17:02:43 -05:00 · 2022-10-29 17:02:43 -05:00 · 0a4def1208
parent 3a2f6c700d
commit 0a4def1208
8 changed files with 327 additions and 36 deletions
--- a/Userland/Applications/VideoPlayer/main.cpp
+++ b/Userland/Applications/VideoPlayer/main.cpp
@ -5,6 +5,7 @@
 */

 #include "LibVideo/Color/CodingIndependentCodePoints.h"
+#include "LibVideo/MatroskaDemuxer.h"
 #include <LibCore/ArgsParser.h>
 #include <LibCore/ElapsedTimer.h>
 #include <LibGUI/Application.h>
@ -30,19 +31,18 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
    auto app = TRY(GUI::Application::try_create(arguments));
    auto window = TRY(GUI::Window::try_create());

-    auto document = Video::MatroskaReader::parse_matroska_from_file(filename);
-    // FIXME: MatroskaReader should use ErrorOr
-    if (!document) {
-        outln("{} could not be read", filename);
+    auto demuxer_result = Video::MatroskaDemuxer::from_file(filename);
+    if (demuxer_result.is_error()) {
+        outln("Error parsing Matroska: {}", demuxer_result.release_error().string_literal());
        return 1;
    }
-    auto const& optional_track = document->track_for_track_type(Video::TrackEntry::TrackType::Video);
-    if (!optional_track.has_value())
+    auto demuxer = demuxer_result.release_value();
+    auto tracks = demuxer->get_tracks_for_type(Video::TrackType::Video);
+    if (tracks.is_empty()) {
+        outln("No video tracks present.");
        return 1;
-    auto const& track = optional_track.value();
-    auto const video_track = track.video_track().value();
-
-    auto image = TRY(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRx8888, Gfx::IntSize(video_track.pixel_width, video_track.pixel_height)));
+    }
+    auto track = tracks[0];

    auto main_widget = TRY(window->try_set_main_widget<GUI::Widget>());
    main_widget->set_fill_with_background_color(true);
@ -50,33 +50,18 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
    auto image_widget = TRY(main_widget->try_add<GUI::ImageWidget>());

    OwnPtr<Video::VideoDecoder> decoder = make<Video::VP9::Decoder>();
-    size_t cluster_index = 0;
-    size_t block_index = 0;
-    size_t frame_index = 0;
    auto frame_number = 0u;

-    auto get_next_sample = [&]() -> Optional<ByteBuffer> {
-        for (; cluster_index < document->clusters().size(); cluster_index++) {
-            for (; block_index < document->clusters()[cluster_index].blocks().size(); block_index++) {
-                auto const& candidate_block = document->clusters()[cluster_index].blocks()[block_index];
-                if (candidate_block.track_number() != track.track_number())
-                    continue;
-                if (frame_index < candidate_block.frames().size())
-                    return candidate_block.frame(frame_index);
-                frame_index = 0;
-            }
-            block_index = 0;
-        }
-        return {};
-    };
-
    auto display_next_frame = [&]() {
-        auto optional_sample = get_next_sample();
+        auto sample_result = demuxer->get_next_video_sample_for_track(track);

-        if (!optional_sample.has_value())
+        if (sample_result.is_error()) {
+            outln("Error demuxing next sample {}: {}", frame_number, sample_result.release_error().string_literal());
            return;
+        }

-        auto result = decoder->receive_sample(optional_sample.release_value());
+        auto sample = sample_result.release_value();
+        auto result = decoder->receive_sample(sample->data());

        if (result.is_error()) {
            outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
@ -91,23 +76,22 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
        auto frame = frame_result.release_value();

        auto& cicp = frame->cicp();
-        cicp.adopt_specified_values(video_track.color_format.to_cicp());
+        cicp.adopt_specified_values(sample->container_cicp());
        cicp.default_code_points_if_unspecified({ Video::ColorPrimaries::BT709, Video::TransferCharacteristics::BT709, Video::MatrixCoefficients::BT709, Video::ColorRange::Studio });

-        auto convert_result = frame->output_to_bitmap(image);
+        auto convert_result = frame->to_bitmap();
        if (convert_result.is_error()) {
            outln("Error creating bitmap for frame {}: {}", frame_number, convert_result.error().string_literal());
            return;
        }

-        image_widget->set_bitmap(image);
+        image_widget->set_bitmap(convert_result.release_value());
+        image_widget->set_fixed_size(frame->size());
        image_widget->update();

-        frame_index++;
        frame_number++;
    };

-    image_widget->set_fixed_size(video_track.pixel_width, video_track.pixel_height);
    image_widget->on_click = [&]() { display_next_frame(); };

    if (benchmark) {
--- a/Userland/Libraries/LibVideo/CMakeLists.txt
+++ b/Userland/Libraries/LibVideo/CMakeLists.txt
@ -2,6 +2,7 @@ set(SOURCES
    Color/ColorConverter.cpp
    Color/ColorPrimaries.cpp
    Color/TransferCharacteristics.cpp
+    MatroskaDemuxer.cpp
    MatroskaReader.cpp
    VideoFrame.cpp
    VP9/BitStream.cpp
--- a/Userland/Libraries/LibVideo/DecoderError.h
+++ b/Userland/Libraries/LibVideo/DecoderError.h
@ -22,6 +22,7 @@ using DecoderErrorOr = ErrorOr<T, DecoderError>;
 enum class DecoderErrorCategory : u32 {
    Unknown,
    IO,
+    EndOfStream,
    Memory,
    // The input is corrupted.
    Corrupted,
--- a/Userland/Libraries/LibVideo/Demuxer.h
+++ b/Userland/Libraries/LibVideo/Demuxer.h
@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/NonnullOwnPtr.h>
+#include <LibCore/Object.h>
+
+#include "DecoderError.h"
+#include "Sample.h"
+#include "Track.h"
+
+namespace Video {
+
+// Abstract interface through which playback code pulls samples, track by track,
+// out of a media container without knowing the container format.
+class Demuxer {
+public:
+    virtual ~Demuxer() = default;
+
+    // Returns the tracks of the requested type that the container exposes.
+    virtual Vector<Track> get_tracks_for_type(TrackType type) = 0;
+
+    // Fetches the next sample of a video track. The track must be a Video track and
+    // the implementation must produce a video sample; both conditions are VERIFY()ed.
+    // Any error from get_next_sample_for_track() is propagated to the caller.
+    DecoderErrorOr<NonnullOwnPtr<VideoSample>> get_next_video_sample_for_track(Track track)
+    {
+        VERIFY(track.type() == TrackType::Video);
+        auto next_sample = TRY(get_next_sample_for_track(track));
+        VERIFY(next_sample->is_video_sample());
+        return next_sample.release_nonnull<VideoSample>();
+    }
+
+    virtual DecoderErrorOr<void> seek_to_most_recent_keyframe(Track track, size_t timestamp) = 0;
+
+    virtual Time duration() = 0;
+
+protected:
+    // Format-specific hook that produces the next sample of `track`.
+    virtual DecoderErrorOr<NonnullOwnPtr<Sample>> get_next_sample_for_track(Track track) = 0;
+};
+
+}
--- a/Userland/Libraries/LibVideo/MatroskaDemuxer.cpp
+++ b/Userland/Libraries/LibVideo/MatroskaDemuxer.cpp
@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "MatroskaDemuxer.h"
+
+namespace Video {
+
+// Parses the Matroska file at `filename` and wraps the resulting document in a demuxer.
+// Yields an IO-category DecoderError when the reader produces no document.
+DecoderErrorOr<NonnullOwnPtr<MatroskaDemuxer>> MatroskaDemuxer::from_file(StringView filename)
+{
+    // FIXME: MatroskaReader should return errors.
+    auto parsed_document = MatroskaReader::parse_matroska_from_file(filename);
+    if (parsed_document) {
+        // The constructor takes an lvalue reference, so the pointer needs a name.
+        auto owned_document = parsed_document.release_nonnull();
+        return make<MatroskaDemuxer>(owned_document);
+    }
+    return DecoderError::format(DecoderErrorCategory::IO, "Failed to open matroska from file '{}'", filename);
+}
+
+// Parses an in-memory Matroska buffer and wraps the resulting document in a demuxer.
+// Yields an IO-category DecoderError when the reader produces no document.
+DecoderErrorOr<NonnullOwnPtr<MatroskaDemuxer>> MatroskaDemuxer::from_data(Span<u8 const> data)
+{
+    // FIXME: MatroskaReader should return errors.
+    auto parsed_document = MatroskaReader::parse_matroska_from_data(data.data(), data.size());
+    if (parsed_document) {
+        // The constructor takes an lvalue reference, so the pointer needs a name.
+        auto owned_document = parsed_document.release_nonnull();
+        return make<MatroskaDemuxer>(owned_document);
+    }
+    return DecoderError::format(DecoderErrorCategory::IO, "Failed to open matroska from data");
+}
+
+// Collects every track of the document whose Matroska track type corresponds to `type`.
+Vector<Track> MatroskaDemuxer::get_tracks_for_type(TrackType type)
+{
+    // Translate the container-agnostic track type into its Matroska counterpart.
+    Video::TrackEntry::TrackType wanted_type;
+    switch (type) {
+    case TrackType::Video:
+        wanted_type = Video::TrackEntry::TrackType::Video;
+        break;
+    case TrackType::Audio:
+        wanted_type = Video::TrackEntry::TrackType::Audio;
+        break;
+    case TrackType::Subtitles:
+        wanted_type = Video::TrackEntry::TrackType::Subtitle;
+        break;
+    }
+
+    Vector<Track> matching_tracks;
+    for (auto const& table_entry : m_document->tracks()) {
+        auto const& entry = table_entry.value;
+        if (entry->track_type() != wanted_type)
+            continue;
+        matching_tracks.append(Track(type, entry->track_number()));
+    }
+
+    // FIXME: Sort the vector, presumably the hashtable will not have a consistent order.
+    return matching_tracks;
+}
+
+// Returns a pointer to the read position stored for `track`, first inserting a
+// default-constructed entry if the track has none yet. Fails only if that insertion fails.
+ErrorOr<MatroskaDemuxer::TrackStatus*> MatroskaDemuxer::get_track_status(Track track)
+{
+    bool const already_tracked = m_track_statuses.contains(track);
+    if (!already_tracked)
+        TRY(m_track_statuses.try_set(track, TrackStatus()));
+
+    return &m_track_statuses.get(track).release_value();
+}
+
+// Rewinds `track` to the start of the stream when `timestamp` is 0.
+// Any other timestamp is reported as not implemented.
+DecoderErrorOr<void> MatroskaDemuxer::seek_to_most_recent_keyframe(Track track, size_t timestamp)
+{
+    if (timestamp != 0)
+        return DecoderError::not_implemented();
+
+    // Reset the stored read position so the next sample is the track's first frame.
+    auto* status = DECODER_TRY_ALLOC(get_track_status(track));
+    status->m_cluster_index = 0;
+    status->m_block_index = 0;
+    status->m_frame_index = 0;
+    return {};
+}
+
+// Returns the next undelivered frame for `track`, resuming from (and advancing) the
+// cluster/block/frame position remembered for that track.
+//
+// Errors:
+// - Corrupted when the segment information or the track's video metadata is absent.
+// - not_implemented() for any track type other than Video.
+// - EndOfStream once all clusters have been scanned without finding another frame.
+DecoderErrorOr<NonnullOwnPtr<Sample>> MatroskaDemuxer::get_next_sample_for_track(Track track)
+{
+    auto track_status = DECODER_TRY_ALLOC(get_track_status(track));
+
+    for (; track_status->m_cluster_index < m_document->clusters().size(); track_status->m_cluster_index++) {
+        auto const& cluster = m_document->clusters()[track_status->m_cluster_index];
+        for (; track_status->m_block_index < cluster.blocks().size(); track_status->m_block_index++) {
+            auto const& block = cluster.blocks()[track_status->m_block_index];
+            // Blocks that belong to other tracks are skipped.
+            if (block.track_number() != track.identifier())
+                continue;
+            if (track_status->m_frame_index < block.frame_count()) {
+                switch (track.type()) {
+                case TrackType::Video: {
+                    // Both pieces of metadata are optional in the parsed document, so report a
+                    // corrupted stream instead of dereferencing an empty Optional.
+                    auto&& segment_information = m_document->segment_information();
+                    if (!segment_information.has_value())
+                        return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Matroska document has no segment information."sv);
+                    // NOTE(review): the result of track_for_track_number() is still dereferenced
+                    //               unchecked; confirm whether it can be empty for a track we handed out.
+                    auto&& track_entry = m_document->track_for_track_number(track.identifier());
+                    auto&& video_track = track_entry->video_track();
+                    if (!video_track.has_value())
+                        return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Matroska video track has no video information."sv);
+
+                    // FIXME: This makes a copy of the sample, which shouldn't be necessary.
+                    //        Matroska should make a RefPtr<ByteBuffer>, probably.
+                    auto cicp = video_track->color_format.to_cicp();
+                    Time timestamp = Time::from_nanoseconds((cluster.timestamp() + block.timestamp()) * segment_information->timestamp_scale());
+                    return make<VideoSample>(block.frame(track_status->m_frame_index++), cicp, timestamp);
+                }
+                default:
+                    return DecoderError::not_implemented();
+                }
+            }
+            // Every frame of this block has been delivered; start the next block at frame 0.
+            track_status->m_frame_index = 0;
+        }
+        // Every block of this cluster has been visited; start the next cluster at block 0.
+        track_status->m_block_index = 0;
+    }
+    return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream reached."sv);
+}
+
+// Returns the segment duration multiplied by the timestamp scale, as nanoseconds.
+// Falls back to Time::zero() when the segment information or its duration is absent.
+Time MatroskaDemuxer::duration()
+{
+    auto&& segment_information = m_document->segment_information();
+    if (!segment_information.has_value() || !segment_information->duration().has_value())
+        return Time::zero();
+    return Time::from_nanoseconds(segment_information->duration().value() * segment_information->timestamp_scale());
+}
+
+}
--- a/Userland/Libraries/LibVideo/MatroskaDemuxer.h
+++ b/Userland/Libraries/LibVideo/MatroskaDemuxer.h
@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/HashMap.h>
+
+#include "Demuxer.h"
+#include "MatroskaReader.h"
+
+namespace Video {
+
+// Demuxer implementation backed by a parsed MatroskaDocument.
+class MatroskaDemuxer final : public Demuxer {
+public:
+    // FIXME: We should instead accept some abstract data streaming type so that the demuxer
+    //        can work with non-contiguous data.
+    static DecoderErrorOr<NonnullOwnPtr<MatroskaDemuxer>> from_file(StringView filename);
+    static DecoderErrorOr<NonnullOwnPtr<MatroskaDemuxer>> from_data(Span<u8 const> data);
+
+    // Takes ownership of the document by moving out of the caller's pointer,
+    // even though the parameter is an lvalue reference.
+    MatroskaDemuxer(NonnullOwnPtr<MatroskaDocument>& document)
+        : m_document(move(document))
+    {
+    }
+
+    Vector<Track> get_tracks_for_type(TrackType type) override;
+
+    DecoderErrorOr<void> seek_to_most_recent_keyframe(Track track, size_t timestamp) override;
+
+    Time duration() override;
+
+protected:
+    DecoderErrorOr<NonnullOwnPtr<Sample>> get_next_sample_for_track(Track track) override;
+
+private:
+    // Read position of one track: indices of the cluster, the block within that
+    // cluster, and the frame within that block to be examined next.
+    struct TrackStatus {
+        size_t m_cluster_index { 0 };
+        size_t m_block_index { 0 };
+        size_t m_frame_index { 0 };
+    };
+
+    // Looks up the read position for `track`, creating a zeroed one on first use.
+    ErrorOr<TrackStatus*> get_track_status(Track track);
+
+    NonnullOwnPtr<MatroskaDocument> m_document;
+
+    // Per-track read positions, keyed by track.
+    HashMap<Track, TrackStatus> m_track_statuses;
+};
+
+}
--- a/Userland/Libraries/LibVideo/Sample.h
+++ b/Userland/Libraries/LibVideo/Sample.h
@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+#include <AK/Time.h>
+#include <LibVideo/Color/CodingIndependentCodePoints.h>
+
+namespace Video {
+
+// Polymorphic base for a unit of demuxed data.
+class Sample {
+public:
+    virtual ~Sample() = default;
+
+    // Overridden to return true by sample types that carry video data.
+    virtual bool is_video_sample() const { return false; }
+};
+
+// A demuxed video sample: the sample bytes, the color information signalled by
+// the container, and the sample's timestamp.
+class VideoSample : public Sample {
+public:
+    // `data` is taken by value and moved into place, so a caller that no longer
+    // needs its buffer can move it in instead of forcing a copy.
+    VideoSample(ByteBuffer data, CodingIndependentCodePoints container_cicp, Time timestamp)
+        : m_data(move(data))
+        , m_container_cicp(container_cicp)
+        , m_timestamp(timestamp)
+    {
+    }
+
+    bool is_video_sample() const override { return true; }
+    ByteBuffer const& data() const { return m_data; }
+    CodingIndependentCodePoints container_cicp() const { return m_container_cicp; }
+    Time timestamp() const { return m_timestamp; }
+
+private:
+    ByteBuffer m_data;
+    CodingIndependentCodePoints m_container_cicp;
+    Time m_timestamp;
+};
+
+// FIXME: Add samples for audio, subtitles, etc.
+
+}
--- a/Userland/Libraries/LibVideo/Track.h
+++ b/Userland/Libraries/LibVideo/Track.h
@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/HashFunctions.h>
+#include <AK/Traits.h>
+#include <AK/Types.h>
+
+namespace Video {
+
+// Kinds of tracks a demuxer can be asked for.
+enum class TrackType : u32 {
+    Video,
+    Audio,
+    Subtitles,
+};
+
+// Identifies one track of a container by its type and a numeric identifier.
+// Equality and hashing both cover the type and the identifier.
+struct Track {
+public:
+    Track(TrackType type, size_t identifier)
+        : m_type(type)
+        , m_identifier(identifier)
+    {
+    }
+
+    // Const like the other accessors, so it can be called on a const Track.
+    TrackType type() const { return m_type; }
+    size_t identifier() const { return m_identifier; }
+
+    bool operator==(Track const& other) const
+    {
+        return m_type == other.m_type && m_identifier == other.m_identifier;
+    }
+    unsigned hash() const
+    {
+        return pair_int_hash(to_underlying(m_type), m_identifier);
+    }
+
+private:
+    TrackType m_type;
+    size_t m_identifier;
+};
+
+}
+
+// Hash traits for Video::Track; hashing is forwarded to Track::hash().
+template<>
+struct AK::Traits<Video::Track> : public GenericTraits<Video::Track> {
+    static unsigned hash(Video::Track const& t) { return t.hash(); }
+};