From 27fdf8361c454ed9de487201e4cc082dd177ac4b Mon Sep 17 00:00:00 2001 From: FalseHonesty Date: Sun, 27 Jun 2021 15:55:41 -0400 Subject: [PATCH] LibVideo/VP9: Finish implementing block decoding (6.4.4) Though technically block decoding calls into some other incomplete methods, so it isn't functionally complete yet. However, we are very close to being done with the 6.4.X sections :) --- Userland/Libraries/LibVideo/VP9/Enums.h | 10 +- Userland/Libraries/LibVideo/VP9/Parser.cpp | 107 ++++++++++++++---- Userland/Libraries/LibVideo/VP9/Parser.h | 22 +++- .../Libraries/LibVideo/VP9/TreeParser.cpp | 50 +++++--- 4 files changed, 143 insertions(+), 46 deletions(-) diff --git a/Userland/Libraries/LibVideo/VP9/Enums.h b/Userland/Libraries/LibVideo/VP9/Enums.h index 94e6ca9579..a478e7beb1 100644 --- a/Userland/Libraries/LibVideo/VP9/Enums.h +++ b/Userland/Libraries/LibVideo/VP9/Enums.h @@ -32,7 +32,7 @@ enum ColorRange { FullSwing }; -enum InterpolationFilter { +enum InterpolationFilter : u8 { EightTap = 0, EightTapSmooth = 1, EightTapSharp = 2, @@ -40,7 +40,7 @@ enum InterpolationFilter { Switchable = 4 }; -enum ReferenceFrame { +enum ReferenceFrame : u8 { // 0 is both INTRA_FRAME and NONE because the value's meaning changes depending on which index they're in on the ref_frame array None = 0, IntraFrame = 0, @@ -49,7 +49,7 @@ enum ReferenceFrame { AltRefFrame = 3, }; -enum TXMode { +enum TXMode : u8 { Only_4x4 = 0, Allow_8x8 = 1, Allow_16x16 = 2, @@ -57,14 +57,14 @@ enum TXMode { TXModeSelect = 4, }; -enum TXSize { +enum TXSize : u8 { TX_4x4 = 0, TX_8x8 = 1, TX_16x16 = 2, TX_32x32 = 3, }; -enum ReferenceMode { +enum ReferenceMode : u8 { SingleReference = 0, CompoundReference = 1, ReferenceModeSelect = 2, diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp index 2dd82741b3..edae871a7c 100644 --- a/Userland/Libraries/LibVideo/VP9/Parser.cpp +++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp @@ -21,6 +21,37 @@ 
Parser::Parser(Decoder& decoder)
 {
 }
 
+Parser::~Parser()
+{
+    cleanup_tile_allocations();
+    if (m_prev_segment_ids)
+        free(m_prev_segment_ids);
+}
+
+void Parser::cleanup_tile_allocations()
+{
+    if (m_skips)
+        free(m_skips);
+    if (m_tx_sizes)
+        free(m_tx_sizes);
+    if (m_mi_sizes)
+        free(m_mi_sizes);
+    if (m_y_modes)
+        free(m_y_modes);
+    if (m_segment_ids)
+        free(m_segment_ids);
+    if (m_ref_frames)
+        free(m_ref_frames);
+    if (m_interp_filters)
+        free(m_interp_filters);
+    if (m_mvs)
+        free(m_mvs);
+    if (m_sub_mvs)
+        free(m_sub_mvs);
+    if (m_sub_modes)
+        free(m_sub_modes);
+}
+
 /* (6.1) */
 bool Parser::parse_frame(ByteBuffer const& frame_data)
 {
@@ -400,15 +431,9 @@ bool Parser::setup_past_independence()
         }
     }
     m_segmentation_abs_or_delta_update = false;
-    m_prev_segment_ids.clear();
-    m_prev_segment_ids.ensure_capacity(m_mi_rows);
-    for (auto row = 0u; row < m_mi_rows; row++) {
-        Vector sub_vector = {};
-        sub_vector.ensure_capacity(m_mi_cols);
-        for (auto col = 0u; col < m_mi_cols; col++)
-            sub_vector.append(0);
-        m_prev_segment_ids.append(sub_vector);
-    }
+    if (m_prev_segment_ids)
+        free(m_prev_segment_ids);
+    m_prev_segment_ids = static_cast<u8*>(calloc(m_mi_rows * m_mi_cols, sizeof(u8))); // calloc: every entry must start at 0, as the removed Vector code guaranteed
     m_loop_filter_delta_enabled = true;
     m_loop_filter_ref_deltas[IntraFrame] = 1;
     m_loop_filter_ref_deltas[LastFrame] = 0;
@@ -714,10 +739,30 @@ bool Parser::setup_compound_reference_mode()
     return true;
 }
 
+void Parser::allocate_tile_data()
+{
+    auto dimensions = m_mi_rows * m_mi_cols;
+    if (dimensions == m_allocated_dimensions)
+        return; // existing buffers already hold this many positions
+    cleanup_tile_allocations();
+    m_skips = static_cast<bool*>(malloc(sizeof(bool) * dimensions));
+    m_tx_sizes = static_cast<TXSize*>(malloc(sizeof(TXSize) * dimensions));
+    m_mi_sizes = static_cast<u32*>(malloc(sizeof(u32) * dimensions));
+    m_y_modes = static_cast<u8*>(malloc(sizeof(u8) * dimensions));
+    m_segment_ids = static_cast<u8*>(malloc(sizeof(u8) * dimensions));
+    m_ref_frames = static_cast<ReferenceFrame*>(malloc(sizeof(ReferenceFrame) * dimensions * 2)); // 2 ref_list entries per position
+    m_interp_filters = static_cast<InterpolationFilter*>(malloc(sizeof(InterpolationFilter) * dimensions));
+    m_mvs = static_cast<InterMode*>(malloc(sizeof(InterMode) * dimensions * 2)); // 2 ref_list entries per position
+    m_sub_mvs = static_cast<InterMode*>(malloc(sizeof(InterMode) * dimensions * 2 * 4)); // 2 ref_list entries x 4 sub-blocks per position
+    m_sub_modes = static_cast<IntraMode*>(malloc(sizeof(IntraMode) * dimensions * 4)); // 4 sub-blocks per position
+    m_allocated_dimensions = dimensions;
+}
+
 bool Parser::decode_tiles()
 {
     auto tile_cols = 1 << m_tile_cols_log2;
     auto tile_rows = 1 << m_tile_rows_log2;
+    allocate_tile_data();
     SAFE_CALL(clear_above_context());
     for (auto tile_row = 0; tile_row < tile_rows; tile_row++) {
         for (auto tile_col = 0; tile_col < tile_cols; tile_col++) {
@@ -732,7 +777,6 @@ bool Parser::decode_tiles()
             SAFE_CALL(m_bit_stream->exit_bool());
         }
     }
-
     return true;
 }
 
@@ -833,8 +877,32 @@ bool Parser::decode_block(u32 row, u32 col, u8 subsize)
     SAFE_CALL(mode_info());
     m_eob_total = 0;
     SAFE_CALL(residual());
-    // FIXME: Finish implementing
-    // note: when finished, re-enable calculate_default_intra_mode_probability's usage of m_sub_modes
+    if (m_is_inter && subsize >= Block_8x8 && m_eob_total == 0)
+        m_skip = true;
+    for (size_t y = 0; y < num_8x8_blocks_high_lookup[subsize]; y++) {
+        for (size_t x = 0; x < num_8x8_blocks_wide_lookup[subsize]; x++) {
+            auto pos = (row + y) * m_mi_cols + (col + x);
+            m_skips[pos] = m_skip;
+            m_tx_sizes[pos] = m_tx_size;
+            m_mi_sizes[pos] = m_mi_size;
+            m_y_modes[pos] = m_y_mode;
+            m_segment_ids[pos] = m_segment_id;
+            for (size_t ref_list = 0; ref_list < 2; ref_list++)
+                m_ref_frames[(pos * 2) + ref_list] = m_ref_frame[ref_list];
+            if (m_is_inter) {
+                m_interp_filters[pos] = m_interp_filter;
+                for (size_t ref_list = 0; ref_list < 2; ref_list++) {
+                    auto pos_with_ref_list = pos * 2 + ref_list;
+                    m_mvs[pos_with_ref_list] = m_block_mvs[ref_list][3];
+                    for (size_t b = 0; b < 4; b++)
+                        m_sub_mvs[pos_with_ref_list * 4 + b] = m_block_mvs[ref_list][b];
+                }
+            } else {
+                for (size_t b = 0; b < 4; b++)
+                    m_sub_modes[pos * 4 + b] = static_cast<IntraMode>(m_block_sub_modes[b]);
+            }
+        }
+    }
     return true;
 }
 
@@ -916,10 +984,10 @@ bool Parser::read_tx_size(bool allow_select)
 
 bool Parser::inter_frame_mode_info()
 {
-    m_left_ref_frame[0] = m_available_l ? m_ref_frames[m_mi_row][m_mi_col - 1][0] : IntraFrame;
-    m_above_ref_frame[0] = m_available_u ? m_ref_frames[m_mi_row - 1][m_mi_col][0] : IntraFrame;
-    m_left_ref_frame[1] = m_available_l ? m_ref_frames[m_mi_row][m_mi_col - 1][1] : None;
-    m_above_ref_frame[1] = m_available_u ? m_ref_frames[m_mi_row - 1][m_mi_col][1] : None;
+    m_left_ref_frame[0] = m_available_l ? m_ref_frames[(m_mi_row * m_mi_cols + (m_mi_col - 1)) * 2] : IntraFrame; // decode_block() stores entries at pos * 2 + ref_list
+    m_above_ref_frame[0] = m_available_u ? m_ref_frames[((m_mi_row - 1) * m_mi_cols + m_mi_col) * 2] : IntraFrame;
+    m_left_ref_frame[1] = m_available_l ? m_ref_frames[(m_mi_row * m_mi_cols + (m_mi_col - 1)) * 2 + 1] : None;
+    m_above_ref_frame[1] = m_available_u ? m_ref_frames[((m_mi_row - 1) * m_mi_cols + m_mi_col) * 2 + 1] : None;
     m_left_intra = m_left_ref_frame[0] <= IntraFrame;
     m_above_intra = m_above_ref_frame[0] <= IntraFrame;
     m_left_single = m_left_ref_frame[1] <= None;
@@ -973,7 +1041,7 @@ u8 Parser::get_segment_id()
     u8 segment = 7;
     for (size_t y = 0; y < ymis; y++) {
         for (size_t x = 0; x < xmis; x++) {
-            segment = min(segment, m_prev_segment_ids[m_mi_row + y][m_mi_col + x]);
+            segment = min(segment, m_prev_segment_ids[(m_mi_row + y) * m_mi_cols + (m_mi_col + x)]); // row-major index into the flat m_mi_rows * m_mi_cols buffer
         }
     }
     return segment;
@@ -1051,8 +1119,7 @@ bool Parser::inter_block_mode_info()
                 for (auto x = 0; x < m_num_4x4_w; x++) {
                     auto block = (idy + y) * 2 + idx + x;
                     for (auto ref_list = 0; ref_list < 1 + is_compound; ref_list++) {
-                        (void)block;
-                        // TODO: m_block_mvs[ref_list][block] = m_mv[ref_list];
+                        m_block_mvs[ref_list][block] = m_mv[ref_list];
                     }
                 }
             }
@@ -1063,7 +1130,7 @@ bool Parser::inter_block_mode_info()
     SAFE_CALL(assign_mv(is_compound));
     for (auto ref_list = 0; ref_list < 1 + is_compound; ref_list++) {
         for (auto block = 0; block < 4; block++) {
-            // TODO: m_block_mvs[ref_list][block] = m_mv[ref_list];
+            m_block_mvs[ref_list][block] = m_mv[ref_list];
         }
     }
     return true;
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.h b/Userland/Libraries/LibVideo/VP9/Parser.h
index 0382be9fdb..8b1061652e 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.h
+++ b/Userland/Libraries/LibVideo/VP9/Parser.h
@@ -24,6 +24,7 @@ class Parser {
 
 public:
     explicit Parser(Decoder&);
+    ~Parser();
     bool parse_frame(ByteBuffer const&);
     void dump_info();
 
@@ -45,6 +46,8 @@ private:
     /* Utilities */
     void clear_context(Vector& context, size_t size);
     void clear_context(Vector>& context, size_t outer_size, size_t inner_size);
+    void allocate_tile_data();
+    void cleanup_tile_allocations();
 
     /* (6.1) Frame Syntax */
     bool trailing_bits();
@@ -208,30 +211,39 @@ private:
     u8 m_uv_mode { 0 }; // FIXME: Is u8 the right size?
     ReferenceFrame m_left_ref_frame[2];
     ReferenceFrame m_above_ref_frame[2];
-    Vector>> m_ref_frames; // TODO: Can we make these fixed sized allocations?
     bool m_left_intra { false };
     bool m_above_intra { false };
     bool m_left_single { false };
     bool m_above_single { false };
-    Vector> m_prev_segment_ids;
     InterpolationFilter m_interp_filter { EightTap };
     InterMode m_mv[2];
     InterMode m_near_mv[2];
     InterMode m_nearest_mv[2];
-    Vector>> m_sub_modes; // FIXME: Can we make these fixed sized allocations?
     u32 m_ref_frame_width[NUM_REF_FRAMES];
     u32 m_ref_frame_height[NUM_REF_FRAMES];
     u32 m_eob_total { 0 };
     u8 m_tx_type { 0 };
     u8 m_token_cache[1024];
     i32 m_tokens[1024];
-
     bool m_use_hp { false };
-
     TXMode m_tx_mode;
     ReferenceMode m_reference_mode;
     ReferenceFrame m_comp_fixed_ref;
     ReferenceFrame m_comp_var_ref[2];
+    InterMode m_block_mvs[2][4];
+    u8* m_prev_segment_ids { nullptr };
+
+    u32 m_allocated_dimensions { 0 };
+    bool* m_skips { nullptr };
+    TXSize* m_tx_sizes { nullptr };
+    u32* m_mi_sizes { nullptr };
+    u8* m_y_modes { nullptr };
+    u8* m_segment_ids { nullptr };
+    ReferenceFrame* m_ref_frames { nullptr };
+    InterpolationFilter* m_interp_filters { nullptr };
+    InterMode* m_mvs { nullptr };
+    InterMode* m_sub_mvs { nullptr };
+    IntraMode* m_sub_modes { nullptr };
 
     OwnPtr m_bit_stream;
     OwnPtr m_probability_tables;
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
index 47b7a14e19..aeb71a7930 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
@@ -208,26 +208,26 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node)
 {
     u32 above_mode, left_mode;
     if (m_decoder.m_mi_size >= Block_8x8) {
-        above_mode = false // FIXME: AVAIL_U
-            ? m_decoder.m_sub_modes[m_decoder.m_mi_row - 1][m_decoder.m_mi_col][2]
+        above_mode = AVAIL_U
+            ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2]
             : DcPred;
-        left_mode = false // FIXME: AVAIL_L
-            ? m_decoder.m_sub_modes[m_decoder.m_mi_row][m_decoder.m_mi_col - 1][1]
+        left_mode = AVAIL_L
+            ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1]
             : DcPred;
     } else {
         if (m_idy) {
             above_mode = m_decoder.m_block_sub_modes[m_idx];
         } else {
-            above_mode = false // FIXME: AVAIL_U
-                ? m_decoder.m_sub_modes[m_decoder.m_mi_row - 1][m_decoder.m_mi_col][2 + m_idx]
+            above_mode = AVAIL_U
+                ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2 + m_idx]
                 : DcPred;
         }
         if (m_idx) {
             left_mode = m_decoder.m_block_sub_modes[m_idy * 2];
         } else {
-            left_mode = false // FIXME: AVAIL_L
-                ? m_decoder.m_sub_modes[m_decoder.m_mi_row][m_decoder.m_mi_col - 1][1 + m_idy * 2]
+            left_mode = AVAIL_L
+                ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1 + m_idy * 2]
                 : DcPred;
         }
     }
@@ -265,12 +265,10 @@ u8 TreeParser::calculate_segment_id_probability(u8 node)
 u8 TreeParser::calculate_skip_probability()
 {
     m_ctx = 0;
-    if (AVAIL_U) {
-        // FIXME: m_ctx += m_skips[m_mi_row - 1][m_mi_col];
-    }
-    if (AVAIL_L) {
-        // FIXME: m_ctx += m_skips[m_mi_row][m_mi_col - 1];
-    }
+    if (AVAIL_U)
+        m_ctx += m_decoder.m_skips[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col];
+    if (AVAIL_L)
+        m_ctx += m_decoder.m_skips[m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1];
     return m_decoder.m_probability_tables->skip_prob()[m_ctx];
 }
 
@@ -543,7 +541,16 @@ u8 TreeParser::calculate_tx_size_probability(u8 node)
 {
     auto above = m_decoder.m_max_tx_size;
     auto left = m_decoder.m_max_tx_size;
-    // FIXME: Fix varying above/left when Skips is implemented
+    auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col;
+    if (AVAIL_U && !m_decoder.m_skips[u_pos])
+        above = m_decoder.m_tx_sizes[u_pos];
+    auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1;
+    if (AVAIL_L && !m_decoder.m_skips[l_pos])
+        left = m_decoder.m_tx_sizes[l_pos];
+    if (!AVAIL_L)
+        left = above;
+    if (!AVAIL_U)
+        above = left;
     m_ctx = (above + left) > m_decoder.m_max_tx_size;
     return m_decoder.m_probability_tables->tx_probs()[m_decoder.m_max_tx_size][m_ctx][node];
 }
@@ -557,7 +564,18 @@ u8 TreeParser::calculate_inter_mode_probability(u8 node)
 
 u8 TreeParser::calculate_interp_filter_probability(u8 node)
 {
-    // FIXME: Implement ctx calculation when InterpFilters is implemented
+    auto left_interp = (AVAIL_L && m_decoder.m_left_ref_frame[0] > IntraFrame)
+        ? m_decoder.m_interp_filters[m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1]
+        : 3;
+    auto above_interp = (AVAIL_U && m_decoder.m_above_ref_frame[0] > IntraFrame)
+        ? m_decoder.m_interp_filters[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col] // position one row up, not the left neighbour
+        : 3;
+    if (left_interp == above_interp || (left_interp != 3 && above_interp == 3))
+        m_ctx = left_interp;
+    else if (left_interp == 3 && above_interp != 3)
+        m_ctx = above_interp;
+    else
+        m_ctx = 3;
     return m_decoder.m_probability_tables->interp_filter_probs()[m_ctx][node];
 }