Camera: optimize some loops

Factor out some conditions in RAW8 loop.
Use floats in YUV loop.
Replace pixel getters with simple pointers.
This commit is contained in:
Megamouse 2024-04-21 00:10:20 +02:00
parent e2402b1c8f
commit e32c48d0dd
3 changed files with 83 additions and 58 deletions

View file

@ -22,6 +22,11 @@
LOG_CHANNEL(cellGem);
// Packs four 8-bit channels into a single 32-bit value.
// Bit layout of the result: r in bits 31..24, g in bits 23..16,
// b in bits 15..8 and a in bits 7..0.
static inline constexpr u32 rgba(u8 r, u8 g, u8 b, u8 a)
{
	// Mask each channel to its low 8 bits before shifting it into place.
	const u32 red_bits   = (static_cast<u32>(r) & 0xffu) << 24;
	const u32 green_bits = (static_cast<u32>(g) & 0xffu) << 16;
	const u32 blue_bits  = (static_cast<u32>(b) & 0xffu) << 8;
	const u32 alpha_bits = (static_cast<u32>(a) & 0xffu);

	return red_bits | green_bits | blue_bits | alpha_bits;
}
template <>
void fmt_class_string<gem_btn>::format(std::string& out, u64 arg)
{
@ -493,42 +498,39 @@ void gem_config_data::operator()()
{
constexpr u32 in_pitch = 640;
constexpr u32 out_pitch = 640 * 4;
u8* dst = vc_attribute.video_data_out.get_ptr();
for (u32 y = 0; y < 480 - 1; y += 2)
{
const u8* src = &video_data_in[y * in_pitch];
const u16* src0 = reinterpret_cast<const u16*>(src);
const u16* src1 = reinterpret_cast<const u16*>(src + in_pitch);
u8* dst_row = dst + y * out_pitch;
u32* dst0 = reinterpret_cast<u32*>(dst_row);
u32* dst1 = reinterpret_cast<u32*>(dst_row + out_pitch);
for (u32 x = 0; x < 640 - 1; x += 2)
{
const u32 in_offset = 1 * (y * 640 + x);
const u32 out_offset = 4 * (y * 640 + x);
const u16 top = *src0++;
const u16 bottom = *src1++;
const u8 b = video_data_in[in_offset + 0];
const u8 g0 = video_data_in[in_offset + 1];
const u8 g1 = video_data_in[in_offset + in_pitch + 0];
const u8 r = video_data_in[in_offset + in_pitch + 1];
const u8 b = (top & 0xFF);
const u8 g0 = ((top >> 8) & 0xFF);
const u8 g1 = (bottom & 0xFF);
const u8 r = ((bottom >> 8) & 0xFF);
// Top-Left
vc_attribute.video_data_out[out_offset + 0] = r; // R
vc_attribute.video_data_out[out_offset + 1] = g0; // G
vc_attribute.video_data_out[out_offset + 2] = b; // B
vc_attribute.video_data_out[out_offset + 3] = 255; // A
*dst0++ = rgba(r, g0, b, 255);
// Top-Right Pixel
vc_attribute.video_data_out[out_offset + 4] = r; // R
vc_attribute.video_data_out[out_offset + 5] = g0; // G
vc_attribute.video_data_out[out_offset + 6] = b; // B
vc_attribute.video_data_out[out_offset + 7] = 255; // A
*dst0++ = rgba(r, g0, b, 255);
// Bottom-Left Pixel
vc_attribute.video_data_out[out_offset + out_pitch + 0] = r; // R
vc_attribute.video_data_out[out_offset + out_pitch + 1] = g1; // G
vc_attribute.video_data_out[out_offset + out_pitch + 2] = b; // B
vc_attribute.video_data_out[out_offset + out_pitch + 3] = 255; // A
*dst1++ = rgba(r, g1, b, 255);
// Bottom-Right Pixel
vc_attribute.video_data_out[out_offset + out_pitch + 4] = r; // R
vc_attribute.video_data_out[out_offset + out_pitch + 5] = g1; // G
vc_attribute.video_data_out[out_offset + out_pitch + 6] = b; // B
vc_attribute.video_data_out[out_offset + out_pitch + 7] = 255; // A
*dst1++ = rgba(r, g1, b, 255);
}
}
}

View file

@ -98,6 +98,9 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
// TODO: check if pixel format and bytes per pixel match and convert if necessary
// TODO: implement or improve more conversions
const u32 width = std::min<u32>(image_buffer.width, image.width());
const u32 height = std::min<u32>(image_buffer.height, image.height());
switch (m_format)
{
case CELL_CAMERA_JPG:
@ -107,27 +110,46 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
case CELL_CAMERA_RAW8: // The game seems to expect BGGR
{
// Let's use a very simple algorithm to convert the image to raw BGGR
const auto convert_to_bggr = [&image_buffer, &image](u32 y_begin, u32 y_end)
const auto convert_to_bggr = [&image_buffer, &image, width, height](u32 y_begin, u32 y_end)
{
for (u32 y = y_begin; y < std::min<u32>(image_buffer.height, image.height()) && y < y_end; y++)
{
for (u32 x = 0; x < std::min<u32>(image_buffer.width, image.width()); x++)
{
u8& pixel = image_buffer.data[image_buffer.width * y + x];
const bool is_left_pixel = (x % 2) == 0;
const bool is_top_pixel = (y % 2) == 0;
u8* dst = &image_buffer.data[image_buffer.width * y_begin];
if (is_left_pixel && is_top_pixel)
for (u32 y = y_begin; y < height && y < y_end; y++)
{
const QRgb* src = reinterpret_cast<const QRgb*>(image.constScanLine(y));
const bool is_top_pixel = (y % 2) == 0;
// Split loops (roughly twice the performance by removing one condition)
if (is_top_pixel)
{
for (u32 x = 0; x < width; x++, dst++, src++)
{
pixel = qBlue(image.pixel(x, y));
const bool is_left_pixel = (x % 2) == 0;
if (is_left_pixel)
{
*dst = qBlue(*src);
}
else
{
*dst = qGreen(*src);
}
}
else if (is_left_pixel || is_top_pixel)
}
else
{
for (u32 x = 0; x < width; x++, dst++, src++)
{
pixel = qGreen(image.pixel(x, y));
}
else
{
pixel = qRed(image.pixel(x, y));
const bool is_left_pixel = (x % 2) == 0;
if (is_left_pixel)
{
*dst = qGreen(*src);
}
else
{
*dst = qRed(*src);
}
}
}
}
@ -154,7 +176,7 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
case CELL_CAMERA_V_Y1_U_Y0:
{
// Simple RGB to Y0_U_Y1_V conversion from stackoverflow.
const auto convert_to_yuv422 = [&image_buffer, &image, format = m_format](u32 y_begin, u32 y_end)
const auto convert_to_yuv422 = [&image_buffer, &image, width, height, format = m_format](u32 y_begin, u32 y_end)
{
constexpr int yuv_bytes_per_pixel = 2;
const int yuv_pitch = image_buffer.width * yuv_bytes_per_pixel;
@ -164,32 +186,33 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
const int y1_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 2 : 1;
const int v_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 3 : 0;
for (u32 y = y_begin; y < std::min<u32>(image_buffer.height, image.height()) && y < y_end; y++)
for (u32 y = y_begin; y < height && y < y_end; y++)
{
const QRgb* src = reinterpret_cast<const QRgb*>(image.constScanLine(y));
uint8_t* yuv_row_ptr = &image_buffer.data[y * yuv_pitch];
for (u32 x = 0; x < std::min<u32>(image_buffer.width, image.width()) - 1; x += 2)
for (u32 x = 0; x < width - 1; x += 2)
{
const QRgb pixel_1 = image.pixel(x, y);
const QRgb pixel_2 = image.pixel(x + 1, y);
const QRgb pixel_1 = *src++;
const QRgb pixel_2 = *src++;
const double r1 = qRed(pixel_1);
const double g1 = qGreen(pixel_1);
const double b1 = qBlue(pixel_1);
const double r2 = qRed(pixel_2);
const double g2 = qGreen(pixel_2);
const double b2 = qBlue(pixel_2);
const float r1 = qRed(pixel_1);
const float g1 = qGreen(pixel_1);
const float b1 = qBlue(pixel_1);
const float r2 = qRed(pixel_2);
const float g2 = qGreen(pixel_2);
const float b2 = qBlue(pixel_2);
const int y0 = (0.257 * r1) + (0.504 * g1) + (0.098 * b1) + 16.0;
const int u = -(0.148 * r1) - (0.291 * g1) + (0.439 * b1) + 128.0;
const int v = (0.439 * r1) - (0.368 * g1) - (0.071 * b1) + 128.0;
const int y1 = (0.257 * r2) + (0.504 * g2) + (0.098 * b2) + 16.0;
const int y0 = (0.257f * r1) + (0.504f * g1) + (0.098f * b1) + 16.0f;
const int u = -(0.148f * r1) - (0.291f * g1) + (0.439f * b1) + 128.0f;
const int v = (0.439f * r1) - (0.368f * g1) - (0.071f * b1) + 128.0f;
const int y1 = (0.257f * r2) + (0.504f * g2) + (0.098f * b2) + 16.0f;
const int yuv_index = x * yuv_bytes_per_pixel;
yuv_row_ptr[yuv_index + y0_offset] = std::max<u8>(0, std::min<u8>(y0, 255));
yuv_row_ptr[yuv_index + u_offset] = std::max<u8>(0, std::min<u8>( u, 255));
yuv_row_ptr[yuv_index + y1_offset] = std::max<u8>(0, std::min<u8>(y1, 255));
yuv_row_ptr[yuv_index + v_offset] = std::max<u8>(0, std::min<u8>( v, 255));
yuv_row_ptr[yuv_index + y0_offset] = static_cast<u8>(std::clamp(y0, 0, 255));
yuv_row_ptr[yuv_index + u_offset] = static_cast<u8>(std::clamp( u, 0, 255));
yuv_row_ptr[yuv_index + y1_offset] = static_cast<u8>(std::clamp(y1, 0, 255));
yuv_row_ptr[yuv_index + v_offset] = static_cast<u8>(std::clamp( v, 0, 255));
}
}
};

View file

@ -271,7 +271,7 @@ namespace gui
for (int y = 0; y < image.height(); ++y)
{
QRgb* row = reinterpret_cast<QRgb*>(image.scanLine(y));
const QRgb* row = reinterpret_cast<const QRgb*>(image.constScanLine(y));
bool row_filled = false;
for (int x = 0; x < image.width(); ++x)