Merge pull request #95291 from BlueCube3310/hdr-optimizations

Optimize .hdr loading and RGB9E5 conversion
This commit is contained in:
Rémi Verschelde 2024-08-16 10:35:32 +02:00
commit 886d5865a4
No known key found for this signature in database
GPG key ID: C3336907360768E1
3 changed files with 55 additions and 31 deletions

View file

@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
}
// Packs this color's r, g and b into a single 32-bit word made of three 9-bit
// channel fields (red in bits 0-8, green in bits 9-17, blue in bits 18-26) and
// a 5-bit shared exponent field in bits 27-31, as laid out by the return
// expressions below.
//
// NOTE(review): this listing interleaves TWO implementations of the same
// function without any diff markers:
//   * a log/pow/floor variant (pow2to9, B, N, sharedexp, cRed/cGreen/cBlue,
//     cMax, expp, sMax, exps, sRed/sGreen/sBlue), and
//   * a float bit-manipulation variant (kMaxVal, kMinVal, _r/_g/_b,
//     MaxChannel and the R/G/B/E union).
// Read as one function body, `B` is declared twice (a float constant and a
// union object) and every statement after the first `return` is unreachable.
// Presumably the log/pow lines are the ones being replaced - confirm against
// the repository before using this text as source.
_FORCE_INLINE_ uint32_t to_rgbe9995() const {
// [log/pow variant] 2^9: exclusive upper bound of a 9-bit channel value.
const float pow2to9 = 512.0f;
// [log/pow variant] Exponent bias.
const float B = 15.0f;
// [log/pow variant] Number of bits per channel.
const float N = 9.0f;
// [bit-manipulation variant] Reference implementation this variant follows:
// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
// kMaxVal = 0x1FF << 7 = 65408; kMinVal = 1 / 65536.
static const float kMaxVal = float(0x1FF << 7);
static const float kMinVal = float(1.f / (1 << 16));
// [log/pow variant] Upper clamp for the channels; same value as kMaxVal (65408).
float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)
// [bit-manipulation variant] Clamp RGB to [0, kMaxVal] (65408, i.e. hex 1.FF * 2^15).
const float _r = CLAMP(r, 0.0f, kMaxVal);
const float _g = CLAMP(g, 0.0f, kMaxVal);
const float _b = CLAMP(b, 0.0f, kMaxVal);
// [log/pow variant] Clamp RGB to [0, sharedexp].
float cRed = MAX(0.0f, MIN(sharedexp, r));
float cGreen = MAX(0.0f, MIN(sharedexp, g));
float cBlue = MAX(0.0f, MIN(sharedexp, b));
// [bit-manipulation variant] Compute the maximum channel, no less than
// kMinVal (1 / (1 << 16)), so the exponent extracted below has a lower bound.
const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));
// [log/pow variant] Largest of the clamped channels.
float cMax = MAX(cRed, MAX(cGreen, cBlue));
// [bit-manipulation variant]
// Take the exponent of the maximum channel (rounding up the 9th bit) and
// add 15 to it. When added to the channels, it causes the implicit '1.0'
// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
// of the mantissa, rounding the truncated bits.
// The union gives access to the bit pattern of each float as an int32_t.
// NOTE(review): this union object named `B` collides with the float constant
// `B` declared above by the other variant.
union {
float f;
int32_t i;
} R, G, B, E;
// [log/pow variant] Shared exponent from log2 of the largest channel,
// floored at -B - 1 and then offset by 1 + B.
float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
// [bit-manipulation variant] Build a float that keeps only the (biased,
// rounded) exponent of the maximum channel.
E.f = MaxChannel;
E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
E.i &= 0x7F800000; // Zero the mantissa
// [log/pow variant] Largest channel scaled to the shared exponent and rounded
// to nearest; used below to detect whether rounding overflowed 9 bits.
float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
// [bit-manipulation variant]
// This shifts the 9-bit values we need into the lowest bits, rounding as
// needed. Note that if the channel has a smaller exponent than the max
// channel, it will shift even more. This is intentional.
R.f = _r + E.f;
G.f = _g + E.f;
B.f = _b + E.f;
// [log/pow variant] Assume the rounded maximum overflowed 9 bits, so use the
// next exponent up; reverted below when it did not.
float exps = expp + 1.0f;
// [bit-manipulation variant]
// Convert the Bias to the correct exponent in the upper 5 bits.
// After the shift the float exponent bits start at bit 27; higher bits that
// do not fit in 32 bits are shifted out.
E.i <<= 4;
E.i += 0x10000000;
// [log/pow variant] The rounded maximum fits in 9 bits: keep expp.
if (0.0f <= sMax && sMax < pow2to9) {
exps = expp;
}
// [log/pow variant] Scale each clamped channel by 2^(exps - B - N) and round
// to nearest.
float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
// [log/pow variant] Pack red | green << 9 | blue << 18 | exponent << 27.
return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
// [bit-manipulation variant] Unreachable as listed, because of the return
// directly above.
// Combine the fields. RGB floats have unwanted data in the upper 9
// bits. Only red needs to mask them off because green and blue shift
// it out to the left.
return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
}
_FORCE_INLINE_ Color blend(const Color &p_over) const {

View file

@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
imgdata.resize(height * width * (int)sizeof(uint32_t));
{
uint8_t *w = imgdata.ptrw();
uint8_t *ptr = imgdata.ptrw();
uint8_t *ptr = (uint8_t *)w;
Vector<uint8_t> temp_read_data;
temp_read_data.resize(128);
uint8_t *temp_read_ptr = temp_read_data.ptrw();
if (width < 8 || width >= 32768) {
// Read flat data
@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
}
} else {
// Dump
f->get_buffer(temp_read_ptr, count);
for (int z = 0; z < count; ++z) {
ptr[(j * width + i++) * 4 + k] = f->get_8();
ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z];
}
}
}
@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
}
}
const bool force_linear = p_flags & FLAG_FORCE_LINEAR;
//convert
for (int i = 0; i < width * height; i++) {
float exp = pow(2.0f, ptr[3] - 128.0f);
int e = ptr[3] - 128;
Color c(
ptr[0] * exp / 255.0,
ptr[1] * exp / 255.0,
ptr[2] * exp / 255.0);
if (force_linear || (e < -15 || e > 15)) {
float exp = pow(2.0f, e);
Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0);
if (p_flags & FLAG_FORCE_LINEAR) {
c = c.srgb_to_linear();
if (force_linear) {
c = c.srgb_to_linear();
}
*(uint32_t *)ptr = c.to_rgbe9995();
} else {
// https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129
uint32_t e5 = (uint32_t)(e + 15);
*(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1));
}
*(uint32_t *)ptr = c.to_rgbe9995();
ptr += 4;
}
}

View file

@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader {
public:
virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
virtual void get_recognized_extensions(List<String> *p_extensions) const;
ImageLoaderHDR();
};