Merge pull request #95291 from BlueCube3310/hdr-optimizations

Optimize .hdr loading and RGB9E5 conversion
2024-09-30 16:38:46 +00:00 · 2024-08-16 10:35:32 +02:00 · 2024-08-16 10:35:32 +02:00 · 886d5865a4
parent b1c624beb5 80cf6cbfe9
commit 886d5865a4
3 changed files with 55 additions and 31 deletions
--- a/core/math/color.h
+++ b/core/math/color.h
@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
 	}

 	_FORCE_INLINE_ uint32_t to_rgbe9995() const {
-		const float pow2to9 = 512.0f;
-		const float B = 15.0f;
-		const float N = 9.0f;
+		// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
+		static const float kMaxVal = float(0x1FF << 7);
+		static const float kMinVal = float(1.f / (1 << 16));

-		float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)
+		// Clamp RGB to [0, 1.FF*2^15] (kMaxVal = 0x1FF << 7 = 65408)
+		const float _r = CLAMP(r, 0.0f, kMaxVal);
+		const float _g = CLAMP(g, 0.0f, kMaxVal);
+		const float _b = CLAMP(b, 0.0f, kMaxVal);

-		float cRed = MAX(0.0f, MIN(sharedexp, r));
-		float cGreen = MAX(0.0f, MIN(sharedexp, g));
-		float cBlue = MAX(0.0f, MIN(sharedexp, b));
+		// Compute the maximum channel, no less than 1.0*2^-16 (kMinVal)
+		const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));

-		float cMax = MAX(cRed, MAX(cGreen, cBlue));
+		// Take the exponent of the maximum channel (rounding up the 9th bit) and
+		// add 15 to it.  When added to the channels, it causes the implicit '1.0'
+		// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
+		// of the mantissa, rounding the truncated bits.
+		union {
+			float f;
+			int32_t i;
+		} R, G, B, E;

-		float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
+		E.f = MaxChannel;
+		E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
+		E.i &= 0x7F800000; // Zero the mantissa

-		float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
+		// This shifts the 9-bit values we need into the lowest bits, rounding as
+		// needed. Note that if the channel has a smaller exponent than the max
+		// channel, it will shift even more.  This is intentional.
+		R.f = _r + E.f;
+		G.f = _g + E.f;
+		B.f = _b + E.f;

-		float exps = expp + 1.0f;
+		// Convert the Bias to the correct exponent in the upper 5 bits.
+		E.i <<= 4;
+		E.i += 0x10000000;

-		if (0.0f <= sMax && sMax < pow2to9) {
-			exps = expp;
-		}
-
-		float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
-		float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
-		float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
-
-		return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
+		// Combine the fields. RGB floats have unwanted data in the upper 9
+		// bits. Only red needs to mask them off because green and blue shift
+		// it out to the left.
+		return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
 	}

 	_FORCE_INLINE_ Color blend(const Color &p_over) const {
--- a/modules/hdr/image_loader_hdr.cpp
+++ b/modules/hdr/image_loader_hdr.cpp
@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
 	imgdata.resize(height * width * (int)sizeof(uint32_t));

 	{
-		uint8_t *w = imgdata.ptrw();
+		uint8_t *ptr = imgdata.ptrw();

-		uint8_t *ptr = (uint8_t *)w;
+		Vector<uint8_t> temp_read_data;
+		temp_read_data.resize(128);
+		uint8_t *temp_read_ptr = temp_read_data.ptrw();

 		if (width < 8 || width >= 32768) {
 			// Read flat data
@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
 							}
 						} else {
 							// Dump
+							f->get_buffer(temp_read_ptr, count);
 							for (int z = 0; z < count; ++z) {
-								ptr[(j * width + i++) * 4 + k] = f->get_8();
+								ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z];
 							}
 						}
 					}
@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
 			}
 		}

+		const bool force_linear = p_flags & FLAG_FORCE_LINEAR;
+
 		//convert
 		for (int i = 0; i < width * height; i++) {
-			float exp = pow(2.0f, ptr[3] - 128.0f);
+			int e = ptr[3] - 128;

-			Color c(
-					ptr[0] * exp / 255.0,
-					ptr[1] * exp / 255.0,
-					ptr[2] * exp / 255.0);
+			if (force_linear || (e < -15 || e > 15)) {
+				float exp = pow(2.0f, e);
+				Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0);

-			if (p_flags & FLAG_FORCE_LINEAR) {
-				c = c.srgb_to_linear();
+				if (force_linear) {
+					c = c.srgb_to_linear();
+				}
+
+				*(uint32_t *)ptr = c.to_rgbe9995();
+			} else {
+				// https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129
+				uint32_t e5 = (uint32_t)(e + 15);
+				*(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1));
 			}

-			*(uint32_t *)ptr = c.to_rgbe9995();
 			ptr += 4;
 		}
 	}
--- a/modules/hdr/image_loader_hdr.h
+++ b/modules/hdr/image_loader_hdr.h
@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader {
 public:
 	virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
 	virtual void get_recognized_extensions(List<String> *p_extensions) const;
+
 	ImageLoaderHDR();
 };