1
0
mirror of https://github.com/libretro/RetroArch synced 2024-07-03 00:38:44 +00:00

(VITA) Baked math-neon and vitaGL (#9757)

* Squashed 'deps/math-neon/' content from commit 0050735

git-subtree-dir: deps/math-neon
git-subtree-split: 0050735ae8f18281c1e6fbe2dc80546e402b7fc5

* Squashed 'deps/vitaGL/' content from commit 694b387

git-subtree-dir: deps/vitaGL
git-subtree-split: 694b387a6eacf7e179f07ff621e5772ae4253315

* (Vita) Add baked math-neon and vitaGL
This commit is contained in:
Francisco José García García 2019-11-20 12:13:33 +01:00 committed by Twinaphex
parent 932071952d
commit c8067ba0c0
106 changed files with 14033 additions and 3 deletions

View File

@ -2045,6 +2045,20 @@ ifeq ($(HAVE_RPILED), 1)
OBJ += led/drivers/led_rpi.o
endif
ifeq ($(HAVE_MATH_NEON), 1)
   # Compile the bundled math-neon sources (deps/math-neon/source) straight
   # into the build: every .c file found there becomes an object in OBJ.
   SOURCES      := $(DEPS_DIR)/math-neon/source
   DEFINES      += -DHAVE_MATH_NEON
   INCLUDE_DIRS += -I$(DEPS_DIR)/math-neon/source
   OBJ          += $(foreach dir,$(SOURCES),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
endif
ifeq ($(HAVE_VITAGL), 1)
   # Compile the bundled vitaGL sources: objects come from both source/ and
   # source/utils/, but only source/ is added to the include path.
   SOURCES      := $(DEPS_DIR)/vitaGL/source $(DEPS_DIR)/vitaGL/source/utils
   DEFINES      += -DHAVE_VITAGL
   INCLUDE_DIRS += -I$(DEPS_DIR)/vitaGL/source
   OBJ          += $(foreach dir,$(SOURCES),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
endif
##################################
### Classic Platform specifics ###
###############WIP################

View File

@ -26,6 +26,7 @@ ifeq ($(GRIFFIN_BUILD), 1)
else
HAVE_NEON := 1
HAVE_MATH_NEON := 1
HAVE_FILTERS_BUILTIN := 1
HAVE_LANGEXTRA := 1
HAVE_RPNG := 1
@ -86,7 +87,7 @@ LD := $(CXX)
LIBDIRS := -L.
ARCHFLAGS := -march=armv7-a -mfpu=neon -mfloat-abi=hard -DVITA
CFLAGS += $(ARCHFLAGS) -mword-relocations -fno-optimize-sibling-calls -O2
CFLAGS += $(ARCHFLAGS) -mword-relocations -fno-optimize-sibling-calls -O2 -flto
ifeq ($(DEBUG), 1)
CFLAGS += -g
@ -116,8 +117,7 @@ CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions
VITA_LIBS := -lSceDisplay_stub -lSceGxm_stub -lSceNet_stub -lSceNetCtl_stub -lSceAppUtil_stub \
-lSceSysmodule_stub -lSceCtrl_stub -lSceHid_stub -lSceTouch_stub -lSceAudio_stub \
-lScePower_stub -lSceRtc_stub -lSceCommonDialog_stub -lScePgf_stub -lSceMotion_stub \
-lSceFiber_stub -lSceMotion_stub -lSceAppMgr_stub -lpthread -lpng -lz -lvitagl \
-lmathneon
-lSceFiber_stub -lSceMotion_stub -lSceAppMgr_stub -lpthread -lpng -lz
LIBS := $(WHOLE_START) -lretro_vita $(WHOLE_END) $(VITA_LIBS) -lm -lc

17
deps/math-neon/.gitattributes vendored Normal file
View File

@ -0,0 +1,17 @@
# Auto detect text files and perform LF normalization
* text=auto
# Custom for Visual Studio
*.cs diff=csharp
# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

26
deps/math-neon/.gitignore vendored Normal file
View File

@ -0,0 +1,26 @@
*.o
*.a
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
# =========================
# Operating System Files
# =========================

29
deps/math-neon/Makefile vendored Normal file
View File

@ -0,0 +1,29 @@
# Builds libmathneon.a, a static library of every C file under source/,
# using the arm-vita-eabi cross toolchain. Objects are produced by make's
# built-in %.o: %.c rule with the CC/CFLAGS defined below.
TARGET  := libmathneon
SOURCES := source
LIBS    = -lc -lm -lSceGxm_stub -lSceDisplay_stub

CFILES  := $(foreach dir,$(SOURCES),$(wildcard $(dir)/*.c))
OBJS    := $(CFILES:.c=.o)

PREFIX  = arm-vita-eabi
CC      = $(PREFIX)-gcc
# gcc-ar instead of plain ar: presumably required so the archive can hold the
# LTO objects produced by -flto below -- NOTE(review): confirm.
AR      = $(PREFIX)-gcc-ar
CFLAGS  = -g -Wl,-q -O2 -ffast-math -mtune=cortex-a9 -mfpu=neon -flto -ftree-vectorize
ASFLAGS = $(CFLAGS)

# These targets are commands, not files; without this a stray file called
# "clean" or "install" would silently disable them.
.PHONY: all clean install

# Never leave a half-written archive or object behind after a failed recipe.
.DELETE_ON_ERROR:

all: $(TARGET).a

$(TARGET).a: $(OBJS)
	$(AR) -rc $@ $^

clean:
	@rm -rf $(TARGET).a $(TARGET).elf $(OBJS)

# Installs the archive and public header into the VitaSDK tree. Refuses to run
# with an empty VITASDK, which would otherwise install under /$(PREFIX).
install: $(TARGET).a
	$(if $(strip $(VITASDK)),,$(error VITASDK is not set; refusing to install into /$(PREFIX)))
	@mkdir -p $(VITASDK)/$(PREFIX)/lib/
	cp $(TARGET).a $(VITASDK)/$(PREFIX)/lib/
	@mkdir -p $(VITASDK)/$(PREFIX)/include/
	cp source/math_neon.h $(VITASDK)/$(PREFIX)/include/

169
deps/math-neon/README vendored Normal file
View File

@ -0,0 +1,169 @@
Library: MATH-NEON
By: Lachlan Tychsen-Smith
Licence: MIT (expat)
=======================================================================================
This project implements the cmath functions and some optimised matrix functions
with the aim of increasing the floating point performance of ARM Cortex A-8
based platforms. As well as implementing the functions in ARM NEON assembly,
they sacrifice error checking and some accuracy to achieve better performance.
Function Errors:
=======================================================================================
The measurement and characterisation of the inaccuracies present within these
functions is really a field in itself. For the benchmark I provide the
maximum absolute, maximum relative and root mean squared error compared to the
cmath implementations over the specified range. However, these values can be
misleading, especially for functions which quickly go to infinity. So it's always a
good idea to test it within your actual program. In general, this library will not
be as accurate as cmath; however, for many functions it is close enough that the
difference is negligible.
Notes:
=======================================================================================
- The *_c functions are C implementations of the *_neon code.
- Like cmath, the errors present in the functions are very dependent on the
range in which you are operating. So you should test them first.
- Look in the "math_neon.h" file for descriptions of the functions. In some
function files there are also notes on the specific implementation.
- The *_neon functions make certain assumptions about the location of arguments
that are incompatible with inlining.
Contact:
=======================================================================================
Name: Lachlan Tychsen-Smith
Email: lachlan.ts@gmail.com
PSVITA performance test results:
RUNFAST: Enabled
------------------------------------------------------------------------------------------------------
MATRIX FUNCTION TESTS
------------------------------------------------------------------------------------------------------
matmul2_c =
|-7.16, 9.42|
|17.86, -10.70|
matmul2_neon =
|-7.16, 9.42|
|17.86, -10.70|
matmul2: c=183985 neon=87480 rate=2.10
matvec2_c = |-7.16, 17.86|
matvec2_neon = |-7.16, 17.86|
matvec2: c=98178 neon=66040 rate=1.49
matmul3_c =
|11.14, -0.78, -3.98|
|16.56, 17.96, 23.58|
|8.73, -0.18, 1.57|
matmul3_neon =
|11.14, -0.78, -3.98|
|16.56, 17.96, 23.58|
|8.73, -0.18, 1.57|
matmul3: c=551838 neon=340292 rate=1.62
matvec3_c = |11.14, 16.56, 8.73|
matvec3_neon = |11.14, 16.56, 8.73|
matvec3: c=98178 neon=66040 rate=1.49
matmul4_c =
|17.91, -23.96, 1.86, 16.53|
|4.10, -18.16, 4.17, 29.06|
|6.92, -1.60, 3.12, 27.81|
|-15.13, -7.46, -17.91, 22.49|
matmul4_neon =
|17.91, -23.96, 1.86, 16.53|
|4.10, -18.16, 4.17, 29.06|
|6.92, -1.60, 3.12, 27.81|
|-15.13, -7.46, -17.91, 22.49|
matmul4: c=1316131 neon=315444 rate=4.17
matvec4_c = |17.91, 4.10, 6.92, -15.126419|
matvec4_neon = |17.91, 4.10, 6.92, -15.126419|
matvec4: c=98178 neon=66040 rate=1.49
dot2_c = 5.804099
dot2_neon = 5.804099
dot2: c=291526 neon=307025 rate=0.95
normalize2_c = [0.97, 0.24]
normalize2_neon = [0.97, 0.24]
normalize2: c=1058588 neon=965696 rate=1.10
dot3_c = -0.817487
dot3_neon = -0.817487
dot3: c=322094 neon=444834 rate=0.72
normalize3_c = [0.50, 0.12, -0.86]
normalize3_neon = [0.50, 0.12, -0.86]
normalize3: c=1257201 neon=1134375 rate=1.11
cross3_c = [-13.16, -17.29, -10.19]
cross3_neon = [-13.16, -17.29, -10.19]
cross3: c=705298 neon=766477 rate=0.92
dot4_c = -7.880241
dot4_neon = -7.880241
dot4: c=414431 neon=506460 rate=0.82
normalize4_c = [0.45, 0.11, -0.77, -0.44]
normalize4_neon = [0.45, 0.11, -0.77, -0.44]
normalize4: c=1410727 neon=1102802 rate=1.28
------------------------------------------------------------------------------------------------------
CMATH FUNCTION TESTS
------------------------------------------------------------------------------------------------------
Function Range Number ABS Max Error REL Max Error RMS Error Time Rate
------------------------------------------------------------------------------------------------------
sinf [-3.14, 3.14] 500000 0.00e+00 0.00e+00% 0.00e+00 647042739 x1.00
sinf_c [-3.14, 3.14] 500000 7.75e-07 1.00e+02% 4.09e-07 646276691 x1.00
sinf_neon [-3.14, 3.14] 500000 1.00e+00 1.00e+02% 7.07e-01 645546381 x1.00
cosf [-3.14, 3.14] 500000 0.00e+00 0.00e+00% 0.00e+00 644742077 x1.00
cosf_c [-3.14, 3.14] 500000 7.75e-07 6.74e-01% 4.15e-07 643957358 x1.00
cosf_neon [-3.14, 3.14] 500000 1.00e+00 1.00e+02% 7.06e-01 643211256 x1.00
tanf [-0.79, 0.79] 500000 0.00e+00 0.00e+00% 0.00e+00 642444112 x1.00
tanf_c [-0.79, 0.79] 500000 2.98e-06 7.94e-04% 1.31e-06 641628507 x1.00
tanf_neon [-0.79, 0.79] 500000 1.00e+00 1.00e+02% nan 640740514 x1.00
asinf [-1.00, 1.00] 500000 0.00e+00 0.00e+00% 0.00e+00 639560380 x1.00
asinf_c [-1.00, 1.00] 500000 5.54e-05 1.06e-02% nan 638453383 x1.00
asinf_neon [-1.00, 1.00] 500000 1.57e+00 1.00e+02% 6.84e-01 637349653 x1.00
acosf [-1.00, 1.00] 500000 0.00e+00 0.00e+00% 0.00e+00 636078992 x1.00
acosf_c [-1.00, 1.00] 500000 5.56e-05 6.46e-03% nan 634934201 x1.00
acosf_neon [-1.00, 1.00] 500000 1.57e+00 1.02e+05% 6.84e-01 633793585 x1.00
atanf [-1.00, 1.00] 500000 0.00e+00 0.00e+00% 0.00e+00 632835241 x1.00
atanf_c [-1.00, 1.00] 500000 1.67e-04 2.12e-02% 7.40e-05 632142823 x1.00
atanf_neon [-1.00, 1.00] 500000 7.85e-01 0.00e+00% nan 631387330 x1.00
sinhf [-3.14, 3.14] 500000 0.00e+00 0.00e+00% 0.00e+00 630142014 x1.00
sinhf_c [-3.14, 3.14] 500000 1.91e-06 1.52e-01% 1.85e-07 628992714 x1.00
sinhf_neon [-3.14, 3.14] 500000 1.15e+01 1.00e+02% 4.55e+00 627998454 x1.00
coshf [-3.14, 3.14] 500000 0.00e+00 0.00e+00% 0.00e+00 626869866 x1.00
coshf_c [-3.14, 3.14] 500000 9.54e-07 2.38e-05% 1.64e-07 625829657 x1.00
coshf_neon [-3.14, 3.14] 500000 1.06e+01 9.14e+01% 3.92e+00 624873969 x1.00
tanhf [-3.14, 3.14] 500000 0.00e+00 0.00e+00% 0.00e+00 623689093 x1.00
tanhf_c [-3.14, 3.14] 500000 1.20e-05 2.48e-01% 5.48e-06 622547097 x1.00
tanhf_neon [-3.14, 3.14] 500000 9.96e-01 1.00e+02% 8.26e-01 621506812 x1.00
expf [0.00, 10.00] 500000 0.00e+00 0.00e+00% 0.00e+00 620497304 x1.00
expf_c [0.00, 10.00] 500000 9.77e-03 6.15e-05% 1.64e-03 619569554 x1.00
expf_neon [0.00, 10.00] 500000 2.20e+04 1.00e+02% 4.92e+03 618761400 x1.00
logf [1.00, 1000.00] 500000 0.00e+00 0.00e+00% 0.00e+00 617882765 x1.00
logf_c [1.00, 1000.00] 500000 6.20e-06 1.62e-02% 9.83e-07 617087810 x1.00
logf_neon [1.00, 1000.00] 500000 9.49e+01 inf% 9.39e+01 616388420 x1.00
log10f [1.00, 1000.00] 500000 0.00e+00 0.00e+00% 0.00e+00 615405364 x1.00
log10f_c [1.00, 1000.00] 500000 2.86e-06 6.68e-03% 4.79e-07 614442585 x1.00
log10f_neon [1.00, 1000.00] 500000 4.12e+01 inf% 4.07e+01 613671782 x1.00
floorf [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 611113689 x1.00
floorf_c [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 608159325 x1.00
floorf_neon [1.00, 1000.00] 5000000 2.00e+00 2.00e+02% 1.42e-02 604769008 x1.01
ceilf [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 601342443 x1.00
ceilf_c [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 598387998 x1.00
ceilf_neon [1.00, 1000.00] 5000000 2.00e+00 1.00e+02% 1.02e+00 594959710 x1.01
fabsf [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 592068236 x1.00
fabsf_c [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 589808748 x1.00
fabsf_neon [1.00, 1000.00] 5000000 0.00e+00 0.00e+00% 0.00e+00 587712180 x1.01
sqrtf [1.00, 1000.00] 500000 0.00e+00 0.00e+00% 0.00e+00 586496654 x1.00
sqrtf_c [1.00, 1000.00] 500000 2.33e-04 1.06e-03% 8.69e-05 585470866 x1.00
sqrtf_neon [1.00, 1000.00] 500000 0.00e+00 0.00e+00% nan 584594551 x1.00
invsqrtf [1.00, 1000.00] 500000 0.00e+00 0.00e+00% 0.00e+00 583492213 x1.00
invsqrtf_c [1.00, 1000.00] 500000 4.35e-06 4.78e-04% 2.00e-07 582448164 x1.00
invsqrtf_neon [1.00, 1000.00] 500000 0.00e+00 0.00e+00% nan 581642365 x1.00
atan2f [0.10, 10.00] 10000 0.00e+00 0.00e+00% 0.00e+00 83594269 x1.00
atan2f_c [0.10, 10.00] 10000 1.73e-04 2.23e-02% 0.00e+00 85383651 x0.98
atan2f_neon [0.10, 10.00] 10000 0.00e+00 0.00e+00% 0.00e+00 87387055 x0.96
powf [1.00, 10.00] 10000 0.00e+00 0.00e+00% 0.00e+00 93430489 x1.00
powf_c [1.00, 10.00] 10000 1.08e+05 4.37e-03% 0.00e+00 96726976 x0.97
powf_neon [1.00, 10.00] 10000 9.97e+09 1.00e+02% 0.00e+00 100185753 x0.93
fmodf [1.00, 10.00] 10000 0.00e+00 0.00e+00% 0.00e+00 101653673 x1.00
fmodf_c [1.00, 10.00] 10000 9.90e+00 8.06e-02% 0.00e+00 103177551 x0.99
fmodf_neon [1.00, 10.00] 10000 9.99e+00 1.00e+02% 0.00e+00 104771240 x0.97

67
deps/math-neon/source/math_acosf.c vendored Normal file
View File

@ -0,0 +1,67 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
Test func : acosf(x)
Test Range: -1.0 < x < 1.0
Peak Error: ~0.005%
RMS Error: ~0.001%
*/
/* pi/2 rounded to single precision; used by both the C and NEON variants. */
const float __acosf_pi_2 = M_PI_2;

/*
 * C implementation of arc cosine, built on asinf_c() through the identity
 * acos(x) = pi/2 - asin(x). Accuracy and valid range are therefore those of
 * asinf_c().
 */
float acosf_c(float x)
{
	const float asin_x = asinf_c(x);

	return __acosf_pi_2 - asin_x;
}
/*
 * NEON arc cosine, hard-float variant: acos(x) = pi/2 - asin(x).
 *
 * NEON build (__MATH_NEON defined): calls asinf_neon_hfp() and then computes
 * pi/2 - d0 in place with inline assembly. There is no C return statement on
 * this path; it presumably relies on asinf_neon_hfp() leaving its result in
 * d0/s0 and on s0 being the float return register under the hard-float ABI.
 * NOTE(review): that assumption breaks if either function is inlined (see the
 * README notes) -- confirm for -flto builds.
 *
 * Other builds: returns acosf_c(x). Previously this path fell off the end of
 * a non-void function, leaving the result undefined.
 */
float acosf_neon_hfp(float x)
{
#ifdef __MATH_NEON
	asinf_neon_hfp(x);
	asm volatile (
	"vdup.f32 d1, %0 \n\t" //d1 = {pi/2, pi/2};
	"vsub.f32 d0, d1, d0 \n\t" //d0 = d1 - d0;
	::"r"(__acosf_pi_2)
	: "d0", "d1" //both registers are written by the two instructions above
	);
#else
	return acosf_c(x);
#endif
}
/*
 * Arc cosine entry point that shuttles the value between core register r0 and
 * VFP register s0 around the _hfp routine (presumably for the soft-float
 * calling convention, going by the _sfp suffix -- NOTE(review): confirm).
 *
 * NEON build: no C return statement; the result is left in r0 by the final
 * vmov. Other builds: returns acosf_c(x).
 */
float acosf_neon_sfp(float x)
{
#ifdef __MATH_NEON
// Copy the argument from r0 into s0, where the _hfp routine reads it.
asm volatile ("vmov.f32 s0, r0 \n\t");
acosf_neon_hfp(x);
// Copy the result from s0 back into r0.
asm volatile ("vmov.f32 r0, s0 \n\t");
#else
return acosf_c(x);
#endif
}

183
deps/math-neon/source/math_asinf.c vendored Normal file
View File

@ -0,0 +1,183 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
Test func : asinf(x)
Test Range: -1.0 < x < 1.0
Peak Error: ~0.005%
RMS Error: ~0.001%
*/
/*
 * Coefficients of the odd polynomial p1*x + p3*x^3 + p5*x^5 + p7*x^7 used to
 * approximate asin on the reduced argument. Stored as {p7, p3, p5, p1}, the
 * order the NEON code loads them in.
 */
const float __asinf_lut[4] = {
	0.105312459675071,	//p7
	0.169303418571894,	//p3
	0.051599985887214,	//p5
	0.999954835104825	//p1
};

/* pi/2 rounded to single precision. */
const float __asinf_pi_2 = M_PI_2;

/*
 * C implementation of arc sine for -1 <= x <= 1 (no range checking).
 *
 * For |x| <= 0.5 the polynomial is evaluated on |x| directly. For |x| > 0.5
 * the argument is replaced by sqrt((1 - |x|) / 2) and the result becomes
 * pi/2 - 2 * poly(...). The sign of x is applied at the end. All selections
 * are done branch-free by multiplying with 0.0/1.0 masks, mirroring the NEON
 * version.
 */
float asinf_c(float x)
{
	union {
		float f;
		int i;
	} u;
	float absx, h, t, nr, rsqrt_h;
	float reduce, offset, scale, p_hi, p_lo, res, twice, neg;
	int expfix, k;

	absx = fabs(x);

	/* h = (1 - |x|) / 2 */
	h = 0.5;
	h = h - absx*0.5;

	/* u.f ~= 1/sqrt(h): integer bit-trick seed, then two Newton steps */
	u.f = h;
	u.i = 0x5F3759DF - (u.i >> 1);
	for (k = 0; k < 2; k++) {
		t = h * u.f;
		nr = (3.0f - t * u.f) * 0.5;
		u.f = u.f * nr;
	}

	/*
	 * u.f ~= 1 / rsqrt_h = sqrt(h): the exponent is normalised with expfix,
	 * a linear seed is taken, the exponent is restored, then two Newton steps.
	 */
	rsqrt_h = u.f;
	expfix = 0x3F800000 - (u.i & 0x7F800000);
	u.i = u.i + expfix;
	u.f = 1.41176471f - 0.47058824f * u.f;
	u.i = u.i + expfix;
	for (k = 0; k < 2; k++) {
		nr = 2.0 - u.f * rsqrt_h;
		u.f = u.f * nr;
	}

	/* if |x| > 0.5: absx = sqrt(h), result = pi/2 - 2 * poly(absx) */
	u.f = u.f - absx;
	reduce = (absx > 0.5f);
	offset = __asinf_pi_2 * reduce;
	scale = 1.0f - 3.0f * reduce;
	absx = absx + u.f * reduce;

	/* odd polynomial, evaluated as two halves joined by absx^4 */
	u.f = absx * absx;
	p_hi = (__asinf_lut[0] * absx) * u.f + (__asinf_lut[2] * absx);
	p_lo = (__asinf_lut[1] * absx) * u.f + (__asinf_lut[3] * absx);
	u.f = u.f * u.f;
	res = p_lo + p_hi * u.f;
	res = offset + scale * res;

	/* asin is odd: negate the result when x < 0 */
	twice = res + res;
	neg = (x < 0.0f);
	res = res - twice * neg;

	return res;
}
/*
 * NEON arc sine, hard-float variant; same algorithm as asinf_c().
 *
 * NEON build (__MATH_NEON defined): the assembly reads x from lane 0 of d0
 * and finishes by moving the result into s0. There is no C return statement;
 * this presumably relies on s0 being both the first float argument and the
 * float return register under the hard-float ABI, and on the function not
 * being inlined (see the README notes) -- NOTE(review): confirm for -flto.
 *
 * Other builds: returns asinf_c(x). Previously this path fell off the end of
 * a non-void function, leaving the result undefined.
 */
float asinf_neon_hfp(float x)
{
#ifdef __MATH_NEON
	asm volatile (
	"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x};
	"vdup.f32 d4, %1 \n\t" //d4 = {pi/2, pi/2};
	"vmov.f32 d6, d0 \n\t" //d6 = d0;
	"vabs.f32 d0, d0 \n\t" //d0 = fabs(d0) ;
	"vmov.f32 d5, #0.5 \n\t" //d5 = 0.5;
	"vmls.f32 d5, d0, d5 \n\t" //d5 = d5 - d0*d5;
	//fast invsqrt approx
	"vmov.f32 d1, d5 \n\t" //d1 = d5
	"vrsqrte.f32 d5, d5 \n\t" //d5 = ~ 1.0 / sqrt(d5)
	"vmul.f32 d2, d5, d1 \n\t" //d2 = d5 * d1
	"vrsqrts.f32 d3, d2, d5 \n\t" //d3 = (3 - d5 * d2) / 2
	"vmul.f32 d5, d5, d3 \n\t" //d5 = d5 * d3
	"vmul.f32 d2, d5, d1 \n\t" //d2 = d5 * d1
	"vrsqrts.f32 d3, d2, d5 \n\t" //d3 = (3 - d5 * d2) / 2
	"vmul.f32 d5, d5, d3 \n\t" //d5 = d5 * d3
	//fast reciprocal approximation
	"vrecpe.f32 d1, d5 \n\t" //d1 = ~ 1 / d5;
	"vrecps.f32 d2, d1, d5 \n\t" //d2 = 2.0 - d1 * d5;
	"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
	"vrecps.f32 d2, d1, d5 \n\t" //d2 = 2.0 - d1 * d5;
	"vmul.f32 d5, d1, d2 \n\t" //d5 = d1 * d2;
	//if |x| > 0.5 -> ax = sqrt((1-ax)/2), r = pi/2
	"vsub.f32 d5, d0, d5 \n\t" //d5 = d0 - d5;
	"vmov.f32 d2, #0.5 \n\t" //d2 = 0.5;
	"vcgt.f32 d3, d0, d2 \n\t" //d3 = (d0 > d2) as an all-ones mask;
	"vmov.f32 d1, #3.0 \n\t" //d1 = 3.0;
	"vshr.u32 d3, #31 \n\t" //d3 = d3 >> 31 (mask -> 0 or 1);
	"vmov.f32 d16, #1.0 \n\t" //d16 = 1.0;
	"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3;
	"vmls.f32 d0, d5, d3[0] \n\t" //d0 = d0 - d5 * d3[0];
	"vmul.f32 d7, d4, d3[0] \n\t" //d7 = d4 * d3[0];
	"vmls.f32 d16, d1, d3[0] \n\t" //d16 = d16 - d1 * d3[0];
	//polynomial:
	"vmul.f32 d2, d0, d0 \n\t" //d2 = d0*d0 = {ax^2, ax^2}
	"vld1.32 {d4, d5}, [%0] \n\t" //d4 = {p7, p3}, d5 = {p5, p1}
	"vmul.f32 d3, d2, d2 \n\t" //d3 = d2*d2 = {x^4, x^4}
	"vmul.f32 q0, q2, d0[0] \n\t" //q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
	"vmla.f32 d1, d0, d2[0] \n\t" //d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}
	"vmla.f32 d1, d3, d1[0] \n\t" //d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}
	"vmla.f32 d7, d1, d16 \n\t" //d7 = d7 + d1*d16
	"vadd.f32 d2, d7, d7 \n\t" //d2 = d7 + d7
	"vclt.f32 d3, d6, #0 \n\t" //d3 = (d6 < 0)
	"vshr.u32 d3, #31 \n\t" //d3 = d3 >> 31;
	"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3
	"vmls.f32 d7, d2, d3[0] \n\t" //d7 = d7 - d2 * d3[0];
	"vmov.f32 s0, s15 \n\t" //s0 = s15 (upper lane of d7)
	:: "r"(__asinf_lut), "r"(__asinf_pi_2)
	//d16 is written above, so it must be listed alongside d0-d7
	: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
	);
#else
	return asinf_c(x);
#endif
}
/*
 * Arc sine entry point that shuttles the value between core register r0 and
 * VFP register s0 around the _hfp routine (presumably for the soft-float
 * calling convention, going by the _sfp suffix -- NOTE(review): confirm).
 *
 * NEON build: no C return statement; the result is left in r0 by the final
 * vmov. Other builds: returns asinf_c(x).
 */
float asinf_neon_sfp(float x)
{
#ifdef __MATH_NEON
// Copy the argument from r0 into s0, where the _hfp routine reads it.
asm volatile ("vmov.f32 s0, r0 \n\t");
asinf_neon_hfp(x);
// Copy the result from s0 back into r0.
asm volatile ("vmov.f32 r0, s0 \n\t");
#else
return asinf_c(x);
#endif
}

170
deps/math-neon/source/math_atan2f.c vendored Normal file
View File

@ -0,0 +1,170 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
 * Coefficients of the odd polynomial p1*x + p3*x^3 + p5*x^5 + p7*x^7 used to
 * approximate atan on |x| <= 1. Stored as {p7, p3, p5, p1}, the order the
 * NEON code loads them in.
 */
const float __atan2f_lut[4] = {
	-0.0443265554792128,	//p7
	-0.3258083974640975,	//p3
	+0.1555786518463281,	//p5
	+0.9997878412794807	//p1
};

/* pi/2 rounded to single precision. */
const float __atan2f_pi_2 = M_PI_2;

/*
 * C implementation of atan2(y, x).
 *
 * Computes c = |y / x| with a fast reciprocal, folds c > 1 onto -1/c with a
 * pi/2 offset, evaluates the polynomial, then fixes up the quadrant from the
 * signs of x and y. When |x| < 1e-6 the magnitude is forced to pi/2.
 *
 * The range/quadrant selections are written as real conditionals rather than
 * multiplications by a 0/1 mask. The reciprocal of zero overflows to
 * infinity, and 0 * inf (or 0 * NaN) is NaN, so the masked form returned NaN
 * for y == 0 and for x == 0. For finite intermediates both forms compute the
 * same values.
 */
float atan2f_c(float y, float x)
{
	float a, b, c, r, xx;
	int m;
	union {
		float f;
		int i;
	} xinv;

	//fast inverse approximation (2x newton): xinv.f ~= 1 / |x|
	xx = fabs(x);
	xinv.f = xx;
	m = 0x3F800000 - (xinv.i & 0x7F800000);
	xinv.i = xinv.i + m;
	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
	xinv.i = xinv.i + m;
	b = 2.0 - xinv.f * xx;
	xinv.f = xinv.f * b;
	b = 2.0 - xinv.f * xx;
	xinv.f = xinv.f * b;

	c = fabs(y * xinv.f);

	//fast inverse approximation (2x newton): xinv.f ~= 1 / c
	xinv.f = c;
	m = 0x3F800000 - (xinv.i & 0x7F800000);
	xinv.i = xinv.i + m;
	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
	xinv.i = xinv.i + m;
	b = 2.0 - xinv.f * c;
	xinv.f = xinv.f * b;
	b = 2.0 - xinv.f * c;
	xinv.f = xinv.f * b;

	//if c > 1.0 -> c = -1/c, r = pi/2
	xinv.f = xinv.f + c;
	if (c > 1.0f) {
		c = c - xinv.f;
		r = __atan2f_pi_2;
	} else {
		//xinv.f may be inf here (c == 0); it must not touch c
		r = 0.0f;
	}

	//polynomial evaluation
	xx = c * c;
	a = (__atan2f_lut[0] * c) * xx + (__atan2f_lut[2] * c);
	b = (__atan2f_lut[1] * c) * xx + (__atan2f_lut[3] * c);
	xx = xx * xx;
	r = r + a * xx;
	r = r + b;

	//determine quadrant and test for small x.
	b = M_PI;
	b = b - 2.0f * r;
	if (x < 0.0f)
		r = r + b;		//r = pi - r
	if (fabs(x) < 0.000001f)
		r = __atan2f_pi_2;	//discards any inf/NaN produced by 1/x
	b = r + r;
	if (y < 0.0f)
		r = r - b;		//r = -r

	return r;
}
/*
 * NEON atan2, hard-float variant.
 *
 * NEON build (__MATH_NEON defined): the assembly reads y from d0[0] and x
 * from d0[1], computes atan(y / x) and moves the result into s0. There is no
 * C return statement; this presumably relies on s0/s1 carrying the two float
 * arguments and s0 the return value under the hard-float ABI, and on the
 * function not being inlined (see the README notes) -- NOTE(review): confirm
 * for -flto builds.
 *
 * Fixes relative to the previous version:
 *  - vcgt/vclt produce an all-ones mask per lane; it is now shifted down to
 *    0/1 (vshr #31) before vcvt.f32.u32, as atanf_neon_hfp() already does.
 *    Without the shift the mask converted to ~4.29e9 instead of 1.0.
 *  - d16-d19 are written by the assembly and are now in the clobber list.
 *  - Non-NEON builds return atan2f_c(y, x) instead of falling off the end.
 *
 * NOTE(review): unlike atan2f_c(), this path only applies the sign of y / x;
 * it has no quadrant correction for x < 0 and no small-x handling. Left
 * unchanged here -- confirm whether callers depend on that.
 */
float atan2f_neon_hfp(float y, float x)
{
#ifdef __MATH_NEON
	asm volatile (
	"vdup.f32 d17, d0[1] \n\t" //d17 = {x, x};
	"vdup.f32 d16, d0[0] \n\t" //d16 = {y, y};
	//1.0 / x
	"vrecpe.f32 d18, d17 \n\t" //d18 = ~ 1 / d17;
	"vrecps.f32 d19, d18, d17 \n\t" //d19 = 2.0 - d18 * d17;
	"vmul.f32 d18, d18, d19 \n\t" //d18 = d18 * d19;
	"vrecps.f32 d19, d18, d17 \n\t" //d19 = 2.0 - d18 * d17;
	"vmul.f32 d18, d18, d19 \n\t" //d18 = d18 * d19;
	//y * (1.0 /x)
	"vmul.f32 d0, d16, d18 \n\t" //d0 = d16 * d18;
	"vdup.f32 d4, %1 \n\t" //d4 = {pi/2, pi/2};
	"vmov.f32 d6, d0 \n\t" //d6 = d0;
	"vabs.f32 d0, d0 \n\t" //d0 = fabs(d0) ;
	//fast reciprocal approximation
	"vrecpe.f32 d1, d0 \n\t" //d1 = ~ 1 / d0;
	"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
	"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
	"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
	"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
	//if |y/x| > 1.0 -> ax = -1/ax, r = pi/2
	"vadd.f32 d1, d1, d0 \n\t" //d1 = d1 + d0;
	"vmov.f32 d2, #1.0 \n\t" //d2 = 1.0;
	"vcgt.f32 d3, d0, d2 \n\t" //d3 = (d0 > d2) as an all-ones mask;
	"vshr.u32 d3, #31 \n\t" //d3 = d3 >> 31 (mask -> 0 or 1);
	"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3;
	"vmls.f32 d0, d1, d3 \n\t" //d0 = d0 - d1 * d3;
	"vmul.f32 d7, d3, d4 \n\t" //d7 = d3 * d4;
	//polynomial:
	"vmul.f32 d2, d0, d0 \n\t" //d2 = d0*d0 = {ax^2, ax^2}
	"vld1.32 {d4, d5}, [%0] \n\t" //d4 = {p7, p3}, d5 = {p5, p1}
	"vmul.f32 d3, d2, d2 \n\t" //d3 = d2*d2 = {x^4, x^4}
	"vmul.f32 q0, q2, d0[0] \n\t" //q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
	"vmla.f32 d1, d0, d2[0] \n\t" //d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}
	"vmla.f32 d1, d3, d1[0] \n\t" //d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}
	"vadd.f32 d1, d1, d7 \n\t" //d1 = d1 + d7
	"vadd.f32 d2, d1, d1 \n\t" //d2 = d1 + d1
	"vclt.f32 d3, d6, #0 \n\t" //d3 = (d6 < 0) as an all-ones mask
	"vshr.u32 d3, #31 \n\t" //d3 = d3 >> 31 (mask -> 0 or 1);
	"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3
	"vmls.f32 d1, d3, d2 \n\t" //d1 = d1 - d2 * d3;
	"vmov.f32 s0, s3 \n\t" //s0 = s3
	:: "r"(__atan2f_lut), "r"(__atan2f_pi_2)
	: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
	  "d16", "d17", "d18", "d19"
	);
#else
	return atan2f_c(y, x);
#endif
}
/*
 * atan2 entry point that shuttles the values between core registers r0/r1
 * and VFP registers s0/s1 around the _hfp routine (presumably for the
 * soft-float calling convention, going by the _sfp suffix -- NOTE(review):
 * confirm).
 *
 * Arguments are (y, x), matching atan2f_c() and atan2f_neon_hfp(). The
 * parameters used to be named (x, y): the NEON path then treated the first
 * argument as y while the fallback passed the second argument as y, so the
 * two builds disagreed. Only the names changed; positions are the same.
 *
 * NEON build: no C return statement; the result is left in r0 by the final
 * vmov. Other builds: returns atan2f_c(y, x).
 */
float atan2f_neon_sfp(float y, float x)
{
#ifdef __MATH_NEON
	asm volatile ("vmov.f32 s0, r0 \n\t");
	asm volatile ("vmov.f32 s1, r1 \n\t");
	atan2f_neon_hfp(y, x);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return atan2f_c(y, x);
#endif
}

149
deps/math-neon/source/math_atanf.c vendored Normal file
View File

@ -0,0 +1,149 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
 * Coefficients of the odd polynomial p1*x + p3*x^3 + p5*x^5 + p7*x^7 used to
 * approximate atan on |x| <= 1. Stored as {p7, p3, p5, p1}, the order the
 * NEON code loads them in.
 */
const float __atanf_lut[4] = {
	-0.0443265554792128,	//p7
	-0.3258083974640975,	//p3
	+0.1555786518463281,	//p5
	+0.9997878412794807	//p1
};

/* pi/2 rounded to single precision. */
const float __atanf_pi_2 = M_PI_2;

/*
 * C implementation of arc tangent.
 *
 * For |x| <= 1 the polynomial is evaluated on |x| directly; for |x| > 1 the
 * argument is replaced by -1/|x| and pi/2 is added. The sign of x is applied
 * at the end.
 *
 * The |x| > 1 selection is a real conditional rather than a multiplication by
 * a 0/1 mask. For x == 0 the fast reciprocal overflows to infinity and
 * 0 * inf is NaN, so the masked form returned NaN instead of 0. For finite
 * intermediates both forms compute the same values.
 */
float atanf_c(float x)
{
	float a, b, r, xx, ax;
	int m;
	union {
		float f;
		int i;
	} xinv;

	ax = fabs(x);

	//fast inverse approximation (2x newton): xinv.f ~= 1 / ax
	xinv.f = ax;
	m = 0x3F800000 - (xinv.i & 0x7F800000);
	xinv.i = xinv.i + m;
	xinv.f = 1.41176471f - 0.47058824f * xinv.f;
	xinv.i = xinv.i + m;
	b = 2.0 - xinv.f * ax;
	xinv.f = xinv.f * b;
	b = 2.0 - xinv.f * ax;
	xinv.f = xinv.f * b;

	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
	xinv.f = xinv.f + ax;
	if (ax > 1.0f) {
		ax = ax - xinv.f;
		r = __atanf_pi_2;
	} else {
		//xinv.f may be inf here (ax == 0); it must not touch ax
		r = 0.0f;
	}

	//polynomial evaluation
	xx = ax * ax;
	a = (__atanf_lut[0] * ax) * xx + (__atanf_lut[2] * ax);
	b = (__atanf_lut[1] * ax) * xx + (__atanf_lut[3] * ax);
	xx = xx * xx;
	b = b + a * xx;
	r = r + b;

	//if x < 0 -> r = -r
	a = 2 * r;
	b = (x < 0.0f);
	r = r - a * b;

	return r;
}
/*
 * NEON arc tangent, hard-float variant; same algorithm as atanf_c().
 *
 * NEON build (__MATH_NEON defined): the assembly reads x from lane 0 of d0
 * and finishes by moving the result into s0. There is no C return statement;
 * this presumably relies on s0 being both the first float argument and the
 * float return register under the hard-float ABI, and on the function not
 * being inlined (see the README notes) -- NOTE(review): confirm for -flto.
 *
 * Other builds: returns atanf_c(x). Previously this path fell off the end of
 * a non-void function, leaving the result undefined.
 *
 * NOTE(review): for x == 0 the reciprocal estimate is infinite and the
 * masked vmls multiplies it by 0, which looks like it yields NaN -- confirm
 * on hardware.
 */
float atanf_neon_hfp(float x)
{
#ifdef __MATH_NEON
	asm volatile (
	"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x};
	"vdup.f32 d4, %1 \n\t" //d4 = {pi/2, pi/2};
	"vmov.f32 d6, d0 \n\t" //d6 = d0;
	"vabs.f32 d0, d0 \n\t" //d0 = fabs(d0) ;
	//fast reciprocal approximation
	"vrecpe.f32 d1, d0 \n\t" //d1 = ~ 1 / d0;
	"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
	"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
	"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
	"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
	//if |x| > 1.0 -> ax = -1/ax, r = pi/2
	"vadd.f32 d1, d1, d0 \n\t" //d1 = d1 + d0;
	"vmov.f32 d2, #1.0 \n\t" //d2 = 1.0;
	"vcgt.f32 d3, d0, d2 \n\t" //d3 = (d0 > d2) as an all-ones mask;
	"vshr.u32 d3, #31 \n\t" //d3 = d3 >> 31 (mask -> 0 or 1);
	"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3;
	"vmls.f32 d0, d1, d3[0] \n\t" //d0 = d0 - d1 * d3[0];
	"vmul.f32 d7, d4, d3[0] \n\t" //d7 = d4 * d3[0];
	//polynomial:
	"vmul.f32 d2, d0, d0 \n\t" //d2 = d0*d0 = {ax^2, ax^2}
	"vld1.32 {d4, d5}, [%0] \n\t" //d4 = {p7, p3}, d5 = {p5, p1}
	"vmul.f32 d3, d2, d2 \n\t" //d3 = d2*d2 = {x^4, x^4}
	"vmul.f32 q0, q2, d0[0] \n\t" //q0 = q2 * d0[0] = {p7x, p3x, p5x, p1x}
	"vmla.f32 d1, d0, d2[0] \n\t" //d1 = d1 + d0*d2[0] = {p5x + p7x^3, p1x + p3x^3}
	"vmla.f32 d1, d3, d1[0] \n\t" //d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}
	"vadd.f32 d1, d1, d7 \n\t" //d1 = d1 + d7
	"vadd.f32 d2, d1, d1 \n\t" //d2 = d1 + d1
	"vclt.f32 d3, d6, #0 \n\t" //d3 = (d6 < 0) as an all-ones mask
	"vshr.u32 d3, #31 \n\t" //d3 = d3 >> 31 (mask -> 0 or 1);
	"vcvt.f32.u32 d3, d3 \n\t" //d3 = (float) d3
	"vmls.f32 d1, d3, d2 \n\t" //d1 = d1 - d2 * d3;
	"vmov.f32 s0, s3 \n\t" //s0 = s3
	:: "r"(__atanf_lut), "r"(__atanf_pi_2)
	: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
	);
#else
	return atanf_c(x);
#endif
}
/*
 * Arc tangent entry point that shuttles the value between core register r0
 * and VFP register d0/s0 around the _hfp routine (presumably for the
 * soft-float calling convention, going by the _sfp suffix -- NOTE(review):
 * confirm).
 *
 * NEON build: no C return statement; the result is left in r0 by the final
 * vmov. Other builds: returns atanf_c(x).
 *
 * The stray ';' that followed the closing brace has been removed (an empty
 * file-scope declaration is not valid ISO C and warns under -Wpedantic).
 */
float atanf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vdup.f32 d0, r0 \n\t");	//d0 = {x, x}
	atanf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");	//r0 = result
#else
	return atanf_c(x);
#endif
}

71
deps/math-neon/source/math_ceilf.c vendored Normal file
View File

@ -0,0 +1,71 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Assumes the floating point value |x| < 2147483648
*/
#include "math.h"
#include "math_neon.h"
/*
 * C implementation of ceil for values that fit in an int (see the range
 * assumption at the top of this file): truncate toward zero, then add one
 * when the truncated value ended up below x.
 */
float ceilf_c(float x)
{
	const float truncated = (float) (int) x;

	return truncated + (x > truncated);
}
/*
 * NEON ceil, hard-float variant; same algorithm as ceilf_c().
 *
 * NEON build (__MATH_NEON defined): the assembly operates on d0 in place and
 * there is no C return statement; this presumably relies on s0 being both
 * the first float argument and the float return register under the
 * hard-float ABI, and on the function not being inlined (see the README
 * notes) -- NOTE(review): confirm for -flto builds.
 *
 * Other builds: returns ceilf_c(x). Previously this path fell off the end of
 * a non-void function, leaving the result undefined.
 */
float ceilf_neon_hfp(float x)
{
#ifdef __MATH_NEON
	asm volatile (
	"vcvt.s32.f32 d1, d0 \n\t" //d1 = (int) d0;
	"vcvt.f32.s32 d1, d1 \n\t" //d1 = (float) d1;
	"vcgt.f32 d0, d0, d1 \n\t" //d0 = (d0 > d1) as an all-ones mask;
	"vshr.u32 d0, #31 \n\t" //d0 = d0 >> 31 (mask -> 0 or 1);
	"vcvt.f32.u32 d0, d0 \n\t" //d0 = (float) d0;
	"vadd.f32 d0, d1, d0 \n\t" //d0 = d1 + d0;
	::: "d0", "d1"
	);
#else
	return ceilf_c(x);
#endif
}
/*
 * ceil entry point that shuttles the value between core register r0 and VFP
 * register s0 around the _hfp routine (presumably for the soft-float calling
 * convention, going by the _sfp suffix -- NOTE(review): confirm).
 *
 * NEON build: no C return statement; the result is left in r0 by the final
 * vmov. Other builds: returns ceilf_c(x).
 *
 * The stray ';' that followed the closing brace has been removed (an empty
 * file-scope declaration is not valid ISO C and warns under -Wpedantic).
 */
float ceilf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vmov.f32 s0, r0 \n\t");	//s0 = x
	ceilf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");	//r0 = result
#else
	return ceilf_c(x);
#endif
}

50
deps/math-neon/source/math_cosf.c vendored Normal file
View File

@ -0,0 +1,50 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math_neon.h"
/*
 * C implementation of cosine via the phase shift cos(x) = sin(x + pi/2);
 * accuracy and valid range are those of sinf_c().
 */
float cosf_c(float x)
{
	/* The sum is formed in double precision, as M_PI_2 is a double constant. */
	const double shifted = x + M_PI_2;

	return sinf_c(shifted);
}
/*
 * NEON cosine, hard-float variant: cos(x) = sin(x + pi/2).
 *
 * NEON build (__MATH_NEON defined): forwards the shifted argument to
 * sinf_neon_hfp(). Other builds: returns cosf_c(x). Previously this path
 * fell off the end of a non-void function, leaving the result undefined.
 */
float cosf_neon_hfp(float x)
{
#ifdef __MATH_NEON
	float xx = x + M_PI_2;
	return sinf_neon_hfp(xx);
#else
	return cosf_c(x);
#endif
}
/*
 * Cosine entry point that shuttles the value between core register r0 and
 * VFP register d0/s0 around the _hfp routine (presumably for the soft-float
 * calling convention, going by the _sfp suffix -- NOTE(review): confirm).
 *
 * NEON build: no C return statement; the result is left in r0 by the final
 * vmov. Other builds: returns cosf_c(x).
 *
 * The stray ';' that followed the closing brace has been removed (an empty
 * file-scope declaration is not valid ISO C and warns under -Wpedantic).
 */
float cosf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vdup.f32 d0, r0 \n\t");	//d0 = {x, x}
	cosf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");	//r0 = result
#else
	return cosf_c(x);
#endif
}

120
deps/math-neon/source/math_coshf.c vendored Normal file
View File

@ -0,0 +1,120 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
 * Range-reduction constants read by coshf_neon_hfp() as {invrange, range}:
 * 1/ln(2) and ln(2).
 */
const float __coshf_rng[2] = {
1.442695041f,
0.693147180f
};
/*
 * Polynomial coefficients for coshf_neon_hfp(), highest order (p7) first.
 * Every coefficient is stored twice so that one 64-bit load fills both lanes
 * of a D register with the same value; the assembly evaluates the polynomial
 * on two lanes at once.
 */
const float __coshf_lut[16] = {
0.00019578093328483123, //p7
0.00019578093328483123, //p7
0.0014122663401803872, //p6
0.0014122663401803872, //p6
0.008336936973260111, //p5
0.008336936973260111, //p5
0.04165989275009526, //p4
0.04165989275009526, //p4
0.16666570253074878, //p3
0.16666570253074878, //p3
0.5000006143673624, //p2
0.5000006143673624, //p2
1.000000059694879, //p1
1.000000059694879, //p1
0.9999999916728642, //p0
0.9999999916728642 //p0
};
/*
 * Portable hyperbolic cosine: 0.5 * e^x + 0.5 * e^-x, with both
 * exponentials computed by expf_c().
 */
float coshf_c(float x)
{
	float half_pos = expf_c(x) * 0.5f;
	float exp_neg = expf_c(-x);

	return half_pos + 0.5f * exp_neg;
}
/*
 * NEON hyperbolic cosine. The assembly reads its argument from s0 and leaves
 * the result in s0 (presumably the hard-float calling convention -- TODO
 * confirm); there is no C-level return statement, and without __MATH_NEON the
 * body is empty.
 *
 * Method: build d0 = {x, -x}, evaluate the exp polynomial on both lanes
 * (range reduction, Horner evaluation over __coshf_lut, exponent fix-up),
 * then average the two lanes: (e^x + e^-x) * 0.5.
 *
 * NOTE(review): %1 is declared as an input operand but is advanced by the
 * post-incrementing loads, and d16-d23 are written without being listed as
 * clobbers -- confirm this is safe wherever the function may be inlined.
 */
float coshf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x}
"fnegs s1, s1 \n\t" //s1 = -s1, so d0 = {x, -x}
//Range Reduction:
"vld1.32 d2, [%0] \n\t" //d2 = {invrange, range}
"vld1.32 {d16, d17}, [%1]! \n\t" //d16 = {p7, p7}, d17 = {p6, p6}
"vmul.f32 d6, d0, d2[0] \n\t" //d6 = d0 * d2[0]
"vcvt.s32.f32 d6, d6 \n\t" //d6 = (int) d6
"vld1.32 {d18}, [%1]! \n\t" //d18 = {p5, p5}
"vcvt.f32.s32 d1, d6 \n\t" //d1 = (float) d6
"vld1.32 {d19}, [%1]! \n\t" //d19 = {p4, p4}
"vmls.f32 d0, d1, d2[1] \n\t" //d0 = d0 - d1 * d2[1]
"vld1.32 {d20}, [%1]! \n\t" //d20 = {p3, p3}
//polynomial:
"vmla.f32 d17, d16, d0 \n\t" //d17 = d17 + d16 * d0;
"vld1.32 {d21}, [%1]! \n\t" //d21 = {p2, p2}
"vmla.f32 d18, d17, d0 \n\t" //d18 = d18 + d17 * d0;
"vld1.32 {d22}, [%1]! \n\t" //d22 = {p1, p1}
"vmla.f32 d19, d18, d0 \n\t" //d19 = d19 + d18 * d0;
"vld1.32 {d23}, [%1]! \n\t" //d23 = {p0, p0}
"vmla.f32 d20, d19, d0 \n\t" //d20 = d20 + d19 * d0;
"vmla.f32 d21, d20, d0 \n\t" //d21 = d21 + d20 * d0;
"vmla.f32 d22, d21, d0 \n\t" //d22 = d22 + d21 * d0;
"vmla.f32 d23, d22, d0 \n\t" //d23 = d23 + d22 * d0;
//multiply by 2 ^ m
"vshl.i32 d6, d6, #23 \n\t" //d6 = d6 << 23
"vadd.i32 d0, d23, d6 \n\t" //d0 = d23 + d6 (integer add into the exponent field)
"vdup.f32 d2, d0[1] \n\t" //d2 = {s1, s1}
"vmov.f32 d1, #0.5 \n\t" //d1 = 0.5
"vadd.f32 d0, d0, d2 \n\t" //d0 = d0 + d2
"vmul.f32 d0, d1 \n\t" //d0 = d0 * d1
:: "r"(__coshf_rng), "r"(__coshf_lut)
: "d0", "d1", "q1", "q2", "d6"
);
#endif
}
/*
 * coshf entry point that shuttles the value through core register r0.
 *
 * With __MATH_NEON: copies r0 into s0, calls coshf_neon_hfp(), then copies
 * s0 back into r0. There is no C-level return statement on this path; the
 * code presumably relies on x arriving in r0 and the result being read from
 * r0 (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns coshf_c(x).
 */
float coshf_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
coshf_neon_hfp(x);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return coshf_c(x);
#endif
};

135
deps/math-neon/source/math_expf.c vendored Normal file
View File

@ -0,0 +1,135 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Based on:
e ^ x = (1+m) * (2^n)
x = log(1+m) + n * log(2)
n = (int) (x * 1.0 / log(2))
(1+m) = e ^ (x - n * log(2))
(1+m) = Poly(x - n * log(2))
where Poly(x) is the Minimax approximation of e ^ x over the
range [-Log(2), Log(2)]
Test func : expf(x)
Test Range: 0 < x < 50
Peak Error: ~0.00024%
RMS Error: ~0.00007%
*/
#include "math.h"
#include "math_neon.h"
/*
 * Range-reduction constants: [0] = 1/ln(2), used to compute the power-of-two
 * exponent, [1] = ln(2), used to subtract that exponent back out of x.
 */
const float __expf_rng[2] = {
1.442695041f,
0.693147180f
};
/*
 * Polynomial coefficients p0..p7 for e^x on the reduced range.
 * The order is deliberately interleaved: expf_neon_hfp() loads the table with
 * one instruction as q1 = {p0, p4, p2, p6} and q2 = {p1, p5, p3, p7}, and
 * expf_c() indexes it accordingly. Do not reorder.
 */
const float __expf_lut[8] = {
0.9999999916728642, //p0
0.04165989275009526, //p4
0.5000006143673624, //p2
0.0014122663401803872, //p6
1.000000059694879, //p1
0.008336936973260111, //p5
0.16666570253074878, //p3
0.00019578093328483123 //p7
};
/*
 * Portable e^x approximation.
 *
 * x is split as x = m * ln(2) + r with m = (int)(x / ln(2)); e^r is
 * approximated by an 8-term polynomial and the result is scaled by 2^m by
 * adding m to the exponent field of the float.
 *
 * Assumes a 32-bit IEEE-754 float and 32-bit int. No overflow, underflow or
 * NaN handling is performed.
 */
float expf_c(float x)
{
	float a, b, c, d, xx;
	int m;
	union {
		float f;
		unsigned int u;
	} r;

	//Range Reduction:
	m = (int) (x * __expf_rng[0]);
	x = x - ((float) m) * __expf_rng[1];

	//Polynomial, evaluated with Estrin's scheme
	a = (__expf_lut[4] * x) + (__expf_lut[0]);
	b = (__expf_lut[6] * x) + (__expf_lut[2]);
	c = (__expf_lut[5] * x) + (__expf_lut[1]);
	d = (__expf_lut[7] * x) + (__expf_lut[3]);
	xx = x * x;
	a = a + b * xx;
	c = c + d * xx;
	xx = xx * xx;
	r.f = a + c * xx;

	//multiply by 2 ^ m: add m to the exponent field.
	//Done in unsigned arithmetic because m is negative for x < 0 and
	//left-shifting a negative int is undefined behaviour in C.
	r.u += (unsigned int) m << 23;
	return r.f;
}
/*
 * NEON e^x approximation, same algorithm as expf_c(). The assembly reads its
 * argument from s0 and leaves the result in s0 (presumably the hard-float
 * calling convention -- TODO confirm); there is no C-level return statement,
 * and without __MATH_NEON the body is empty.
 */
float expf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x}
//Range Reduction:
"vld1.32 d2, [%0] \n\t" //d2 = {invrange, range}
"vmul.f32 d6, d0, d2[0] \n\t" //d6 = d0 * d2[0]
"vcvt.s32.f32 d6, d6 \n\t" //d6 = (int) d6
"vcvt.f32.s32 d1, d6 \n\t" //d1 = (float) d6
"vmls.f32 d0, d1, d2[1] \n\t" //d0 = d0 - d1 * d2[1]
//polynomial:
"vmul.f32 d1, d0, d0 \n\t" //d1 = d0*d0 = {x^2, x^2}
"vld1.32 {d2, d3, d4, d5}, [%1] \n\t" //q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
"vmla.f32 q1, q2, d0[0] \n\t" //q1 = q1 + q2 * d0[0]
"vmla.f32 d2, d3, d1[0] \n\t" //d2 = d2 + d3 * d1[0]
"vmul.f32 d1, d1, d1 \n\t" //d1 = d1 * d1 = {x^4, x^4}
"vmla.f32 d2, d1, d2[1] \n\t" //d2 = d2 + d1 * d2[1]
//multiply by 2 ^ m
"vshl.i32 d6, d6, #23 \n\t" //d6 = d6 << 23
"vadd.i32 d0, d2, d6 \n\t" //d0 = d2 + d6 (integer add into the exponent field)
:: "r"(__expf_rng), "r"(__expf_lut)
: "d0", "d1", "q1", "q2", "d6"
);
#endif
}
/*
 * expf entry point that shuttles the value through core register r0.
 *
 * With __MATH_NEON: copies r0 into s0, calls expf_neon_hfp(), then copies
 * s0 back into r0. There is no C-level return statement on this path; the
 * code presumably relies on x arriving in r0 and the result being read from
 * r0 (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns expf_c(x).
 */
float expf_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
expf_neon_hfp(x);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return expf_c(x);
#endif
};

58
deps/math-neon/source/math_fabsf.c vendored Normal file
View File

@ -0,0 +1,58 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math_neon.h"
/*
 * Portable absolute value: reinterprets the float as an int and clears
 * bit 31 (the sign bit, assuming a 32-bit IEEE-754 float and 32-bit int).
 */
float fabsf_c(float x)
{
	union {
		int i;
		float f;
	} bits;

	bits.f = x;
	bits.i &= 0x7FFFFFFF;
	return bits.f;
}
/*
 * Absolute value computed in place on s0. The argument is read from s0 and
 * the result is left in s0 (presumably the hard-float calling convention --
 * TODO confirm); there is no C-level return statement, and without
 * __MATH_NEON the body is empty.
 *
 * NOTE(review): the asm statement declares no operands or clobbers although
 * it rewrites s0.
 */
float fabsf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"fabss s0, s0 \n\t" //s0 = fabs(s0)
);
#endif
}
/*
 * Absolute value computed by clearing bit 31 of core register r0.
 *
 * With __MATH_NEON there is no C-level return statement; the code presumably
 * relies on x arriving in r0 and the result being read from r0 (soft-float
 * calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns fabsf_c(x).
 *
 * NOTE(review): the asm statement declares no operands or clobbers although
 * it rewrites r0.
 */
float fabsf_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"bic r0, r0, #0x80000000 \n\t" //r0 = r0 & ~(1 << 31)
);
#else
return fabsf_c(x);
#endif
}

66
deps/math-neon/source/math_floorf.c vendored Normal file
View File

@ -0,0 +1,66 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Assumes the floating point value |x| < 2147483648
*/
#include "math.h"
#include "math_neon.h"
/*
 * Portable floor: truncates x towards zero through an int conversion and
 * steps down by one when truncation rounded upwards (negative non-integers).
 * Only meaningful while x fits in an int (see the file header).
 */
float floorf_c(float x)
{
	float truncated = (float) ((int) x);

	if (truncated > x)
		truncated = truncated - 1;
	return truncated;
}
/*
 * NEON floor, same algorithm as floorf_c(). The assembly reads its argument
 * from d0/s0 and leaves the result in d0/s0 (presumably the hard-float
 * calling convention -- TODO confirm); there is no C-level return statement,
 * and without __MATH_NEON the body is empty.
 */
float floorf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vcvt.s32.f32 d1, d0 \n\t" //d1 = (int) d0;
"vcvt.f32.s32 d1, d1 \n\t" //d1 = (float) d1;
"vcgt.f32 d0, d1, d0 \n\t" //d0 = (d1 > d0), all-ones mask when true;
"vshr.u32 d0, #31 \n\t" //d0 = d0 >> 31, reduces the mask to 0 or 1;
"vcvt.f32.u32 d0, d0 \n\t" //d0 = (float) d0;
"vsub.f32 d0, d1, d0 \n\t" //d0 = d1 - d0;
::: "d0", "d1"
);
#endif
}
/*
 * floorf entry point that shuttles the value through core register r0.
 *
 * With __MATH_NEON: copies r0 into s0, calls floorf_neon_hfp(), then copies
 * s0 back into r0. There is no C-level return statement on this path; the
 * code presumably relies on x arriving in r0 and the result being read from
 * r0 (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns floorf_c(x).
 */
float floorf_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
floorf_neon_hfp(x);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return floorf_c(x);
#endif
};

100
deps/math-neon/source/math_fmodf.c vendored Normal file
View File

@ -0,0 +1,100 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Assumes the floating point value |x / y| < 2,147,483,648
*/
#include "math_neon.h"
/*
 * Portable fmod approximation: x - trunc(x / y) * y, where 1 / y comes from
 * a bit-level seed refined by four Newton-Raphson steps instead of a divide.
 * Only meaningful while x / y fits in an int (see the file header).
 */
float fmodf_c(float x, float y)
{
	union {
		float f;
		int i;
	} recip;
	int bias, step, quot;
	float corr, rem;

	//fast reciprocal approximation: rescale y's exponent field by `bias`,
	//take a linear estimate of the inverse, then apply the same exponent
	//adjustment to the estimate
	recip.f = y;
	bias = 0x3F800000 - (recip.i & 0x7F800000);
	recip.i = recip.i + bias;
	recip.f = 1.41176471f - 0.47058824f * recip.f;
	recip.i = recip.i + bias;

	//4x Newton-Raphson: r = r * (2 - r * y)
	for (step = 0; step < 4; step++) {
		corr = 2.0 - recip.f * y;
		recip.f = recip.f * corr;
	}

	quot = (int)(x * recip.f);
	rem = x - ((float)quot) * y;
	return rem;
}
/*
 * NEON fmod approximation, same algorithm as fmodf_c() but seeded with
 * vrecpe. The assembly reads x from s0 and y from s1 and leaves the result
 * in s0 (presumably the hard-float calling convention -- TODO confirm);
 * there is no C-level return statement, and without __MATH_NEON the body is
 * empty.
 */
float fmodf_neon_hfp(float x, float y)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d1, d0[1] \n\t" //d1 = {y, y}
"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x}
//fast reciprocal approximation
"vrecpe.f32 d2, d1 \n\t" //d2 = ~1.0 / d1
"vrecps.f32 d3, d2, d1 \n\t" //d3 = 2.0 - d2 * d1;
"vmul.f32 d2, d2, d3 \n\t" //d2 = d2 * d3;
"vrecps.f32 d3, d2, d1 \n\t" //d3 = 2.0 - d2 * d1;
"vmul.f32 d2, d2, d3 \n\t" //d2 = d2 * d3;
"vrecps.f32 d3, d2, d1 \n\t" //d3 = 2.0 - d2 * d1;
"vmul.f32 d2, d2, d3 \n\t" //d2 = d2 * d3;
"vrecps.f32 d3, d2, d1 \n\t" //d3 = 2.0 - d2 * d1;
"vmul.f32 d2, d2, d3 \n\t" //d2 = d2 * d3;
"vmul.f32 d2, d2, d0 \n\t" //d2 = d2 * d0;
"vcvt.s32.f32 d2, d2 \n\t" //d2 = (int) d2;
"vcvt.f32.s32 d2, d2 \n\t" //d2 = (float) d2;
"vmls.f32 d0, d1, d2 \n\t" //d0 = d0 - d1 * d2;
::: "d0", "d1", "d2", "d3"
);
#endif
}
/*
 * fmodf entry point that shuttles the values through core registers r0/r1.
 *
 * With __MATH_NEON: copies r0 into s0 and r1 into s1, calls
 * fmodf_neon_hfp(), then copies s0 back into r0. There is no C-level return
 * statement on this path; the code presumably relies on x and y arriving in
 * r0/r1 and the result being read from r0 (soft-float calling convention) --
 * TODO confirm for the target ABI.
 * Without __MATH_NEON: returns fmodf_c(x, y).
 */
float fmodf_neon_sfp(float x, float y)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
asm volatile ("vmov.f32 s1, r1 \n\t"); //s1 = r1
fmodf_neon_hfp(x, y);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return fmodf_c(x,y);
#endif
};

79
deps/math-neon/source/math_invsqrtf.c vendored Normal file
View File

@ -0,0 +1,79 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
 * Portable 1/sqrt(x) approximation: a bit-level initial estimate
 * (0x5F3759DF - (bits >> 1)) refined by two Newton-Raphson steps
 * est = est * (3 - x * est * est) / 2.
 */
float invsqrtf_c(float x)
{
	union {
		float f;
		int i;
	} est;
	float scaled, corr;
	int pass;

	//initial estimate (the NEON version uses VRSQRTE here)
	est.f = x;
	est.i = 0x5F3759DF - (est.i >> 1);

	//two refinement steps (the NEON version uses VRSQRTS here)
	for (pass = 0; pass < 2; pass++) {
		scaled = x * est.f;
		corr = (3.0f - scaled * est.f) * 0.5;
		est.f = est.f * corr;
	}
	return est.f;
}
/*
 * NEON 1/sqrt(x): vrsqrte estimate refined by two vrsqrts steps. The
 * assembly reads its argument from d0/s0 and leaves the result in d0/s0
 * (presumably the hard-float calling convention -- TODO confirm); there is
 * no C-level return statement, and without __MATH_NEON the body is empty.
 */
float invsqrtf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vmov.f32 d1, d0 \n\t" //d1 = d0
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
::: "d0", "d1", "d2", "d3"
);
#endif
}
/*
 * invsqrtf entry point that shuttles the value through core register r0.
 *
 * With __MATH_NEON: copies r0 into s0, calls invsqrtf_neon_hfp(), then
 * copies s0 back into r0. There is no C-level return statement on this path;
 * the code presumably relies on x arriving in r0 and the result being read
 * from r0 (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns invsqrtf_c(x).
 */
float invsqrtf_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
invsqrtf_neon_hfp(x);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return invsqrtf_c(x);
#endif
};

67
deps/math-neon/source/math_ldexpf.c vendored Normal file
View File

@ -0,0 +1,67 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
 * Portable ldexp approximation: returns m * 2^e by adding e directly to the
 * exponent field of m's bit pattern.
 *
 * Assumes a 32-bit IEEE-754 float and 32-bit int. There is no handling of
 * zero, denormals, infinities, NaN or exponent overflow/underflow.
 */
float ldexpf_c(float m, int e)
{
	union {
		float f;
		unsigned int u;
	} r;

	r.f = m;
	//Unsigned arithmetic: e may be negative, and left-shifting a negative
	//int is undefined behaviour in C. The resulting bit pattern is the
	//same as the two's-complement signed computation.
	r.u += (unsigned int) e << 23;
	return r.f;
}
/*
 * NEON ldexp: shifts r0 left by 23 and adds it to d0 as an integer. The
 * assembly presumably expects m in s0 and e in r0 (hard-float calling
 * convention -- TODO confirm) and leaves the result in s0; there is no
 * C-level return statement, and without __MATH_NEON the body is empty.
 *
 * NOTE(review): r0 is modified but is neither an operand nor listed as a
 * clobber.
 */
float ldexpf_neon_hfp(float m, int e)
{
#ifdef __MATH_NEON
float r; //unused
asm volatile (
"lsl r0, r0, #23 \n\t" //r0 = r0 << 23
"vdup.i32 d1, r0 \n\t" //d1 = {r0, r0}
"vadd.i32 d0, d0, d1 \n\t" //d0 = d0 + d1
::: "d0", "d1"
);
#endif
}
/*
 * ldexp operating on core registers: shifts r1 left by 23, adds it to the
 * bit pattern taken from r0 and writes the sum back to r0.
 *
 * With __MATH_NEON there is no C-level return statement; the code presumably
 * relies on m arriving in r0, e in r1 and the result being read from r0
 * (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns ldexpf_c(m, e).
 *
 * NOTE(review): r0 and r1 are modified but are neither operands nor listed
 * as clobbers.
 */
float ldexpf_neon_sfp(float m, int e)
{
#ifdef __MATH_NEON
float r; //unused
asm volatile (
"lsl r1, r1, #23 \n\t" //r1 = r1 << 23
"vdup.f32 d0, r0 \n\t" //d0 = {r0, r0}
"vdup.i32 d1, r1 \n\t" //d1 = {r1, r1}
"vadd.i32 d0, d0, d1 \n\t" //d0 = d0 + d1
"vmov.f32 r0, s0 \n\t" //r0 = s0
::: "d0", "d1"
);
#else
return ldexpf_c(m,e);
#endif
}

135
deps/math-neon/source/math_log10f.c vendored Normal file
View File

@ -0,0 +1,135 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Based on:
log10(x) = log10((1+m) * (2^n))
log10(x) = n * log10(2) + log10(1 + m)
log10(1+m) = Poly(1+m)
where Poly(x) is the Minimax approximation of log10(x) over the
range [1, 2]
Test func : log10f(x)
Test Range: 1 < x < 10000
Peak Error: ~0.000040%
RMS Error: ~0.000008%
*/
#include "math.h"
#include "math_neon.h"
/* log10(2): weight applied to the extracted binary exponent. */
const float __log10f_rng = 0.3010299957f;
/*
 * Polynomial coefficients p0..p7 for log10 on the mantissa range.
 * The order is deliberately interleaved: log10f_neon_hfp() loads the table
 * with one instruction as q1 = {p0, p4, p2, p6} and q2 = {p1, p5, p3, p7},
 * and log10f_c() indexes it accordingly. Do not reorder.
 */
const float __log10f_lut[8] = {
-0.99697286229624, //p0
-1.07301643912502, //p4
-2.46980061535534, //p2
-0.07176870463131, //p6
2.247870219989470, //p1
0.366547581117400, //p5
1.991005185100089, //p3
0.006135635201050, //p7
};
/*
 * Portable log10(x) approximation.
 *
 * The binary exponent n is stripped from x, leaving a mantissa whose
 * exponent field is 127; log10 of that mantissa is approximated by an 8-term
 * polynomial and n * log10(2) is added back.
 *
 * Assumes a 32-bit IEEE-754 float and 32-bit int. No handling of zero,
 * negative values, denormals, infinities or NaN.
 */
float log10f_c(float x)
{
	float a, b, c, d, xx;
	int m;
	union {
		float f;
		int i;
		unsigned int u;
	} r;

	//extract exponent
	r.f = x;
	m = (r.i >> 23);
	m = m - 127;
	//Remove the exponent from the bit pattern. Done in unsigned arithmetic
	//because m is negative for x < 1 and left-shifting a negative int is
	//undefined behaviour in C.
	r.u = r.u - ((unsigned int) m << 23);

	//Polynomial, evaluated with Estrin's scheme
	xx = r.f * r.f;
	a = (__log10f_lut[4] * r.f) + (__log10f_lut[0]);
	b = (__log10f_lut[6] * r.f) + (__log10f_lut[2]);
	c = (__log10f_lut[5] * r.f) + (__log10f_lut[1]);
	d = (__log10f_lut[7] * r.f) + (__log10f_lut[3]);
	a = a + b * xx;
	c = c + d * xx;
	xx = xx * xx;
	r.f = a + c * xx;

	//add exponent
	r.f = r.f + ((float) m) * __log10f_rng;
	return r.f;
}
/*
 * NEON log10(x) approximation, same algorithm as log10f_c(). The assembly
 * reads its argument from s0 and leaves the result in s0 (presumably the
 * hard-float calling convention -- TODO confirm); there is no C-level return
 * statement, and without __MATH_NEON the body is empty.
 */
float log10f_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d0, d0[0] \n\t" //d0 = {x,x};
//extract exponent
"vmov.i32 d2, #127 \n\t" //d2 = 127;
"vshr.u32 d6, d0, #23 \n\t" //d6 = d0 >> 23;
"vsub.i32 d6, d6, d2 \n\t" //d6 = d6 - d2;
"vshl.u32 d1, d6, #23 \n\t" //d1 = d6 << 23;
"vsub.i32 d0, d0, d1 \n\t" //d0 = d0 - d1;
//polynomial:
"vmul.f32 d1, d0, d0 \n\t" //d1 = d0*d0 = {x^2, x^2}
"vld1.32 {d2, d3, d4, d5}, [%1] \n\t" //q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
"vmla.f32 q1, q2, d0[0] \n\t" //q1 = q1 + q2 * d0[0]
"vmla.f32 d2, d3, d1[0] \n\t" //d2 = d2 + d3 * d1[0]
"vmul.f32 d1, d1, d1 \n\t" //d1 = d1 * d1 = {x^4, x^4}
"vmla.f32 d2, d1, d2[1] \n\t" //d2 = d2 + d1 * d2[1]
//add exponent
"vdup.32 d7, %0 \n\t" //d7 = {rng, rng}
"vcvt.f32.s32 d6, d6 \n\t" //d6 = (float) d6
"vmla.f32 d2, d6, d7 \n\t" //d2 = d2 + d6 * d7
"vmov.f32 s0, s4 \n\t" //s0 = s4
:: "r"(__log10f_rng), "r"(__log10f_lut)
: "d0", "d1", "q1", "q2", "d6", "d7"
);
#endif
}
/*
 * log10f entry point that shuttles the value through core register r0.
 *
 * With __MATH_NEON: copies r0 into s0, calls log10f_neon_hfp(), then copies
 * s0 back into r0. There is no C-level return statement on this path; the
 * code presumably relies on x arriving in r0 and the result being read from
 * r0 (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns log10f_c(x).
 */
float log10f_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
log10f_neon_hfp(x);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return log10f_c(x);
#endif
};

135
deps/math-neon/source/math_logf.c vendored Normal file
View File

@ -0,0 +1,135 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Based on:
log(x) = log((1+m) * (2^n))
log(x) = n * log(2) + log(1 + m)
log(1+m) = Poly(1+m)
where Poly(x) is the Minimax approximation of log(x) over the
range [1, 2]
Test func : logf(x)
Test Range: 1 < x < 10000
Peak Error: ~0.000601%
RMS Error: ~0.000005%
*/
#include "math.h"
#include "math_neon.h"
/* ln(2): weight applied to the extracted binary exponent. */
const float __logf_rng = 0.693147180f;
/*
 * Polynomial coefficients p0..p7 for the natural log on the mantissa range.
 * The order is deliberately interleaved: logf_neon_hfp() loads the table
 * with one instruction as q1 = {p0, p4, p2, p6} and q2 = {p1, p5, p3, p7},
 * and logf_c() indexes it accordingly. Do not reorder.
 */
const float __logf_lut[8] = {
-2.295614848256274, //p0
-2.470711633419806, //p4
-5.686926051100417, //p2
-0.165253547131978, //p6
+5.175912446351073, //p1
+0.844006986174912, //p5
+4.584458825456749, //p3
+0.014127821926000 //p7
};
/*
 * Portable natural-log approximation.
 *
 * The binary exponent n is stripped from x, leaving a mantissa whose
 * exponent field is 127; the log of that mantissa is approximated by an
 * 8-term polynomial and n * ln(2) is added back.
 *
 * Assumes a 32-bit IEEE-754 float and 32-bit int. No handling of zero,
 * negative values, denormals, infinities or NaN.
 */
float logf_c(float x)
{
	float a, b, c, d, xx;
	int m;
	union {
		float f;
		int i;
		unsigned int u;
	} r;

	//extract exponent
	r.f = x;
	m = (r.i >> 23);
	m = m - 127;
	//Remove the exponent from the bit pattern. Done in unsigned arithmetic
	//because m is negative for x < 1 and left-shifting a negative int is
	//undefined behaviour in C.
	r.u = r.u - ((unsigned int) m << 23);

	//Polynomial, evaluated with Estrin's scheme
	xx = r.f * r.f;
	a = (__logf_lut[4] * r.f) + (__logf_lut[0]);
	b = (__logf_lut[6] * r.f) + (__logf_lut[2]);
	c = (__logf_lut[5] * r.f) + (__logf_lut[1]);
	d = (__logf_lut[7] * r.f) + (__logf_lut[3]);
	a = a + b * xx;
	c = c + d * xx;
	xx = xx * xx;
	r.f = a + c * xx;

	//add exponent
	r.f = r.f + ((float) m) * __logf_rng;
	return r.f;
}
/*
 * NEON natural-log approximation, same algorithm as logf_c(). The assembly
 * reads its argument from s0 and leaves the result in s0 (presumably the
 * hard-float calling convention -- TODO confirm); there is no C-level return
 * statement, and without __MATH_NEON the body is empty.
 */
float logf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d0, d0[0] \n\t" //d0 = {x,x};
//extract exponent
"vmov.i32 d2, #127 \n\t" //d2 = 127;
"vshr.u32 d6, d0, #23 \n\t" //d6 = d0 >> 23;
"vsub.i32 d6, d6, d2 \n\t" //d6 = d6 - d2;
"vshl.u32 d1, d6, #23 \n\t" //d1 = d6 << 23;
"vsub.i32 d0, d0, d1 \n\t" //d0 = d0 - d1;
//polynomial:
"vmul.f32 d1, d0, d0 \n\t" //d1 = d0*d0 = {x^2, x^2}
"vld1.32 {d2, d3, d4, d5}, [%1] \n\t" //q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
"vmla.f32 q1, q2, d0[0] \n\t" //q1 = q1 + q2 * d0[0]
"vmla.f32 d2, d3, d1[0] \n\t" //d2 = d2 + d3 * d1[0]
"vmul.f32 d1, d1, d1 \n\t" //d1 = d1 * d1 = {x^4, x^4}
"vmla.f32 d2, d1, d2[1] \n\t" //d2 = d2 + d1 * d2[1]
//add exponent
"vdup.32 d7, %0 \n\t" //d7 = {rng, rng}
"vcvt.f32.s32 d6, d6 \n\t" //d6 = (float) d6
"vmla.f32 d2, d6, d7 \n\t" //d2 = d2 + d6 * d7
"vmov.f32 s0, s4 \n\t" //s0 = s4
:: "r"(__logf_rng), "r"(__logf_lut)
: "d0", "d1", "q1", "q2", "d6", "d7"
);
#endif
}
/*
 * logf entry point that shuttles the value through core register r0.
 *
 * With __MATH_NEON: copies r0 into s0, calls logf_neon_hfp(), then copies
 * s0 back into r0. There is no C-level return statement on this path; the
 * code presumably relies on x arriving in r0 and the result being read from
 * r0 (soft-float calling convention) -- TODO confirm for the target ABI.
 * Without __MATH_NEON: returns logf_c(x).
 */
float logf_neon_sfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vmov.f32 s0, r0 \n\t"); //s0 = r0
logf_neon_hfp(x);
asm volatile ("vmov.f32 r0, s0 \n\t"); //r0 = s0
#else
return logf_c(x);
#endif
};

95
deps/math-neon/source/math_mat2.c vendored Normal file
View File

@ -0,0 +1,95 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Matrices are specified in column major format:
| a c |
| b d |
therefore m[2] = c
*/
#include "math_neon.h"
//matrix matrix multiplication. d = m0 * m1;
/*
 * Portable 2x2 matrix product d = m0 * m1 (column-major storage).
 *
 * All four results are computed before anything is stored, so d may alias
 * m0 or m1 (in-place multiplication), as with matmul2_neon(), which loads
 * both inputs completely before storing.
 */
void
matmul2_c(float m0[4], float m1[4], float d[4])
{
	float r0 = m0[0]*m1[0] + m0[2]*m1[1];
	float r1 = m0[1]*m1[0] + m0[3]*m1[1];
	float r2 = m0[0]*m1[2] + m0[2]*m1[3];
	float r3 = m0[1]*m1[2] + m0[3]*m1[3];

	d[0] = r0;
	d[1] = r1;
	d[2] = r2;
	d[3] = r3;
}
/*
 * 2x2 matrix product d = m0 * m1 (column-major storage).
 * With __MATH_NEON both inputs are loaded completely before the result is
 * stored; otherwise the call is forwarded to matmul2_c().
 */
void
matmul2_neon(float m0[4], float m1[4], float d[4])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 {d0, d1}, [%0] \n\t" //q0 = m0: d0 = column 0, d1 = column 1
"vld1.32 {d2, d3}, [%1] \n\t" //q1 = m1: d2 = column 0, d3 = column 1
"vmul.f32 d4, d0, d2[0] \n\t" //D4 = D0*D2[0]
"vmul.f32 d5, d0, d3[0] \n\t" //D5 = D0*D3[0]
"vmla.f32 d4, d1, d2[1] \n\t" //D4 += D1*D2[1]
"vmla.f32 d5, d1, d3[1] \n\t" //D5 += D1*D3[1]
"vst1.32 {d4, d5}, [%2] \n\t" //d = q2
:: "r"(m0), "r"(m1), "r"(d)
: "q0", "q1", "q2", "memory"
);
#else
matmul2_c(m0, m1, d);
#endif
}
//matrix vector multiplication. d = m * v
/*
 * Portable 2x2 matrix * 2-vector product d = m * v (m is column-major).
 *
 * Both results are computed before anything is stored, so d may alias v,
 * as with matvec2_neon(), which loads its inputs before storing.
 */
void
matvec2_c(float m[4], float v[2], float d[2])
{
	float r0 = m[0]*v[0] + m[2]*v[1];
	float r1 = m[1]*v[0] + m[3]*v[1];

	d[0] = r0;
	d[1] = r1;
}
/*
 * 2x2 matrix * 2-vector product d = m * v (m is column-major).
 * With __MATH_NEON the inputs are loaded before the result is stored;
 * otherwise the call is forwarded to matvec2_c().
 */
void
matvec2_neon(float m[4], float v[2], float d[2])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 d0, [%1] \n\t" //d0 = v
"vld1.32 {d1, d2}, [%0] \n\t" //d1 = m column 0, d2 = m column 1
"vmul.f32 d3, d1, d0[0] \n\t" //d3 = d1*d0[0]
"vmla.f32 d3, d2, d0[1] \n\t" //d3 += d2*d0[1]
"vst1.32 d3, [%2] \n\t" //d = d3
:: "r"(m), "r"(v), "r"(d)
: "d0", "d1", "d2","d3", "memory"
);
#else
matvec2_c(m, v, d);
#endif
}

131
deps/math-neon/source/math_mat3.c vendored Normal file
View File

@ -0,0 +1,131 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Matrices are specified in column major format:
| x0 x2 |
| x1 x3 |
therefore m[2] = x2
*/
#include "math_neon.h"
//matrix matrix multiplication. d = m0 * m1;
/*
 * Portable 3x3 matrix product d = m0 * m1. Element (row, col) is stored at
 * index 3 * col + row.
 *
 * The product is accumulated in a local buffer and copied out at the end, so
 * d may alias m0 or m1 (in-place multiplication), as with matmul3_neon(),
 * which loads both inputs completely before storing.
 */
void
matmul3_c(float m0[9], float m1[9], float d[9])
{
	float r[9];
	int col, row, i;

	for (col = 0; col < 3; col++) {
		for (row = 0; row < 3; row++) {
			r[3*col + row] = m0[row]*m1[3*col]
				+ m0[3 + row]*m1[3*col + 1]
				+ m0[6 + row]*m1[3*col + 2];
		}
	}

	for (i = 0; i < 9; i++)
		d[i] = r[i];
}
/*
 * 3x3 matrix product d = m0 * m1 (element (row, col) at index 3 * col + row).
 * Without __MATH_NEON the call is forwarded to matmul3_c().
 *
 * NEON path: the columns of m0 are loaded into q3, q4 and q5 at 12-byte
 * strides (q3 and q4 therefore carry one extra float in lane 3), each result
 * column is accumulated in q6, q7 and q8, and the columns are stored at
 * 12-byte strides so each store overwrites the surplus lane of the previous
 * one. The last column is stored as two floats plus one float.
 *
 * NOTE(review): the assembly writes q0-q3 and q8, none of which appear in
 * the clobber list -- confirm this is safe wherever the function may be
 * inlined.
 */
void
matmul3_neon(float m0[9], float m1[9], float d[9])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1[0..3]
"vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1[4..7]
"flds s8, [%1] \n\t" //s8 (d4[0]) = m1[8]
"vld1.32 {d6, d7}, [%0] \n\t" //q3 = m0[0..3]
"add %0, %0, #12 \n\t" //m0 += 3 floats
"vld1.32 {d8, d9}, [%0] \n\t" //q4 = m0[3..6]
"add %0, %0, #12 \n\t" //m0 += 3 floats
"vld1.32 {d10}, [%0] \n\t" //d10 = m0[6..7]
"add %0, %0, #8 \n\t" //m0 += 2 floats
"flds s22, [%0] \n\t" //s22 (d11[0]) = m0[8]
"vmul.f32 q6, q3, d0[0] \n\t" //q6 = q3 * m1[0]
"vmul.f32 q7, q3, d1[1] \n\t" //q7 = q3 * m1[3]
"vmul.f32 q8, q3, d3[0] \n\t" //q8 = q3 * m1[6]
"vmla.f32 q6, q4, d0[1] \n\t" //q6 += q4 * m1[1]
"vmla.f32 q7, q4, d2[0] \n\t" //q7 += q4 * m1[4]
"vmla.f32 q8, q4, d3[1] \n\t" //q8 += q4 * m1[7]
"vmla.f32 q6, q5, d1[0] \n\t" //q6 += q5 * m1[2]
"vmla.f32 q7, q5, d2[1] \n\t" //q7 += q5 * m1[5]
"vmla.f32 q8, q5, d4[0] \n\t" //q8 += q5 * m1[8]
"vmov.f32 q0, q8 \n\t" //q0 = q8, so the last column can be stored via d0/s2
"vst1.32 {d12, d13}, [%2] \n\t" //d[0..3] = q6
"add %2, %2, #12 \n\t" //d += 3 floats
"vst1.32 {d14, d15}, [%2] \n\t" //d[3..6] = q7
"add %2, %2, #12 \n\t" //d += 3 floats
"vst1.32 {d0}, [%2] \n\t" //d[6..7] = d0
"add %2, %2, #8 \n\t" //d += 2 floats
"fsts s2, [%2] \n\t" //d[8] = s2
: "+r"(m0), "+r"(m1), "+r"(d):
: "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "memory"
);
#else
matmul3_c(m0, m1, d);
#endif
};
//matrix vector multiplication. d = m * v
/*
 * Portable 3x3 matrix * 3-vector product d = m * v (element (row, col) of m
 * at index 3 * col + row).
 *
 * All three results are computed before anything is stored, so d may alias
 * v, as with matvec3_neon(), which loads its inputs before storing.
 */
void
matvec3_c(float m[9], float v[3], float d[3])
{
	float r0 = m[0]*v[0] + m[3]*v[1] + m[6]*v[2];
	float r1 = m[1]*v[0] + m[4]*v[1] + m[7]*v[2];
	float r2 = m[2]*v[0] + m[5]*v[1] + m[8]*v[2];

	d[0] = r0;
	d[1] = r1;
	d[2] = r2;
}
/*
 * 3x3 matrix * 3-vector product d = m * v (element (row, col) of m at index
 * 3 * col + row). Without __MATH_NEON the call is forwarded to matvec3_c().
 *
 * NEON path: the columns of m are loaded into q1, q2 and q3 at a 12-byte
 * stride, combined into q9, and the first three lanes are stored to d.
 *
 * NOTE(review): the first load reads four floats from v (one past v[2]) and
 * the third matrix load reads m[6..9] (one past m[8]); the surplus lanes do
 * not reach the stored result, but the reads go beyond the declared array
 * sizes. q1-q3 are written without being listed as clobbers, while the
 * listed q10-q13 are not used.
 */
void
matvec3_neon(float m[9], float v[3], float d[3])
{
#ifdef __MATH_NEON
int tmp; //scratch register operand; set to 12 inside the asm and used as the load stride
asm volatile (
"mov %3, #12 \n\t" //tmp = 12
"vld1.32 {d0, d1}, [%1] \n\t" //Q0 = v
"vld1.32 {d2, d3}, [%0], %3 \n\t" //Q1 = m
"vld1.32 {d4, d5}, [%0], %3 \n\t" //Q2 = m+12
"vld1.32 {d6, d7}, [%0], %3 \n\t" //Q3 = m+24
"vmul.f32 q9, q1, d0[0] \n\t" //Q9 = Q1*Q0[0]
"vmla.f32 q9, q2, d0[1] \n\t" //Q9 += Q2*Q0[1]
"vmla.f32 q9, q3, d1[0] \n\t" //Q9 += Q3*Q0[2]
"vmov.f32 q0, q9 \n\t" //Q0 = q9
"vst1.32 d0, [%2]! \n\t" //d[0..1] = d0
"fsts s2, [%2] \n\t" //d[2] = s2
: "+r"(m), "+r"(v), "+r"(d), "+r"(tmp):
: "q0", "q9", "q10","q11", "q12", "q13", "memory"
);
#else
matvec3_c(m, v, d);
#endif
}

144
deps/math-neon/source/math_mat4.c vendored Normal file
View File

@ -0,0 +1,144 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Matrices are specified in column major format:
| x0 x2 |
| x1 x3 |
therefore m[2] = x2
*/
#include "math_neon.h"
//matrix matrix multiplication. d = m0 * m1;
//Portable C version.  Element (row, col) of a 4x4 matrix lives at index
//row + 4*col.  Results are written in index order d[0]..d[15].
void
matmul4_c(float m0[16], float m1[16], float d[16])
{
    int col;
    int row;

    for (col = 0; col < 4; col++) {
        /* first element of column `col` in m1 and in d */
        const int base = 4 * col;

        for (row = 0; row < 4; row++) {
            d[base + row] = m0[row]*m1[base] + m0[row + 4]*m1[base + 1] + m0[row + 8]*m1[base + 2] + m0[row + 12]*m1[base + 3];
        }
    }
}
/*
NEON implementation of matmul4: d = m0 * m1 for two 4x4 float matrices.
All of m1 is loaded into q0-q3 and all of m0 into q8-q11; the four 4-float
groups of the result are accumulated in q12-q15 and stored to d[0..15].
The asm post-increments m0, m1 and d, which is why they are "+r" operands.
Without __MATH_NEON the call is forwarded to matmul4_c().
*/
void
matmul4_neon(float m0[16], float m1[16], float d[16])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1[0..3]
"vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1[4..7]
"vld1.32 {d4, d5}, [%1]! \n\t" //q2 = m1[8..11]
"vld1.32 {d6, d7}, [%1] \n\t" //q3 = m1[12..15]
"vld1.32 {d16, d17}, [%0]! \n\t" //q8 = m0[0..3]
"vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m0[4..7]
"vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m0[8..11]
"vld1.32 {d22, d23}, [%0] \n\t" //q11 = m0[12..15]
"vmul.f32 q12, q8, d0[0] \n\t" //q12 = q8 * d0[0]
"vmul.f32 q13, q8, d2[0] \n\t" //q13 = q8 * d2[0]
"vmul.f32 q14, q8, d4[0] \n\t" //q14 = q8 * d4[0]
"vmul.f32 q15, q8, d6[0] \n\t" //q15 = q8 * d6[0]
"vmla.f32 q12, q9, d0[1] \n\t" //q12 += q9 * d0[1]
"vmla.f32 q13, q9, d2[1] \n\t" //q13 += q9 * d2[1]
"vmla.f32 q14, q9, d4[1] \n\t" //q14 += q9 * d4[1]
"vmla.f32 q15, q9, d6[1] \n\t" //q15 += q9 * d6[1]
"vmla.f32 q12, q10, d1[0] \n\t" //q12 += q10 * d1[0]
"vmla.f32 q13, q10, d3[0] \n\t" //q13 += q10 * d3[0]
"vmla.f32 q14, q10, d5[0] \n\t" //q14 += q10 * d5[0]
"vmla.f32 q15, q10, d7[0] \n\t" //q15 += q10 * d7[0]
"vmla.f32 q12, q11, d1[1] \n\t" //q12 += q11 * d1[1]
"vmla.f32 q13, q11, d3[1] \n\t" //q13 += q11 * d3[1]
"vmla.f32 q14, q11, d5[1] \n\t" //q14 += q11 * d5[1]
"vmla.f32 q15, q11, d7[1] \n\t" //q15 += q11 * d7[1]
"vst1.32 {d24, d25}, [%2]! \n\t" //d[0..3] = q12
"vst1.32 {d26, d27}, [%2]! \n\t" //d[4..7] = q13
"vst1.32 {d28, d29}, [%2]! \n\t" //d[8..11] = q14
"vst1.32 {d30, d31}, [%2] \n\t" //d[12..15] = q15
: "+r"(m0), "+r"(m1), "+r"(d) :
: "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
"memory"
);
#else
matmul4_c(m0, m1, d);
#endif
}
//matrix vector multiplication. d = m * v
//Portable C version.  Element (row, col) of m lives at index row + 4*col;
//d[0]..d[3] are written in that order.
void
matvec4_c(float m[16], float v[4], float d[4])
{
    int row;

    for (row = 0; row < 4; row++) {
        d[row] = m[row]*v[0] + m[row + 4]*v[1] + m[row + 8]*v[2] + m[row + 12]*v[3];
    }
}
/*
 * NEON implementation of matvec4: d = m * v for a 4x4 matrix and a
 * 4 element vector.  Without __MATH_NEON it forwards to matvec4_c().
 *
 * m : 4x4 matrix, 16 floats
 * v : 4 element input vector
 * d : 4 element output vector
 */
void
matvec4_neon(float m[16], float v[4], float d[4])
{
#ifdef __MATH_NEON
    asm volatile (
    "vld1.32 {d0, d1}, [%1] \n\t" //q0 = v
    "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m[0..3]
    "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m[4..7]
    "vld1.32 {d22, d23}, [%0]! \n\t" //q11 = m[8..11]
    "vld1.32 {d24, d25}, [%0]! \n\t" //q12 = m[12..15]
    "vmul.f32 q13, q9, d0[0] \n\t" //q13 = q9 * v[0]
    "vmla.f32 q13, q10, d0[1] \n\t" //q13 += q10 * v[1]
    "vmla.f32 q13, q11, d1[0] \n\t" //q13 += q11 * v[2]
    "vmla.f32 q13, q12, d1[1] \n\t" //q13 += q12 * v[3]
    "vst1.32 {d26, d27}, [%2] \n\t" //d[0..3] = q13
    /* m is post-incremented by the loads above, so it must be a read/write
       operand; an input-only operand may not be modified by the asm. */
    : "+r"(m)
    : "r"(v), "r"(d)
    : "q0", "q9", "q10","q11", "q12", "q13", "memory"
    );
#else
    matvec4_c(m, v, d);
#endif
}

71
deps/math-neon/source/math_modf.c vendored Normal file
View File

@ -0,0 +1,71 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Assumes the floating point value |x| < 2,147,483,648
*/
#include "math_neon.h"
/*
 * Portable C version of modf.
 * Stores the integer part of x (truncated toward zero) in *i and returns
 * the remaining fractional part, which keeps the sign of x.
 * Only valid while x fits in an int (see the note at the top of the file).
 */
float modf_c(float x, int *i)
{
    int n;

    n = (int)x;
    *i = n;
    x = x - (float)n;
    return x;
}

/*
 * Hard-float NEON version of modf.
 *
 * The asm uses fixed registers instead of operands: it reads x from d0 and
 * the pointer i from r0, and leaves the fractional part in s0.  There is no
 * return statement on this path, so the value the caller sees is whatever
 * the calling convention takes from the return register.
 * NOTE(review): this presumably relies on the hard-float calling convention
 * and on the function not being inlined - confirm against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to modf_c() so the function
 * always returns a defined value.
 */
float modf_neon_hfp(float x, int *i)
{
#ifdef __MATH_NEON
    asm volatile (
    "vcvt.s32.f32 d1, d0 \n\t" //d1 = (int) d0;
    "vcvt.f32.s32 d2, d1 \n\t" //d2 = (float) d1;
    "vsub.f32 d0, d0, d2 \n\t" //d0 = d0 - d2;
    "vstr.i32 s2, [r0] \n\t" //[r0] = d1[0]
    /* "memory": the vstr above writes *i behind the compiler's back */
    ::: "d0", "d1", "d2", "memory"
    );
#else
    return modf_c(x, i);
#endif
}
/*
 * Soft-float NEON version of modf.
 *
 * The asm uses fixed registers instead of operands: it reads x from r0 and
 * the pointer i from r1, stores the integer part through r1 and moves the
 * fractional part back into r0.  There is no return statement on this path.
 * NOTE(review): this presumably relies on the softfp calling convention
 * (arguments and result in core registers) and on the function not being
 * inlined - confirm against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to modf_c().
 */
float modf_neon_sfp(float x, int *i)
{
#ifdef __MATH_NEON
    asm volatile (
    "vdup.f32 d0, r0 \n\t" //d0 = {x, x}
    "vcvt.s32.f32 d1, d0 \n\t" //d1 = (int) d0;
    "vcvt.f32.s32 d2, d1 \n\t" //d2 = (float) d1;
    "vsub.f32 d0, d0, d2 \n\t" //d0 = d0 - d2;
    "vstr.i32 s2, [r1] \n\t" //[r1] = d1[0]
    "vmov.f32 r0, s0 \n\t" //r0 = d0[0];
    /* r0 is overwritten and *i is stored to memory, so both are declared */
    ::: "d0", "d1", "d2", "r0", "memory"
    );
#else
    return modf_c(x, i);
#endif
}

435
deps/math-neon/source/math_neon.h vendored Normal file
View File

@ -0,0 +1,435 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef __MATH_NEON_H__
#define __MATH_NEON_H__
//Architecture detection: only 32-bit ARM targets (__arm__ defined, __i386__ not)
//get the NEON code paths.
#if !defined(__i386__) && defined(__arm__)
//If __MATH_NEON is defined the NEON asm routines are used; otherwise all calls
//to *_neon functions are rerouted to their equivalent *_c function.
#define __MATH_NEON
//Default floating point ABI: 0=softfp, 1=hardfp. Only affects which variant the
//plain *_neon names resolve to (see the #if __MATH_FPABI block below).
//You can access the hardfp versions directly via the *_neon_hfp suffix.
//You can access the softfp versions directly via the *_neon_sfp suffix.
//When this block is skipped __MATH_FPABI stays undefined, which the
//preprocessor treats as 0 in the comparison below.
//NOTE(review): the Vita makefile in this commit builds with -mfloat-abi=hard
//while 0 selects the softfp variants - confirm this is intended.
#define __MATH_FPABI 0
#endif
//ALIGN(A): request A-byte alignment for a variable.  Only active when the
//build defines GCC; otherwise it expands to nothing.
//NOTE(review): nothing visible here defines GCC (compilers predefine __GNUC__,
//not GCC), so the attribute branch looks dormant - confirm before enabling it,
//since callers place ALIGN() after the initializer.
#ifdef GCC
#define ALIGN(A) __attribute__ ((aligned (A)))
#else
#define ALIGN(A)
#endif
//Math constants.  Each one is defined only if the C library's <math.h> has not
//already provided it, so this works regardless of the include-guard name the
//library uses and regardless of include order.
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#endif
#ifndef M_PI_4
#define M_PI_4 0.78539816339744830962 /* pi/4 */
#endif
#ifndef M_E
#define M_E 2.7182818284590452354 /* e */
#endif
#ifndef M_LOG2E
#define M_LOG2E 1.4426950408889634074 /* log_2 e */
#endif
#ifndef M_LOG10E
#define M_LOG10E 0.43429448190325182765 /* log_10 e */
#endif
#ifndef M_LN2
#define M_LN2 0.69314718055994530942 /* log_e 2 */
#endif
#ifndef M_LN10
#define M_LN10 2.30258509299404568402 /* log_e 10 */
#endif
#ifndef M_1_PI
#define M_1_PI 0.31830988618379067154 /* 1/pi */
#endif
#ifndef M_2_PI
#define M_2_PI 0.63661977236758134308 /* 2/pi */
#endif
#ifndef M_2_SQRTPI
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
#endif
#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
#endif
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
#endif
#if __MATH_FPABI == 1
#define sinf_neon sinf_neon_hfp
#define cosf_neon cosf_neon_hfp
#define sincosf_neon sincosf_neon_hfp
#define tanf_neon tanf_neon_hfp
#define atanf_neon atanf_neon_hfp
#define atan2f_neon atan2f_neon_hfp
#define asinf_neon asinf_neon_hfp
#define acosf_neon acosf_neon_hfp
#define sinhf_neon sinhf_neon_hfp
#define coshf_neon coshf_neon_hfp
#define tanhf_neon tanhf_neon_hfp
#define expf_neon expf_neon_hfp
#define logf_neon logf_neon_hfp
#define log10f_neon log10f_neon_hfp
#define powf_neon powf_neon_hfp
#define floorf_neon floorf_neon_hfp
#define ceilf_neon ceilf_neon_hfp
#define fabsf_neon fabsf_neon_hfp
#define ldexpf_neon ldexpf_neon_hfp
#define frexpf_neon frexpf_neon_hfp
#define fmodf_neon fmodf_neon_hfp
#define modf_neon modf_neon_hfp
#define sqrtf_neon sqrtf_neon_hfp
#define invsqrtf_neon invsqrtf_neon_hfp
#else
#define sinf_neon sinf_neon_sfp
#define cosf_neon cosf_neon_sfp
#define sincosf_neon sincosf_neon_sfp
#define tanf_neon tanf_neon_sfp
#define atanf_neon atanf_neon_sfp
#define atan2f_neon atan2f_neon_sfp
#define asinf_neon asinf_neon_sfp
#define acosf_neon acosf_neon_sfp
#define sinhf_neon sinhf_neon_sfp
#define coshf_neon coshf_neon_sfp
#define tanhf_neon tanhf_neon_sfp
#define expf_neon expf_neon_sfp
#define logf_neon logf_neon_sfp
#define log10f_neon log10f_neon_sfp
#define powf_neon powf_neon_sfp
#define floorf_neon floorf_neon_sfp
#define ceilf_neon ceilf_neon_sfp
#define fabsf_neon fabsf_neon_sfp
#define ldexpf_neon ldexpf_neon_sfp
#define frexpf_neon frexpf_neon_sfp
#define fmodf_neon fmodf_neon_sfp
#define modf_neon modf_neon_sfp
#define sqrtf_neon sqrtf_neon_sfp
#define invsqrtf_neon invsqrtf_neon_sfp
#define dot2_neon dot2_neon_sfp
#define dot3_neon dot3_neon_sfp
#define dot4_neon dot4_neon_sfp
#endif
/*
function: enable_runfast
this function enables the floating point runfast mode on the
ARM Cortex A8.
arguments: none
return: nothing
*/
void enable_runfast(void);
/*
Vector helpers for 2, 3 and 4 element float vectors.  Every routine comes as
a portable *_c version and a *_neon version; results that are vectors are
written to d.
notes: in the softfp configuration above dot2_neon/dot3_neon/dot4_neon are
macros, so those three prototypes actually declare dotN_neon_sfp.
NOTE(review): the operations (dot product, cross product, normalisation)
are inferred from the names; the implementations live in other files.
*/
float dot2_c(float v0[2], float v1[2]);
float dot2_neon(float v0[2], float v1[2]);
float dot3_c(float v0[3], float v1[3]);
float dot3_neon(float v0[3], float v1[3]);
float dot4_c(float v0[4], float v1[4]);
float dot4_neon(float v0[4], float v1[4]);
void cross3_c(float v0[3], float v1[3], float d[3]);
void cross3_neon(float v0[3], float v1[3], float d[3]);
void normalize2_c(float v[2], float d[2]);
void normalize2_neon(float v[2], float d[2]);
void normalize3_c(float v[3], float d[3]);
void normalize3_neon(float v[3], float d[3]);
void normalize4_c(float v[4], float d[4]);
void normalize4_neon(float v[4], float d[4]);
/*
function: matmul2
arguments: m0 2x2 matrix, m1 2x2 matrix
return: d 2x2 matrix
expression: d = m0 * m1
*/
void matmul2_c(float m0[4], float m1[4], float d[4]);
void matmul2_neon(float m0[4], float m1[4], float d[4]);
/*
function: matmul3
arguments: m0 3x3 matrix, m1 3x3 matrix
return: d 3x3 matrix
expression: d = m0 * m1
*/
void matmul3_c(float m0[9], float m1[9], float d[9]);
void matmul3_neon(float m0[9], float m1[9], float d[9]);
/*
function: matmul4
arguments: m0 4x4 matrix, m1 4x4 matrix
return: d 4x4 matrix
expression: d = m0 * m1
*/
void matmul4_c(float m0[16], float m1[16], float d[16]);
void matmul4_neon(float m0[16], float m1[16], float d[16]);
/*
function: matvec2
arguments: m 2x2 matrix, v 2 element vector
return: d 2 element vector
expression: d = m * v
*/
void matvec2_c(float m[4], float v[2], float d[2]);
void matvec2_neon(float m[4], float v[2], float d[2]);
/*
function: matvec3
arguments: m 3x3 matrix, v 3 element vector
return: d 3 element vector
expression: d = m * v
*/
void matvec3_c(float m[9], float v[3], float d[3]);
void matvec3_neon(float m[9], float v[3], float d[3]);
/*
function: matvec4
arguments: m 4x4 matrix, v 4 element vector
return: d 4 element vector
expression: d = m * v
*/
void matvec4_c(float m[16], float v[4], float d[4]);
void matvec4_neon(float m[16], float v[4], float d[4]);
/*
function: sinf
arguments: x radians
return: the sine function evaluated at x radians.
expression: r = sin(x)
*/
float sinf_c(float x);
float sinf_neon_hfp(float x);
float sinf_neon_sfp(float x);
/*
function: cosf
arguments: x radians
return: the cosine function evaluated at x radians.
expression: r = cos(x)
notes: computed using cos(x) = sin(x + pi/2)
*/
float cosf_c(float x);
float cosf_neon_hfp(float x);
float cosf_neon_sfp(float x);
/*
function: sincosf
arguments: x radians, r[2] result array.
return: both the sine and the cosine evaluated at x radians.
expression: r = {sin(x), cos(x)}
notes: faster than evaluating separately.
*/
void sincosf_c(float x, float r[2]);
void sincosf_neon_hfp(float x, float r[2]);
void sincosf_neon_sfp(float x, float r[2]);
/*
function: sinfv
arguments: x array of n angles in radians, r array of n results
return: the sine function evaluated at x[i] radians
expression: r[i] = sin(x[i])
notes: faster than evaluating individually.
r and x can be the same memory location.
*/
void sinfv_c(float *x, int n, float *r);
void sinfv_neon(float *x, int n, float *r);
/*
function: tanf
return: the tangent evaluated at x radians.
expression: r = tan(x)
notes: computed using tan(x) = sin(x) / cos(x)
*/
float tanf_c(float x);
float tanf_neon_hfp(float x);
float tanf_neon_sfp(float x);
/*
function: atanf
return: the arctangent evaluated at x.
expression: r = atan(x)
*/
float atanf_c(float x);
float atanf_neon_hfp(float x);
float atanf_neon_sfp(float x);
/*
function: atan2f
arguments: y, x
return: the two argument arctangent of y and x.
expression: r = atan2(y, x)
*/
float atan2f_c(float y, float x);
float atan2f_neon_hfp(float y, float x);
float atan2f_neon_sfp(float y, float x);
/*
function: asinf
return: the arcsine evaluated at x.
expression: r = asin(x)
*/
float asinf_c(float x);
float asinf_neon_hfp(float x);
float asinf_neon_sfp(float x);
/*
function: acosf
return: the arccosine evaluated at x.
expression: r = acos(x)
*/
float acosf_c(float x);
float acosf_neon_hfp(float x);
float acosf_neon_sfp(float x);
/*
function: sinhf
return: the hyperbolic sine evaluated at x.
expression: r = sinh(x)
*/
float sinhf_c(float x);
float sinhf_neon_hfp(float x);
float sinhf_neon_sfp(float x);
/*
function: coshf
return: the hyperbolic cosine evaluated at x.
expression: r = cosh(x)
*/
float coshf_c(float x);
float coshf_neon_hfp(float x);
float coshf_neon_sfp(float x);
/*
function: tanhf
return: the hyperbolic tangent evaluated at x.
expression: r = tanh(x)
*/
float tanhf_c(float x);
float tanhf_neon_hfp(float x);
float tanhf_neon_sfp(float x);
/*
function: expf
return: the natural exponential evaluated at x.
expression: r = e ** x
*/
float expf_c(float x);
float expf_neon_hfp(float x);
float expf_neon_sfp(float x);
/*
function: logf
return: the value of the natural logarithm of x.
expression: r = ln(x)
notes: assumes x > 0
*/
float logf_c(float x);
float logf_neon_hfp(float x);
float logf_neon_sfp(float x);
/*
function: log10f
return: the value of the power 10 logarithm of x.
expression: r = log10(x)
notes: assumes x > 0
*/
float log10f_c(float x);
float log10f_neon_hfp(float x);
float log10f_neon_sfp(float x);
/*
function: powf
return: x raised to the power of n, x ** n.
expression: r = x ** n
notes: computed using e ** (n * ln(x))
*/
float powf_c(float x, float n);
float powf_neon_sfp(float x, float n);
float powf_neon_hfp(float x, float n);
/*
function: floorf
return: x rounded down (towards negative infinity) to its nearest
integer value.
notes: assumes |x| < 2 ** 31
*/
float floorf_c(float x);
float floorf_neon_sfp(float x);
float floorf_neon_hfp(float x);
/*
function: ceilf
return: x rounded up (towards positive infinity) to its nearest
integer value.
notes: assumes |x| < 2 ** 31
*/
float ceilf_c(float x);
float ceilf_neon_hfp(float x);
float ceilf_neon_sfp(float x);
/*
function: fabsf
return: absolute value of x
notes: assumes |x| < 2 ** 31
*/
float fabsf_c(float x);
float fabsf_neon_hfp(float x);
float fabsf_neon_sfp(float x);
/*
function: ldexpf
return: the value of m multiplied by 2 to the power of e.
expression: r = m * (2 ** e)
*/
float ldexpf_c(float m, int e);
float ldexpf_neon_hfp(float m, int e);
float ldexpf_neon_sfp(float m, int e);
/*
function: frexpf
arguments: x value to split, e receives the exponent
return: the exponent and mantissa of x
*/
float frexpf_c(float x, int *e);
float frexpf_neon_hfp(float x, int *e);
float frexpf_neon_sfp(float x, int *e);
/*
function: fmodf
return: the remainder of x divided by y, x % y
expression: r = x - floor(x / y) * y;
notes: assumes that |x / y| < 2 ** 31
*/
float fmodf_c(float x, float y);
float fmodf_neon_hfp(float x, float y);
float fmodf_neon_sfp(float x, float y);
/*
function: modf
arguments: x value to split, i receives the integer part
return: breaks x into the integer (i) and fractional part (return)
notes: assumes that |x| < 2 ** 31
*/
float modf_c(float x, int *i);
float modf_neon_hfp(float x, int *i);
float modf_neon_sfp(float x, int *i);
/*
function: sqrtf
return: (x^0.5)
notes:
*/
float sqrtf_c(float x);
float sqrtf_neon_hfp(float x);
float sqrtf_neon_sfp(float x);
/*
function: invsqrtf
return: 1.0f / (x^0.5)
notes:
*/
float invsqrtf_c(float x);
float invsqrtf_neon_hfp(float x);
float invsqrtf_neon_sfp(float x);
#endif

182
deps/math-neon/source/math_powf.c vendored Normal file
View File

@ -0,0 +1,182 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Based on x ^ n = exp(n * log(x))
Test func : powf(x, n)
Test Range: (1,1) < (x, n) < (10, 10)
Peak Error: ~0.0010%
RMS Error: ~0.0002%
*/
#include "math.h"
#include "math_neon.h"
/* Range reduction constants: {1/ln(2), ln(2)}. */
const float __powf_rng[2] = {
    1.442695041f,
    0.693147180f
};

/*
 * Polynomial coefficients: entries 0-7 approximate log, entries 8-15
 * approximate exp.  Within each group of eight the order is
 * {p0, p4, p2, p6, p1, p5, p3, p7}, which is the layout the NEON code
 * loads into two q registers.
 */
const float __powf_lut[16] = {
    -2.295614848256274, //p0 log
    -2.470711633419806, //p4
    -5.686926051100417, //p2
    -0.165253547131978, //p6
    +5.175912446351073, //p1
    +0.844006986174912, //p5
    +4.584458825456749, //p3
    +0.014127821926000, //p7
    0.9999999916728642, //p0 exp
    0.04165989275009526, //p4
    0.5000006143673624, //p2
    0.0014122663401803872, //p6
    1.000000059694879, //p1
    0.008336936973260111, //p5
    0.16666570253074878, //p3
    0.00019578093328483123 //p7
};

/*
 * Portable C version of powf: returns x ** n, computed as exp(n * log(x)).
 *
 * The exponent field of x is split off, log of the remaining mantissa is
 * evaluated with a degree 7 polynomial, the result is scaled by n, and exp
 * is evaluated the same way after range reduction by ln(2).
 * The bit manipulation only makes sense for x > 0.
 */
float powf_c(float x, float n)
{
    float a, b, c, d, xx;
    int m;
    union {
        float f;
        int i;
        unsigned int u; /* same bits, used where the arithmetic must wrap */
    } r;

    //extract exponent
    r.f = x;
    m = (r.i >> 23);
    m = m - 127;
    //m is negative for x < 1; shift as unsigned so the result is well defined
    r.u = r.u - ((unsigned int)m << 23);

    //Taylor Polynomial (Estrins)
    xx = r.f * r.f;
    a = (__powf_lut[4] * r.f) + (__powf_lut[0]);
    b = (__powf_lut[6] * r.f) + (__powf_lut[2]);
    c = (__powf_lut[5] * r.f) + (__powf_lut[1]);
    d = (__powf_lut[7] * r.f) + (__powf_lut[3]);
    a = a + b * xx;
    c = c + d * xx;
    xx = xx * xx;
    r.f = a + c * xx;

    //add exponent
    r.f = r.f + ((float) m) * __powf_rng[1];
    r.f = r.f * n;

    //Range Reduction:
    m = (int) (r.f * __powf_rng[0]);
    r.f = r.f - ((float) m) * __powf_rng[1];

    //Taylor Polynomial (Estrins)
    a = (__powf_lut[12] * r.f) + (__powf_lut[8]);
    b = (__powf_lut[14] * r.f) + (__powf_lut[10]);
    c = (__powf_lut[13] * r.f) + (__powf_lut[9]);
    d = (__powf_lut[15] * r.f) + (__powf_lut[11]);
    xx = r.f * r.f;
    a = a + b * xx;
    c = c + d * xx;
    xx = xx* xx;
    r.f = a + c * xx;

    //multiply by 2 ^ m (add m to the exponent field, wrapping like the shift)
    r.u = r.u + ((unsigned int)m << 23);
    return r.f;
}

/*
 * Hard-float NEON version of powf.
 *
 * The asm reads x from d0[0] and n from d0[1] and leaves the result in d0;
 * there is no return statement on this path.
 * NOTE(review): this presumably relies on the hard-float calling convention
 * (x in s0, n in s1, result in s0) and on the function not being inlined -
 * confirm against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to powf_c() so the function
 * always returns a defined value.
 */
float powf_neon_hfp(float x, float n)
{
#ifdef __MATH_NEON
    /* The first table load post-increments its pointer, so the asm needs its
       own read/write copy; an input-only operand may not be modified. */
    const float *lut = __powf_lut;
    asm volatile (
    "vdup.f32 d16, d0[1] \n\t" //d16 = {n,n};
    "vdup.f32 d0, d0[0] \n\t" //d0 = {x,x};
    //extract exponent
    "vmov.i32 d2, #127 \n\t" //d2 = 127;
    "vshr.u32 d6, d0, #23 \n\t" //d6 = d0 >> 23;
    "vsub.i32 d6, d6, d2 \n\t" //d6 = d6 - d2;
    "vshl.u32 d1, d6, #23 \n\t" //d1 = d6 << 23;
    "vsub.i32 d0, d0, d1 \n\t" //d0 = d0 - d1;
    //polynomial:
    "vmul.f32 d1, d0, d0 \n\t" //d1 = d0*d0 = {x^2, x^2}
    "vld1.32 {d2, d3, d4, d5}, [%[lut]]! \n\t" //q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ; lut += 8 floats
    "vmla.f32 q1, q2, d0[0] \n\t" //q1 = q1 + q2 * d0[0]
    "vmla.f32 d2, d3, d1[0] \n\t" //d2 = d2 + d3 * d1[0]
    "vmul.f32 d1, d1, d1 \n\t" //d1 = d1 * d1 = {x^4, x^4}
    "vmla.f32 d2, d1, d2[1] \n\t" //d2 = d2 + d1 * d2[1]
    //add exponent
    "vld1.32 d7, [%[rng]] \n\t" //d7 = {invrange, range}
    "vcvt.f32.s32 d6, d6 \n\t" //d6 = (float) d6
    "vmla.f32 d2, d6, d7[1] \n\t" //d2 = d2 + d6 * d7[1]
    "vdup.f32 d0, d2[0] \n\t" //d0 = d2[0]
    "vmul.f32 d0, d0, d16 \n\t" //d0 = d0 * d16
    //Range Reduction:
    "vmul.f32 d6, d0, d7[0] \n\t" //d6 = d0 * d7[0]
    "vcvt.u32.f32 d6, d6 \n\t" //d6 = (int) d6
    "vcvt.f32.u32 d1, d6 \n\t" //d1 = (float) d6
    "vmls.f32 d0, d1, d7[1] \n\t" //d0 = d0 - d1 * d7[1]
    //polynomial:
    "vmul.f32 d1, d0, d0 \n\t" //d1 = d0*d0 = {x^2, x^2}
    "vld1.32 {d2, d3, d4, d5}, [%[lut]] \n\t" //q1 = {p0, p4, p2, p6}, q2 = {p1, p5, p3, p7} ;
    "vmla.f32 q1, q2, d0[0] \n\t" //q1 = q1 + q2 * d0[0]
    "vmla.f32 d2, d3, d1[0] \n\t" //d2 = d2 + d3 * d1[0]
    "vmul.f32 d1, d1, d1 \n\t" //d1 = d1 * d1 = {x^4, x^4}
    "vmla.f32 d2, d1, d2[1] \n\t" //d2 = d2 + d1 * d2[1]
    //multiply by 2 ^ m
    "vshl.i32 d6, d6, #23 \n\t" //d6 = d6 << 23
    "vadd.i32 d0, d2, d6 \n\t" //d0 = d2 + d6
    : [lut] "+r"(lut)
    : [rng] "r"(__powf_rng)
    /* d16 is written by the first instruction, so it is listed as well */
    : "d0", "d1", "d2","d3", "d4", "d5", "d6", "d7", "d16"
    );
#else
    return powf_c(x, n);
#endif
}
/*
 * Soft-float NEON version of powf.
 *
 * Copies r0/r1 into s0/s1, calls powf_neon_hfp() and copies s0 back into r0.
 * The value returned by the call is not used and there is no return
 * statement on this path.
 * NOTE(review): this presumably relies on the softfp calling convention
 * (x in r0, n in r1, result in r0) and on the compiler leaving those
 * registers alone between the statements - confirm against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to powf_c().
 */
float powf_neon_sfp(float x, float n)
{
#ifdef __MATH_NEON
    asm volatile ("vmov.f32 s0, r0 \n\t");
    asm volatile ("vmov.f32 s1, r1 \n\t");
    powf_neon_hfp(x, n);
    asm volatile ("vmov.f32 r0, s0 \n\t");
#else
    return powf_c(x, n);
#endif
}

42
deps/math-neon/source/math_runfast.c vendored Normal file
View File

@ -0,0 +1,42 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
 * Rewrites the VFP status/control register: FPSCR = (FPSCR & x) | y.
 * The header describes this as enabling the floating point "runfast" mode.
 * NOTE(review): y = 0x03000000 presumably sets the flush-to-zero and
 * default-NaN bits and x masks off the exception enables - confirm against
 * the ARM FPSCR description.
 * Does nothing when __arm__ is not defined.
 */
void
enable_runfast()
{
#ifdef __arm__
    static const unsigned int x = 0x04086060; /* bits of FPSCR that are kept */
    static const unsigned int y = 0x03000000; /* bits of FPSCR that are set */
    int r;
    asm volatile (
    "fmrx %0, fpscr \n\t" //r = FPSCR
    "and %0, %0, %1 \n\t" //r = r & 0x04086060
    "orr %0, %0, %2 \n\t" //r = r | 0x03000000
    "fmxr fpscr, %0 \n\t" //FPSCR = r
    /* r is written by the first instruction while x and y are still needed,
       so it must not share a register with them: early-clobber output. */
    : "=&r"(r)
    : "r"(x), "r"(y)
    );
#endif
}

163
deps/math-neon/source/math_sincosf.c vendored Normal file
View File

@ -0,0 +1,163 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/* Range reduction constants: {2/pi, pi/2}. */
const float __sincosf_rng[2] = {
    2.0 / M_PI,
    M_PI / 2.0
};

/*
 * Sine polynomial coefficients.  Every coefficient is stored twice so the
 * NEON code can evaluate the sine and cosine lanes side by side.
 */
const float __sincosf_lut[8] = {
    -0.00018365f, //p7
    -0.00018365f, //p7
    +0.00830636f, //p5
    +0.00830636f, //p5
    -0.16664831f, //p3
    -0.16664831f, //p3
    +0.99999661f, //p1
    +0.99999661f, //p1
};

/*
 * Portable C version of sincosf.
 *
 * x : angle in radians
 * r : receives r[0] = sin(x) and r[1] = cos(x)
 *
 * The cosine is computed as sin(x + pi/2).  Both arguments are reduced to
 * the first quadrant, the sign of the result is derived from the quadrant
 * number and the sign of the argument, and a degree 7 odd polynomial is
 * evaluated.  The quadrant is obtained by an int conversion, so very large
 * |x| is not supported.
 */
void sincosf_c( float x, float r[2])
{
    union {
        float f;
        unsigned int u; /* bit view used to flip the sign bit */
    } ax, bx;
    float y;
    float xx, yy;
    float s, c;
    int m, n, o, p;

    y = x + __sincosf_rng[1];
    ax.f = fabsf(x);
    bx.f = fabsf(y);

    //Range Reduction:
    m = (int) (ax.f * __sincosf_rng[0]);
    o = (int) (bx.f * __sincosf_rng[0]);
    ax.f = ax.f - (((float)m) * __sincosf_rng[1]);
    bx.f = bx.f - (((float)o) * __sincosf_rng[1]);

    //Test Quadrant
    n = m & 1;
    p = o & 1;
    ax.f = ax.f - n * __sincosf_rng[1];
    bx.f = bx.f - p * __sincosf_rng[1];
    m = m >> 1;
    o = o >> 1;
    n = n ^ m;
    p = p ^ o;
    m = (x < 0.0);
    o = (y < 0.0);
    n = n ^ m;
    p = p ^ o;
    //only bit 0 decides the sign; shift as unsigned so the other bits are
    //discarded without signed overflow
    ax.u = ax.u ^ ((unsigned int)n << 31);
    bx.u = bx.u ^ ((unsigned int)p << 31);

    //Taylor Polynomial
    xx = ax.f * ax.f;
    yy = bx.f * bx.f;
    s = __sincosf_lut[0];
    c = __sincosf_lut[1];
    s = s * xx + __sincosf_lut[2];
    c = c * yy + __sincosf_lut[3];
    s = s * xx + __sincosf_lut[4];
    c = c * yy + __sincosf_lut[5];
    s = s * xx + __sincosf_lut[6];
    c = c * yy + __sincosf_lut[7];
    r[0] = s * ax.f;
    r[1] = c * bx.f;
}
/*
 * Hard-float NEON version of sincosf: r[0] = sin(x), r[1] = cos(x).
 *
 * HACK: x is not passed to the asm as an operand; the asm reads it from
 * d0[0], i.e. it assumes s0 = x on entry.
 * NOTE(review): that presumably holds only for the hard-float calling
 * convention (or when sincosf_neon_sfp has preloaded d0) - confirm.
 *
 * Without __MATH_NEON the call is forwarded to sincosf_c().
 */
void sincosf_neon_hfp(float x, float r[2])
{
#ifdef __MATH_NEON
    /* The vldm instructions advance their base register, so the asm needs
       its own read/write copy of the table pointer; an input-only operand
       may not be modified. */
    const float *lut = __sincosf_lut;
    asm volatile (
    //{x, y} = {x, x + pi/2}
    "vdup.f32 d1, d0[0] \n\t" //d1 = {x, x}
    "vld1.32 d3, [%[rng]] \n\t" //d3 = {invrange, range}
    "vadd.f32 d0, d1, d3 \n\t" //d0 = d1 + d3
    "vmov.f32 s0, s2 \n\t" //d0[0] = d1[0]
    "vabs.f32 d1, d0 \n\t" //d1 = {abs(x), abs(y)}
    //Range Reduction:
    "vmul.f32 d2, d1, d3[0] \n\t" //d2 = d1 * d3[0]
    "vcvt.u32.f32 d2, d2 \n\t" //d2 = (int) d2
    "vcvt.f32.u32 d4, d2 \n\t" //d4 = (float) d2
    "vmls.f32 d1, d4, d3[1] \n\t" //d1 = d1 - d4 * d3[1]
    //Checking Quadrant:
    //ax = ax - (k&1) * M_PI_2
    "vmov.i32 d4, #1 \n\t" //d4 = 1
    "vand.i32 d4, d4, d2 \n\t" //d4 = d4 & d2
    "vcvt.f32.u32 d5, d4 \n\t" //d5 = (float) d4
    "vmls.f32 d1, d5, d3[1] \n\t" //d1 = d1 - d5 * d3[1]
    //ax = ax ^ ((k & 1) ^ (k >> 1) ^ (x < 0) << 31)
    "vshr.u32 d3, d2, #1 \n\t" //d3 = d2 >> 1
    "veor.i32 d4, d4, d3 \n\t" //d4 = d4 ^ d3
    "vclt.f32 d3, d0, #0 \n\t" //d3 = (d0 < 0.0)
    "veor.i32 d4, d4, d3 \n\t" //d4 = d4 ^ d3
    "vshl.i32 d4, d4, #31 \n\t" //d4 = d4 << 31
    "veor.i32 d0, d1, d4 \n\t" //d0 = d1 ^ d4
    //polynomial:
    "vldm %[lut]!, {d2, d3} \n\t" //d2 = {p7, p7}, d3 = {p5, p5}, lut += 4 floats
    "vmul.f32 d1, d0, d0 \n\t" //d1 = d0 * d0 = {x^2, y^2}
    "vldm %[lut]!, {d4} \n\t" //d4 = {p3, p3}, lut += 2 floats
    "vmla.f32 d3, d2, d1 \n\t" //d3 = d3 + d2 * d1;
    "vldm %[lut]!, {d5} \n\t" //d5 = {p1, p1}, lut += 2 floats
    "vmla.f32 d4, d3, d1 \n\t" //d4 = d4 + d3 * d1;
    "vmla.f32 d5, d4, d1 \n\t" //d5 = d5 + d4 * d1;
    "vmul.f32 d5, d5, d0 \n\t" //d5 = d5 * d0;
    "vstm.f32 %[out], {d5} \n\t" //r[0] = d5[0], r[1]=d5[1];
    : [out] "+r"(r), [lut] "+r"(lut)
    : [rng] "r"(__sincosf_rng)
    /* "memory": the vstm above writes r[0] and r[1] */
    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
    );
#else
    sincosf_c(x, r);
#endif
}
/*
 * Soft-float NEON version of sincosf: r[0] = sin(x), r[1] = cos(x).
 *
 * Duplicates r0 into d0 and then calls sincosf_neon_hfp(), whose asm reads
 * x from d0[0].  The final vmov copies s0 into r0 although the function
 * returns nothing.
 * NOTE(review): this presumably relies on the softfp calling convention
 * (x in r0) and on the compiler leaving d0 alone before the call - confirm
 * against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to sincosf_c().
 */
void sincosf_neon_sfp(float x, float r[2])
{
#ifdef __MATH_NEON
    asm volatile ("vdup.f32 d0, r0 \n\t");
    sincosf_neon_hfp(x, r);
    asm volatile ("vmov.f32 r0, s0 \n\t");
#else
    sincosf_c(x, r);
#endif
}

128
deps/math-neon/source/math_sinf.c vendored Normal file
View File

@ -0,0 +1,128 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <math.h>
#include "math_neon.h"
//Range reduction constants: {2/pi, pi/2}.
//ALIGN() comes from math_neon.h and expands to nothing unless GCC is defined.
static const float __sinf_rng[2] = {
2.0 / M_PI,
M_PI / 2.0
} ALIGN(16);
//Sine polynomial coefficients in the order {p7, p3, p5, p1}; the order matches
//the pairing used by sinf_c and the single q register load in sinf_neon_hfp.
static const float __sinf_lut[4] = {
-0.00018365f, //p7
-0.16664831f, //p3
+0.00830636f, //p5
+0.99999661f, //p1
} ALIGN(16);
/*
 * Portable C version of sinf: returns sin(x) for x in radians.
 *
 * |x| is reduced to the first quadrant, the sign of the result is derived
 * from the quadrant number and the sign of x, and a degree 7 odd polynomial
 * is evaluated.  The quadrant is obtained by an int conversion, so very
 * large |x| is not supported.
 */
float sinf_c(float x)
{
    union {
        float f;
        unsigned int u; /* bit view used to flip the sign bit */
    } ax;
    float r, a, b, xx;
    int m, n;

    ax.f = fabsf(x);

    //Range Reduction:
    m = (int) (ax.f * __sinf_rng[0]);
    ax.f = ax.f - (((float)m) * __sinf_rng[1]);

    //Test Quadrant
    n = m & 1;
    ax.f = ax.f - n * __sinf_rng[1];
    m = m >> 1;
    n = n ^ m;
    m = (x < 0.0);
    n = n ^ m;
    //only bit 0 decides the sign; shift as unsigned so the other bits are
    //discarded without signed overflow
    ax.u = ax.u ^ ((unsigned int)n << 31);

    //Taylor Polynomial (Estrins)
    xx = ax.f * ax.f;
    a = (__sinf_lut[0] * ax.f) * xx + (__sinf_lut[2] * ax.f);
    b = (__sinf_lut[1] * ax.f) * xx + (__sinf_lut[3] * ax.f);
    xx = xx * xx;
    r = b + a * xx;
    return r;
}

/*
 * Hard-float NEON version of sinf.
 *
 * The asm reads x from d0[0] and leaves the result in s0; there is no
 * return statement on this path.
 * NOTE(review): this presumably relies on the hard-float calling convention
 * (x and the result in s0) and on the function not being inlined - confirm
 * against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to sinf_c() so the function
 * always returns a defined value.
 */
float sinf_neon_hfp(float x)
{
#ifdef __MATH_NEON
    asm volatile (
    "vld1.32 d3, [%0] \n\t" //d3 = {invrange, range}
    "vdup.f32 d0, d0[0] \n\t" //d0 = {x, x}
    "vabs.f32 d1, d0 \n\t" //d1 = {ax, ax}
    "vmul.f32 d2, d1, d3[0] \n\t" //d2 = d1 * d3[0]
    "vcvt.u32.f32 d2, d2 \n\t" //d2 = (int) d2
    "vmov.i32 d5, #1 \n\t" //d5 = 1
    "vcvt.f32.u32 d4, d2 \n\t" //d4 = (float) d2
    "vshr.u32 d7, d2, #1 \n\t" //d7 = d2 >> 1
    "vmls.f32 d1, d4, d3[1] \n\t" //d1 = d1 - d4 * d3[1]
    "vand.i32 d5, d2, d5 \n\t" //d5 = d2 & d5
    "vclt.f32 d18, d0, #0 \n\t" //d18 = (d0 < 0.0)
    "vcvt.f32.u32 d6, d5 \n\t" //d6 = (float) d5
    "vmls.f32 d1, d6, d3[1] \n\t" //d1 = d1 - d6 * d3[1]
    "veor.i32 d5, d5, d7 \n\t" //d5 = d5 ^ d7
    "vmul.f32 d2, d1, d1 \n\t" //d2 = d1*d1 = {x^2, x^2}
    "vld1.32 {d16, d17}, [%1] \n\t" //q8 = {p7, p3, p5, p1}
    "veor.i32 d5, d5, d18 \n\t" //d5 = d5 ^ d18
    "vshl.i32 d5, d5, #31 \n\t" //d5 = d5 << 31
    "veor.i32 d1, d1, d5 \n\t" //d1 = d1 ^ d5
    "vmul.f32 d3, d2, d2 \n\t" //d3 = d2*d2 = {x^4, x^4}
    "vmul.f32 q0, q8, d1[0] \n\t" //q0 = q8 * d1[0] = {p7x, p3x, p5x, p1x}
    "vmla.f32 d1, d0, d2[0] \n\t" //d1 = d1 + d0*d2 = {p5x + p7x^3, p1x + p3x^3}
    "vmla.f32 d1, d3, d1[0] \n\t" //d1 = d1 + d3*d1[0] = {...., p1x + p3x^3 + p5x^5 + p7x^7}
    "vmov.f32 s0, s3 \n\t" //s0 = s3
    :
    : "r"(__sinf_rng), "r"(__sinf_lut)
    : "q0", "q1", "q2", "q3", "q8", "q9"
    );
#else
    return sinf_c(x);
#endif
}
/*
 * Soft-float NEON version of sinf.
 *
 * Duplicates r0 into d0, calls sinf_neon_hfp() and copies s0 back into r0.
 * The value returned by the call is not used and there is no return
 * statement on this path.
 * NOTE(review): this presumably relies on the softfp calling convention
 * (x and the result in r0) and on the compiler leaving those registers
 * alone between the statements - confirm against the build flags.
 *
 * Without __MATH_NEON the call is forwarded to sinf_c().
 */
float sinf_neon_sfp(float x)
{
#ifdef __MATH_NEON
    asm volatile ("vdup.f32 d0, r0 \n\t");
    sinf_neon_hfp(x);
    asm volatile ("vmov.f32 r0, s0 \n\t");
#else
    return sinf_c(x);
#endif
}

110
deps/math-neon/source/math_sinfv.c vendored Normal file
View File

@ -0,0 +1,110 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/* Range-reduction constants read by sinfv_c(): [0] = 2/pi, [1] = pi/2. */
const float __sinfv_rng[2] = {
2.0 / M_PI,
M_PI / 2.0,
};
/*
 * Coefficients of the odd sine polynomial evaluated by sinfv_c().
 * Storage order is p7, p3, p5, p1 (coefficients of x^7, x^3, x^5 and x).
 */
const float __sinfv_lut[4] = {
-0.00018365f, //p7
-0.16664831f, //p3
+0.00830636f, //p5
+0.99999661f, //p1
};
/*
 * Vector sine in plain C: stores an approximation of sin(x[i]) in r[i] for
 * every i in [0, n).
 *
 * Elements are consumed in pairs.  When n is odd, the first element is handed
 * to sinf_c() so that an even count remains for the paired loop.
 */
void sinfv_c(float *x, int n, float *r)
{
	union {
		float f;
		int i;
	} lane0, lane1;
	float in0, in1;
	float sq0, sq1, hi0, hi1, lo0, lo1;
	int quad0, quad1, odd0, odd1;
	float inv_range, range;

	if (n & 0x1) {
		*r++ = sinf_c(*x++);
		n--;
	}

	inv_range = __sinfv_rng[0];
	range = __sinfv_rng[1];

	for (; n > 0; n -= 2) {
		/* Both inputs are fetched before either result is stored. */
		in0 = *x++;
		in1 = *x++;

		/* ---- first element of the pair ---- */
		lane0.f = fabsf(in0);
		/* range reduction: subtract whole multiples of pi/2 */
		quad0 = (int) (lane0.f * inv_range);
		lane0.f = lane0.f - (((float)quad0) * range);
		/* odd quadrants are shifted down by another pi/2 */
		odd0 = quad0 & 1;
		lane0.f = lane0.f - odd0 * range;
		quad0 = (quad0 & 2) >> 1;
		/* fold quadrant parity and the input's sign into the sign bit */
		lane0.i = lane0.i ^ ((odd0 ^ quad0 ^ (in0 < 0)) << 31);
		/* polynomial: (p3*x^3 + p1*x) + x^4 * (p7*x^3 + p5*x) */
		sq0 = lane0.f * lane0.f;
		hi0 = (__sinfv_lut[0] * lane0.f) * sq0 + (__sinfv_lut[2] * lane0.f);
		lo0 = (__sinfv_lut[1] * lane0.f) * sq0 + (__sinfv_lut[3] * lane0.f);
		sq0 = sq0 * sq0;

		/* ---- second element of the pair ---- */
		lane1.f = fabsf(in1);
		quad1 = (int) (lane1.f * inv_range);
		lane1.f = lane1.f - (((float)quad1) * range);
		odd1 = quad1 & 1;
		lane1.f = lane1.f - odd1 * range;
		quad1 = (quad1 & 2) >> 1;
		lane1.i = lane1.i ^ ((odd1 ^ quad1 ^ (in1 < 0)) << 31);
		sq1 = lane1.f * lane1.f;
		hi1 = (__sinfv_lut[0] * lane1.f) * sq1 + (__sinfv_lut[2] * lane1.f);
		lo1 = (__sinfv_lut[1] * lane1.f) * sq1 + (__sinfv_lut[3] * lane1.f);
		sq1 = sq1 * sq1;

		*r++ = lo0 + hi0 * sq0;
		*r++ = lo1 + hi1 * sq1;
	}
}
/*
 * Vector sine entry point: fills r[0..n) with approximations of sin(x[i]).
 *
 * No NEON implementation exists for this routine.  The former __MATH_NEON
 * branch was an empty asm statement that left r untouched, so NEON builds got
 * no output at all.  Both configurations now use the C implementation.
 */
void sinfv_neon(float *x, int n, float *r)
{
	sinfv_c(x, n, r);
}

120
deps/math-neon/source/math_sinhf.c vendored Normal file
View File

@ -0,0 +1,120 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
 * Range-reduction constants loaded by sinhf_neon_hfp() as {invrange, range}.
 * Numerically these are 1/ln(2) and ln(2).
 */
const float __sinhf_rng[2] = {
1.442695041f,
0.693147180f
};
/*
 * Polynomial coefficients p7 .. p0 used by sinhf_neon_hfp().
 * Every coefficient appears twice in a row, so each 64-bit load in the
 * assembly fills both lanes of a d register with the same value.
 */
const float __sinhf_lut[16] = {
0.00019578093328483123, //p7
0.00019578093328483123, //p7
0.0014122663401803872, //p6
0.0014122663401803872, //p6
0.008336936973260111, //p5
0.008336936973260111, //p5
0.04165989275009526, //p4
0.04165989275009526, //p4
0.16666570253074878, //p3
0.16666570253074878, //p3
0.5000006143673624, //p2
0.5000006143673624, //p2
1.000000059694879, //p1
1.000000059694879, //p1
0.9999999916728642, //p0
0.9999999916728642 //p0
};
/*
 * Hyperbolic sine in plain C: (e^x - e^-x) / 2, with both exponentials
 * obtained from expf_c().
 */
float sinhf_c(float x)
{
	const float grow = expf_c(x);
	const float decay = expf_c(-x);

	return (grow - decay) * 0.5f;
}
/*
 * sinhf() implemented with NEON inline assembly.
 *
 * d0 is set to {x, -x}, so both exponentials are evaluated at once, one per
 * lane: range reduction with __sinhf_rng, a degree-7 polynomial with the
 * coefficients of __sinhf_lut, and a scale by 2^m done by adding m to the
 * exponent field.  The result (lane0 - lane1) * 0.5 is left in d0.
 *
 * The assembly reads its input from d0[0] and leaves the result in s0; the C
 * body contains no return statement, and when __MATH_NEON is not defined the
 * body is empty and no value is returned.
 * NOTE(review): presumably this relies on a hard-float calling convention that
 * passes x and the return value in s0 -- confirm against the build's ABI.
 *
 * Two fixes to the operand description versus the earlier version:
 *  - the table pointer is advanced by the post-incrementing loads, so it is
 *    now a read-write operand backed by a local copy instead of an input;
 *  - d16-d23 (q8-q11), which hold the coefficients, are listed as clobbers.
 */
float sinhf_neon_hfp(float x)
{
#ifdef __MATH_NEON
	const float *lut = __sinhf_lut;

	asm volatile (
	"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x}
	"fnegs s1, s1 \n\t" //s1 = -s1
	//Range Reduction:
	"vld1.32 d2, [%[rng]] \n\t" //d2 = {invrange, range}
	"vld1.32 {d16, d17}, [%[lut]]! \n\t" //d16 = {p7, p7}, d17 = {p6, p6}
	"vmul.f32 d6, d0, d2[0] \n\t" //d6 = d0 * d2[0]
	"vcvt.s32.f32 d6, d6 \n\t" //d6 = (int) d6
	"vld1.32 {d18}, [%[lut]]! \n\t" //d18 = {p5, p5}
	"vcvt.f32.s32 d1, d6 \n\t" //d1 = (float) d6
	"vld1.32 {d19}, [%[lut]]! \n\t" //d19 = {p4, p4}
	"vmls.f32 d0, d1, d2[1] \n\t" //d0 = d0 - d1 * d2[1]
	"vld1.32 {d20}, [%[lut]]! \n\t" //d20 = {p3, p3}
	//polynomial:
	"vmla.f32 d17, d16, d0 \n\t" //d17 = d17 + d16 * d0;
	"vld1.32 {d21}, [%[lut]]! \n\t" //d21 = {p2, p2}
	"vmla.f32 d18, d17, d0 \n\t" //d18 = d18 + d17 * d0;
	"vld1.32 {d22}, [%[lut]]! \n\t" //d22 = {p1, p1}
	"vmla.f32 d19, d18, d0 \n\t" //d19 = d19 + d18 * d0;
	"vld1.32 {d23}, [%[lut]]! \n\t" //d23 = {p0, p0}
	"vmla.f32 d20, d19, d0 \n\t" //d20 = d20 + d19 * d0;
	"vmla.f32 d21, d20, d0 \n\t" //d21 = d21 + d20 * d0;
	"vmla.f32 d22, d21, d0 \n\t" //d22 = d22 + d21 * d0;
	"vmla.f32 d23, d22, d0 \n\t" //d23 = d23 + d22 * d0;
	//multiply by 2 ^ m
	"vshl.i32 d6, d6, #23 \n\t" //d6 = d6 << 23
	"vadd.i32 d0, d23, d6 \n\t" //d0 = d23 + d6
	"vdup.f32 d2, d0[1] \n\t" //d2 = s1
	"vmov.f32 d1, #0.5 \n\t" //d1 = 0.5
	"vsub.f32 d0, d0, d2 \n\t" //d0 = d0 - d2
	"vmul.f32 d0, d1 \n\t" //d0 = d0 * d1
	: [lut] "+r"(lut)
	: [rng] "r"(__sinhf_rng)
	: "d0", "d1", "q1", "q2", "d6", "q8", "q9", "q10", "q11"
	);
#endif
}
/*
 * Entry point for sinhf() that moves the argument and result through r0.
 *
 * With __MATH_NEON defined, r0 is copied into s0, sinhf_neon_hfp() runs, and
 * s0 is copied back into r0; there is no C-level return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where x and the return value travel in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to sinhf_c().
 */
float sinhf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vmov.f32 s0, r0 \n\t");
	sinhf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return sinhf_c(x);
#endif
}

105
deps/math-neon/source/math_sqrtf.c vendored Normal file
View File

@ -0,0 +1,105 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Test func : sqrtf(x)
Test Range: 0 < x < 1,000,000,000
Peak Error: ~0.0010%
RMS Error: ~0.0005%
*/
#include "math.h"
#include "math_neon.h"
/*
 * Approximate square root in plain C.
 *
 * Computed as 1 / (1 / sqrt(x)):
 *   1. a bit-level first guess of 1/sqrt(x) refined by two Newton steps
 *      (the C counterpart of VRSQRTE / VRSQRTS);
 *   2. a reciprocal of that value, seeded by a linear estimate on the
 *      exponent-normalised input and refined by two Newton steps.
 */
float sqrtf_c(float x)
{
	union {
		float f;
		int i;
	} bits;
	float inv_root, scaled, corr;
	int exp_adj, pass;

	/* inverse square root: initial guess from the bit pattern */
	bits.f = x;
	bits.i = 0x5F3759DF - (bits.i >> 1); //VRSQRTE
	for (pass = 0; pass < 2; pass++) {
		scaled = x * bits.f;
		corr = (3.0f - scaled * bits.f) * 0.5; //VRSQRTS
		bits.f = bits.f * corr;
	}

	/* reciprocal of the inverse square root */
	inv_root = bits.f;
	/* exp_adj moves the exponent field to that of 1.0 */
	exp_adj = 0x3F800000 - (bits.i & 0x7F800000);
	bits.i = bits.i + exp_adj;
	bits.f = 1.41176471f - 0.47058824f * bits.f;
	bits.i = bits.i + exp_adj;
	for (pass = 0; pass < 2; pass++) {
		corr = 2.0 - bits.f * inv_root;
		bits.f = bits.f * corr;
	}
	return bits.f;
}
/*
 * sqrtf() implemented with NEON inline assembly.
 *
 * An inverse-square-root estimate (vrsqrte) is refined by two vrsqrts steps,
 * then inverted with a reciprocal estimate (vrecpe) refined by two vrecps
 * steps.
 *
 * The assembly reads its input from d0 and leaves the result in d0; the C
 * body contains no return statement, and when __MATH_NEON is not defined the
 * body is empty and no value is returned.
 * NOTE(review): presumably this relies on a hard-float calling convention that
 * passes x and the return value in s0 -- confirm against the build's ABI.
 */
float sqrtf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
//fast invsqrt approx
"vmov.f32 d1, d0 \n\t" //d1 = d0
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
//fast reciprocal approximation
"vrecpe.f32 d1, d0 \n\t" //d1 = ~ 1 / d0;
"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
"vmul.f32 d0, d1, d2 \n\t" //d0 = d1 * d2;
::: "d0", "d1", "d2", "d3"
);
#endif
}
/*
 * Entry point for sqrtf() that moves the argument and result through r0.
 *
 * With __MATH_NEON defined, r0 is copied into s0, sqrtf_neon_hfp() runs, and
 * s0 is copied back into r0; there is no C-level return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where x and the return value travel in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to sqrtf_c().
 */
float sqrtf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vmov.f32 s0, r0 \n\t");
	sqrtf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return sqrtf_c(x);
#endif
}

147
deps/math-neon/source/math_sqrtfv.c vendored Normal file
View File

@ -0,0 +1,147 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Test func : sqrtf(x)
Test Range: 0 < x < 1,000,000,000
Peak Error: ~0.0010%
RMS Error: ~0.0005%
*/
#include "math.h"
#include "math_neon.h"
/*
 * Approximate square root of one value: 1 / (1 / sqrt(v)).
 * Bit-level inverse-square-root guess plus two Newton steps, followed by a
 * reciprocal (linear seed on the exponent-normalised value plus two Newton
 * steps).  This is the arithmetic the paired loop used to spell out per lane.
 */
static float sqrtfv_lane_(float v)
{
	union {
		float f;
		int i;
	} a;
	float b, c;
	int m;

	//fast invsqrt approx
	a.f = v;
	a.i = 0x5F3759DF - (a.i >> 1); //VRSQRTE
	c = v * a.f;
	b = (3.0f - c * a.f) * 0.5; //VRSQRTS
	a.f = a.f * b;
	c = v * a.f;
	b = (3.0f - c * a.f) * 0.5; //VRSQRTS
	a.f = a.f * b;

	//fast inverse approx
	c = a.f; /* value to invert: 1/sqrt(v) */
	m = 0x3F800000 - (a.i & 0x7F800000);
	a.i = a.i + m;
	a.f = 1.41176471f - 0.47058824f * a.f;
	a.i = a.i + m;
	b = 2.0 - a.f * c;
	a.f = a.f * b;
	b = 2.0 - a.f * c;
	a.f = a.f * b;
	return a.f;
}

/*
 * Vector square root in plain C: stores an approximation of sqrt(x[i]) in
 * r[i] for every i in [0, n).  Nothing is written when n <= 0.
 *
 * The earlier paired loop saved the second lane's inverse square root into
 * c0 instead of c1 ("c0 = a1.f" twice), so both results of every pair were
 * refined against the wrong value.  Every element now goes through the same
 * single-value helper, which removes that class of copy/paste error.
 */
void sqrtfv_c(float *x, int n, float *r)
{
	int i;

	for (i = 0; i < n; i++)
		r[i] = sqrtfv_lane_(x[i]);
}
/*
 * Vector square root: stores an approximation of sqrt(x[i]) in r[i] for every
 * i in [0, n).  Without __MATH_NEON the work is forwarded to sqrtfv_c().
 *
 * NEON path: a leading odd element is computed with sqrtf_c(); the remaining
 * even count is processed two floats per iteration (vrsqrte + two vrsqrts
 * steps, then vrecpe + two vrecps steps).
 *
 * Changes versus the earlier assembly-only version:
 *  - the odd element is actually computed (the call had been commented out,
 *    so the input value was copied to the output unchanged);
 *  - pointers and count are asm operands instead of hard-coded r0/r1/r2, and
 *    the flags and memory are declared as clobbered;
 *  - the function no longer returns from inside the asm ("bxeq lr");
 *  - n <= 0 is a no-op instead of running the loop body once;
 *  - the store uses 32-bit elements, matching the load, so it carries no
 *    stricter alignment expectation than the float arrays themselves.
 */
void sqrtfv_neon(float *x, int n, float *r)
{
#ifdef __MATH_NEON
	if (n & 0x1) {
		*r++ = sqrtf_c(*x++);
		n--;
	}
	if (n <= 0)
		return;

	asm volatile (
	"1: \n\t" //
	"vld1.32 d0, [%[src]]! \n\t" //d0 = (*x[0], *x[1]), x+=2;
	//fast invsqrt approx
	"vmov.f32 d1, d0 \n\t" //d1 = d0
	"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
	"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
	"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
	"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
	"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
	"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
	"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
	//fast reciprocal approximation
	"vrecpe.f32 d1, d0 \n\t" //d1 = ~ 1 / d0;
	"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
	"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
	"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
	"vmul.f32 d0, d1, d2 \n\t" //d0 = d1 * d2;
	"vst1.32 d0, [%[dst]]! \n\t" //*r++ = d0;
	"subs %[cnt], %[cnt], #2 \n\t" //n = n - 2; update flags
	"bgt 1b \n\t" //
	: [src] "+r"(x), [cnt] "+r"(n), [dst] "+r"(r)
	:
	: "d0", "d1", "d2", "d3", "cc", "memory"
	);
#else
	sqrtfv_c(x, n, r);
#endif
}

156
deps/math-neon/source/math_tanf.c vendored Normal file
View File

@ -0,0 +1,156 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/* Range-reduction constants: [0] = 2/pi, [1] = pi/2. */
const float __tanf_rng[2] = {
	2.0 / M_PI,
	M_PI / 2.0
};
/*
 * Coefficients of the odd sine polynomial, stored in the order
 * p7, p3, p5, p1 (coefficients of x^7, x^3, x^5 and x).
 */
const float __tanf_lut[4] = {
	-0.00018365f, //p7
	-0.16664831f, //p3
	+0.00830636f, //p5
	+0.99999661f, //p1
};

/*
 * Approximate tangent in plain C.
 *
 * The argument is reduced so that it differs from x by a multiple of pi, the
 * sine s of the reduced value is evaluated with the polynomial above, and the
 * result is s / sqrt(1 - s*s), using a bit-level inverse-square-root guess
 * refined by two Newton steps.
 *
 * The sign bit of x is read through a union; the earlier "*(int*)&x" cast
 * accessed a float through an int lvalue, which violates the aliasing rules.
 */
float tanf_c(float x){
	union {
		float f;
		int i;
	} ax, c, sx;
	float r, a, b, xx, cc, cx;
	int m;

	ax.f = fabsf(x);
	sx.f = x;

	//Range Reduction:
	m = (int) (ax.f * __tanf_rng[0]);
	ax.f = ax.f - (((float)m) * __tanf_rng[1]);

	//Test Quadrant
	ax.f = ax.f - (m & 1) * __tanf_rng[1];
	/* tan is odd: give the reduced argument the sign of x */
	ax.i = ax.i ^ (sx.i & 0x80000000);

	//Taylor Polynomial (Estrins)
	xx = ax.f * ax.f;
	a = (__tanf_lut[0] * ax.f) * xx + (__tanf_lut[2] * ax.f);
	b = (__tanf_lut[1] * ax.f) * xx + (__tanf_lut[3] * ax.f);
	xx = xx * xx;
	r = b + a * xx;

	//cosine squared: 1 - sin^2
	c.f = 1.0 - r * r;

	//fast invsqrt approximation (2x newton iterations)
	cc = c.f;
	c.i = 0x5F3759DF - (c.i >> 1); //VRSQRTE
	cx = cc * c.f;
	a = (3.0f - cx * c.f) / 2; //VRSQRTS
	c.f = c.f * a;
	cx = cc * c.f;
	a = (3.0f - cx * c.f) / 2;
	c.f = c.f * a;

	/* sin / sqrt(1 - sin^2) */
	r = r * c.f;
	return r;
}
/*
 * tanf() implemented with NEON inline assembly.
 *
 * Same scheme as tanf_c(): reduce the argument, evaluate the sine polynomial
 * from __tanf_lut, then multiply the sine by an approximation of
 * 1 / sqrt(1 - sin^2) (vrsqrte refined by two vrsqrts steps).
 *
 * The assembly reads its input from d0[0] and leaves the result in s0; the C
 * body contains no return statement, and when __MATH_NEON is not defined the
 * body is empty and no value is returned.
 * NOTE(review): presumably this relies on a hard-float calling convention that
 * passes x and the return value in s0 -- confirm against the build's ABI.
 */
float tanf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile (
"vdup.f32 d0, d0[0] \n\t" //d0 = {x, x}
"vabs.f32 d1, d0 \n\t" //d1 = {ax, ax}
//Range Reduction:
"vld1.32 d3, [%0] \n\t" //d3 = {invrange, range}
"vmul.f32 d2, d1, d3[0] \n\t" //d2 = d1 * d3[0]
"vcvt.u32.f32 d2, d2 \n\t" //d2 = (int) d2
"vcvt.f32.u32 d4, d2 \n\t" //d4 = (float) d2
"vmls.f32 d1, d4, d3[1] \n\t" //d1 = d1 - d4 * d3[1]
//Checking Quadrant:
//ax = ax - (k&1) * M_PI_2
"vmov.i32 d4, #1 \n\t" //d4 = 1
"vand.i32 d2, d2, d4 \n\t" //d2 = d2 & d4
"vcvt.f32.u32 d2, d2 \n\t" //d2 = (float) d2
"vmls.f32 d1, d2, d3[1] \n\t" //d1 = d1 - d2 * d3[1]
//ax = ax ^ ( x.i & 0x80000000)
"vmov.i32 d4, #0x80000000 \n\t" //d4 = 0x80000000
"vand.i32 d0, d0, d4 \n\t" //d0 = d0 & d4
"veor.i32 d1, d1, d0 \n\t" //d1 = d1 ^ d0
//polynomial:
"vmul.f32 d2, d1, d1 \n\t" //d2 = d1*d1 = {x^2, x^2}
"vld1.32 {d4, d5}, [%1] \n\t" //d4 = {p7, p3}, d5 = {p5, p1}
"vmul.f32 d3, d2, d2 \n\t" //d3 = d2*d2 = {x^4, x^4}
"vmul.f32 q0, q2, d1[0] \n\t" //q0 = q2 * d1[0] = {p7x, p3x, p5x, p1x}
"vmla.f32 d1, d0, d2[0] \n\t" //d1 = d1 + d0*d2 = {p5x + p7x^3, p1x + p3x^3}
"vmla.f32 d1, d3, d1[0] \n\t" //d1 = d1 + d3*d1[0] = {..., p1x + p3x^3 + p5x^5 + p7x^7}
//cosine
"vmov.f32 s1, #1.0 \n\t" //d0[1] = 1.0
"vmls.f32 d0, d1, d1 \n\t" //d0 = {..., 1.0 - sx*sx}
//invsqrt approx
"vmov.f32 d2, d0 \n\t" //d2 = d0
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d3, d0, d2 \n\t" //d3 = d0 * d2
"vrsqrts.f32 d4, d3, d0 \n\t" //d4 = (3 - d0 * d3) / 2
"vmul.f32 d0, d0, d4 \n\t" //d0 = d0 * d4
"vmul.f32 d3, d0, d2 \n\t" //d3 = d0 * d2
"vrsqrts.f32 d4, d3, d0 \n\t" //d4 = (3 - d0 * d3) / 2
"vmul.f32 d0, d0, d4 \n\t" //d0 = d0 * d4
"vmul.f32 d0, d0, d1 \n\t" //d0 = d0 * d1
"vmov.f32 s0, s1 \n\t" //s0 = s1 (result taken from the upper lane)
:: "r"(__tanf_rng), "r"(__tanf_lut)
: "d0", "d1", "d2", "d3", "d4", "d5"
);
#endif
}
/*
 * Entry point for tanf() that moves the argument and result through r0.
 *
 * With __MATH_NEON defined, r0 is duplicated into both lanes of d0,
 * tanf_neon_hfp() runs, and s0 is copied back into r0; there is no C-level
 * return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where x and the return value travel in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to tanf_c().
 */
float tanf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vdup.f32 d0, r0 \n\t");
	tanf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return tanf_c(x);
#endif
}

95
deps/math-neon/source/math_tanhf.c vendored Normal file
View File

@ -0,0 +1,95 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math.h"
#include "math_neon.h"
/*
TanH = (e^x - e^-x) / (e^x + e^-x)
TanH = (e^x - e^-x)(e^x) / (e^x + e^-x)(e^x)
TanH = (e^2x - 1) / (e^2x + 1)
*/
/*
 * Hyperbolic tangent in plain C, evaluated as (e^2x - 1) / (e^2x + 1).
 *
 * e^2x comes from expf_c().  The division is replaced by a reciprocal
 * approximation of the denominator: a linear seed on the exponent-normalised
 * value followed by two Newton steps.
 */
float tanhf_c(float x)
{
	union {
		float f;
		int i;
	} recip;
	float e2x, denom, numer, corr;
	int exp_adj, pass;

	e2x = expf_c(2.0f * x);
	denom = e2x + 1.0f;

	/* reciprocal approximation of denom */
	recip.f = denom;
	/* exp_adj moves the exponent field to that of 1.0 */
	exp_adj = 0x3F800000 - (recip.i & 0x7F800000);
	recip.i = recip.i + exp_adj;
	recip.f = 1.41176471f - 0.47058824f * recip.f;
	recip.i = recip.i + exp_adj;
	for (pass = 0; pass < 2; pass++) {
		corr = 2.0 - recip.f * denom;
		recip.f = recip.f * corr;
	}

	numer = e2x - 1.0;
	recip.f *= numer;
	return recip.f;
}
/*
 * tanhf() implemented with NEON inline assembly around expf_neon_hfp().
 *
 * d0 is doubled, expf_neon_hfp() is called, and the second asm statement
 * computes (d0 - 1) * (1 / (d0 + 1)), with the reciprocal obtained from
 * vrecpe refined by two vrecps steps.
 *
 * The C body contains no return statement; when __MATH_NEON is not defined
 * the body is empty and no value is returned.
 * NOTE(review): the three statements communicate only through d0 -- nothing
 * tells the compiler that d0 must survive between them, and the C argument x
 * passed to expf_neon_hfp() is not the doubled value.  Presumably this relies
 * on a hard-float calling convention and on expf_neon_hfp() reading d0
 * directly -- confirm.
 */
float tanhf_neon_hfp(float x)
{
#ifdef __MATH_NEON
asm volatile ("vadd.f32 d0, d0, d0 \n\t"); //d0 = 2 * d0
expf_neon_hfp(x);
asm volatile (
"vmov.f32 d2, #1.0 \n\t" //d2 = 1.0
"vsub.f32 d3, d0, d2 \n\t" //d3 = d0 - 1.0
"vadd.f32 d0, d0, d2 \n\t" //d0 = d0 + 1.0
"vrecpe.f32 d1, d0 \n\t" //d1 = ~ 1 / d0;
"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
"vmul.f32 d1, d1, d2 \n\t" //d1 = d1 * d2;
"vrecps.f32 d2, d1, d0 \n\t" //d2 = 2.0 - d1 * d0;
"vmul.f32 d0, d1, d2 \n\t" //d0 = d1 * d2;
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3;
::: "d0", "d1", "d2", "d3"
);
#endif
}
/*
 * Entry point for tanhf() that moves the argument and result through r0.
 *
 * With __MATH_NEON defined, r0 is copied into s0, tanhf_neon_hfp() runs, and
 * s0 is copied back into r0; there is no C-level return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where x and the return value travel in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to tanhf_c().
 */
float tanhf_neon_sfp(float x)
{
#ifdef __MATH_NEON
	asm volatile ("vmov.f32 s0, r0 \n\t");
	tanhf_neon_hfp(x);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return tanhf_c(x);
#endif
}

118
deps/math-neon/source/math_vec2.c vendored Normal file
View File

@ -0,0 +1,118 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math_neon.h"
/*
 * Scalar (dot) product of two 2-component vectors:
 * v0[0]*v1[0] + v0[1]*v1[1].
 */
float
dot2_c(float v0[2], float v1[2])
{
	return v0[0]*v1[0] + v0[1]*v1[1];
}
/*
 * Writes v scaled by an approximation of 1/sqrt(v.v) into d.
 * The inverse square root is a bit-level first guess refined by two Newton
 * steps (the C counterpart of VRSQRTE / VRSQRTS).
 */
void
normalize2_c(float v[2], float d[2])
{
	union {
		float f;
		int i;
	} inv;
	float len_sq, scaled, corr;
	int pass;

	/* squared length */
	len_sq = v[0]*v[0];
	len_sq += v[1]*v[1];

	/* inverse square root of the squared length */
	inv.f = len_sq;
	inv.i = 0x5F3759DF - (inv.i >> 1); //VRSQRTE
	for (pass = 0; pass < 2; pass++) {
		scaled = len_sq * inv.f;
		corr = (3.0f - scaled * inv.f) * 0.5; //VRSQRTS
		inv.f = inv.f * corr;
	}

	d[0] = v[0]*inv.f;
	d[1] = v[1]*inv.f;
}
/*
 * Dot product of two 2-component vectors with NEON inline assembly.
 *
 * The result is left in d0 (both lanes hold the sum); the C body contains no
 * return statement, and when __MATH_NEON is not defined the body is empty and
 * no value is returned.
 * NOTE(review): presumably this relies on a hard-float calling convention that
 * returns the value in s0 -- confirm against the build's ABI.
 *
 * The clobber list used to be empty.  It now names the registers the
 * assembly writes (d0, d2, d4), matching dot3_neon_hfp(), plus "memory"
 * because the vectors are read through the pointer operands.
 */
float
dot2_neon_hfp(float v0[2], float v1[2])
{
#ifdef __MATH_NEON
	asm volatile (
	"vld1.32 {d2}, [%0] \n\t" //d2={x0,y0}
	"vld1.32 {d4}, [%1] \n\t" //d4={x1,y1}
	"vmul.f32 d0, d2, d4 \n\t" //d0 = d2*d4
	"vpadd.f32 d0, d0, d0 \n\t" //d0 = d[0] + d[1]
	:: "r"(v0), "r"(v1)
	: "d0", "d2", "d4", "memory"
	);
#endif
}
/*
 * Entry point for the 2-component dot product that hands the result back
 * through r0.
 *
 * With __MATH_NEON defined, dot2_neon_hfp() runs and s0 is copied into r0;
 * there is no C-level return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where the return value travels in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to dot2_c().
 */
float
dot2_neon_sfp(float v0[2], float v1[2])
{
#ifdef __MATH_NEON
	dot2_neon_hfp(v0, v1);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return dot2_c(v0, v1);
#endif
}
/*
 * Writes v scaled by an approximation of 1/sqrt(v.v) into d.
 * NEON path: squared length via vmul + vpadd, inverse square root via
 * vrsqrte refined by two vrsqrts steps.  Without __MATH_NEON the work is
 * forwarded to normalize2_c().
 */
void
normalize2_neon(float v[2], float d[2])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 d4, [%0] \n\t" //d4 = {x0,y0}
"vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4
"vpadd.f32 d0, d0 \n\t" //d0 = d[0] + d[1]
"vmov.f32 d1, d0 \n\t" //d1 = d0
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 d4, d4, d0[0] \n\t" //d4 = d4*d0[0]
"vst1.32 d4, [%1] \n\t" //store {x, y} to d
:: "r"(v), "r"(d)
: "d0", "d1", "d2", "d3", "d4", "memory"
);
#else
normalize2_c(v, d);
#endif
}

172
deps/math-neon/source/math_vec3.c vendored Normal file
View File

@ -0,0 +1,172 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math_neon.h"
/*
 * Scalar (dot) product of two 3-component vectors:
 * v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2], summed left to right.
 */
float
dot3_c(float v0[3], float v1[3])
{
	return v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2];
}
/*
 * Cross product of two 3-component vectors: d = v0 x v1.
 *
 * All three components are computed before anything is stored, so d may be
 * the same array as v0 or v1.  The earlier version wrote d[0] first and then
 * read the inputs again for d[1] and d[2], which gave wrong results for
 * in-place use; cross3_neon() loads both inputs before storing, so the two
 * implementations now agree in that case.
 */
void
cross3_c(float v0[3], float v1[3], float d[3])
{
	const float cx = v0[1]*v1[2] - v0[2]*v1[1];
	const float cy = v0[2]*v1[0] - v0[0]*v1[2];
	const float cz = v0[0]*v1[1] - v0[1]*v1[0];

	d[0] = cx;
	d[1] = cy;
	d[2] = cz;
}
/*
 * Writes v scaled by an approximation of 1/sqrt(v.v) into d.
 * The inverse square root is a bit-level first guess refined by two Newton
 * steps (the C counterpart of VRSQRTE / VRSQRTS).
 */
void
normalize3_c(float v[3], float d[3])
{
	union {
		float f;
		int i;
	} inv;
	float len_sq, scaled, corr;
	int pass;

	/* squared length */
	len_sq = v[0]*v[0];
	len_sq += v[1]*v[1];
	len_sq += v[2]*v[2];

	/* inverse square root of the squared length */
	inv.f = len_sq;
	inv.i = 0x5F3759DF - (inv.i >> 1); //VRSQRTE
	for (pass = 0; pass < 2; pass++) {
		scaled = len_sq * inv.f;
		corr = (3.0f - scaled * inv.f) * 0.5; //VRSQRTS
		inv.f = inv.f * corr;
	}

	d[0] = v[0]*inv.f;
	d[1] = v[1]*inv.f;
	d[2] = v[2]*inv.f;
}
/*
 * Dot product of two 3-component vectors with NEON inline assembly.
 *
 * x/y are loaded as a pair and z separately; the x and y products are summed
 * with vpadd and the z product is accumulated with vmla.  Only lane 0 of the
 * final d0 is meaningful: the upper lanes of d3 and d5 are never loaded.
 *
 * The result is left in s0; the C body contains no return statement, and when
 * __MATH_NEON is not defined the body is empty and no value is returned.
 * NOTE(review): presumably this relies on a hard-float calling convention that
 * returns the value in s0 -- confirm against the build's ABI.
 */
float
dot3_neon_hfp(float v0[3], float v1[3])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 {d2}, [%0] \n\t" //d2={x0,y0}
"flds s6, [%0, #8] \n\t" //d3[0]={z0}
"vld1.32 {d4}, [%1] \n\t" //d4={x1,y1}
"flds s10, [%1, #8] \n\t" //d5[0]={z1}
"vmul.f32 d0, d2, d4 \n\t" //d0= d2*d4
"vpadd.f32 d0, d0, d0 \n\t" //d0 = d[0] + d[1]
"vmla.f32 d0, d3, d5 \n\t" //d0 = d0 + d3*d5
:: "r"(v0), "r"(v1)
: "d0","d1","d2","d3","d4","d5"
);
#endif
}
/*
 * Entry point for the 3-component dot product that hands the result back
 * through r0.
 *
 * With __MATH_NEON defined, dot3_neon_hfp() runs and s0 is copied into r0;
 * there is no C-level return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where the return value travels in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to dot3_c().
 */
float
dot3_neon_sfp(float v0[3], float v1[3])
{
#ifdef __MATH_NEON
	dot3_neon_hfp(v0, v1);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return dot3_c(v0, v1);
#endif
}
/*
 * Cross product of two 3-component vectors: d = v0 x v1.
 * Without __MATH_NEON the work is forwarded to cross3_c().
 *
 * NEON path register layout after the loads:
 *   d0 = {y0, z0}, d1 = {z0, x0}, d2 = {z1, x1}, d3 = {y1, z1}
 * so that
 *   d4    = d0*d2 - d1*d3       = {y0*z1 - z0*y1, z0*x1 - x0*z1} = {d[0], d[1]}
 *   d5[0] = y1*x0 - y0*x1                                         = d[2]
 * Both inputs are fully loaded before anything is stored.  The pointer
 * operands are read-write because the assembly advances them.
 */
void cross3_neon(float v0[3], float v1[3], float d[3])
{
#ifdef __MATH_NEON
asm volatile (
"flds s3, [%0] \n\t" //d1[1]={x0}
"add %0, %0, #4 \n\t" //v0 now points at y0
"vld1.32 {d0}, [%0] \n\t" //d0={y0,z0}
"vmov.f32 s2, s1 \n\t" //d1[0]={z0}
"flds s5, [%1] \n\t" //d2[1]={x1}
"add %1, %1, #4 \n\t" //v1 now points at y1
"vld1.32 {d3}, [%1] \n\t" //d3={y1,z1}
"vmov.f32 s4, s7 \n\t" //d2[0]=d3[1]
"vmul.f32 d4, d0, d2 \n\t" //d4=d0*d2
"vmls.f32 d4, d1, d3 \n\t" //d4-=d1*d3
"vmul.f32 d5, d3, d1[1] \n\t" //d5=d3*d1[1]
"vmls.f32 d5, d0, d2[1] \n\t" //d5-=d0*d2[1]
"vst1.32 d4, [%2] \n\t" //store d[0], d[1]
"add %2, %2, #8 \n\t" //d now points at d[2]
"fsts s10, [%2] \n\t" //store d[2] = d5[0]
: "+r"(v0), "+r"(v1), "+r"(d):
: "d0", "d1", "d2", "d3", "d4", "d5", "memory"
);
#else
cross3_c(v0,v1,d);
#endif
}
/*
 * Writes v scaled by an approximation of 1/sqrt(v.v) into d.
 * NEON path: squared length via vmul + vpadd + vmla, inverse square root via
 * vrsqrte refined by two vrsqrts steps.  Only lane 0 of d0 is used for the
 * final scale; the upper lane of d5 is never loaded.  Without __MATH_NEON the
 * work is forwarded to normalize3_c().
 */
void
normalize3_neon(float v[3], float d[3])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 {d4}, [%0] \n\t" //d4={x0,y0}
"flds s10, [%0, #8] \n\t" //d5[0]={z0}
"vmul.f32 d0, d4, d4 \n\t" //d0= d4*d4
"vpadd.f32 d0, d0 \n\t" //d0 = d[0] + d[1]
"vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5
"vmov.f32 d1, d0 \n\t" //d1 = d0
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
"vst1.32 {d4}, [%1] \n\t" //store {x, y}
"fsts s10, [%1, #8] \n\t" //store z
:: "r"(v), "r"(d)
: "d0", "d1", "d2", "d3", "d4", "d5", "memory"
);
#else
normalize3_c(v, d);
#endif
}

126
deps/math-neon/source/math_vec4.c vendored Normal file
View File

@ -0,0 +1,126 @@
/*
The MIT License (MIT)
Copyright (c) 2015 Lachlan Tychsen-Smith (lachlan.ts@gmail.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "math_neon.h"
#ifdef __MATH_NEON
#include "arm_neon.h"
#endif
/*
 * Scalar (dot) product of two 4-component vectors, summed left to right:
 * v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2] + v0[3]*v1[3].
 */
float dot4_c(float v0[4], float v1[4])
{
	return v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2] + v0[3]*v1[3];
}
/*
 * Writes v scaled by an approximation of 1/sqrt(v.v) into d.
 * The inverse square root is a bit-level first guess refined by two Newton
 * steps (the C counterpart of VRSQRTE / VRSQRTS).
 */
void normalize4_c(float v[4], float d[4])
{
	union {
		float f;
		int i;
	} inv;
	float len_sq, scaled, corr;
	int pass;

	/* squared length */
	len_sq = v[0]*v[0];
	len_sq += v[1]*v[1];
	len_sq += v[2]*v[2];
	len_sq += v[3]*v[3];

	/* inverse square root of the squared length */
	inv.f = len_sq;
	inv.i = 0x5F3759DF - (inv.i >> 1); //VRSQRTE
	for (pass = 0; pass < 2; pass++) {
		scaled = len_sq * inv.f;
		corr = (3.0f - scaled * inv.f) * 0.5; //VRSQRTS
		inv.f = inv.f * corr;
	}

	d[0] = v[0]*inv.f;
	d[1] = v[1]*inv.f;
	d[2] = v[2]*inv.f;
	d[3] = v[3]*inv.f;
}
/*
 * Writes v scaled by an approximation of 1/sqrt(v.v) into d.
 * NEON path: squared length via vmul + vmla + vpadd, inverse square root via
 * vrsqrte refined by two vrsqrts steps, then all four components are scaled
 * by lane 0 of the result.  Without __MATH_NEON the work is forwarded to
 * normalize4_c().
 */
void normalize4_neon(float v[4], float d[4])
{
#ifdef __MATH_NEON
asm volatile (
"vld1.32 {d4, d5}, [%0] \n\t" //d4={x0,y0}, d5={z0, w0}
"vmul.f32 d0, d4, d4 \n\t" //d0= d4*d4
"vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5
"vpadd.f32 d0, d0 \n\t" //d0 = d[0] + d[1]
"vmov.f32 d1, d0 \n\t" //d1 = d0
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
"vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
"vst1.32 {d4, d5}, [%1] \n\t" //store {x, y, z, w}
:: "r"(v), "r"(d)
: "d0", "d1", "d2", "d3", "d4", "d5", "memory"
);
#else
normalize4_c(v, d);
#endif
}
/*
 * Dot product of two 4-component vectors with NEON inline assembly.
 *
 * The result is left in d0 (both lanes hold the sum); the C body contains no
 * return statement, and when __MATH_NEON is not defined the body is empty and
 * no value is returned.
 * NOTE(review): presumably this relies on a hard-float calling convention that
 * returns the value in s0 -- confirm against the build's ABI.
 *
 * The clobber list used to be empty.  It now names the registers the
 * assembly writes (d0, d2-d5), plus "memory" because the vectors are read
 * through the pointer operands.
 */
float dot4_neon_hfp(float v0[4], float v1[4])
{
#ifdef __MATH_NEON
	asm volatile (
	"vld1.32 {d2, d3}, [%0] \n\t" //d2={x0,y0}, d3={z0, w0}
	"vld1.32 {d4, d5}, [%1] \n\t" //d4={x1,y1}, d5={z1, w1}
	"vmul.f32 d0, d2, d4 \n\t" //d0= d2*d4
	"vmla.f32 d0, d3, d5 \n\t" //d0 = d0 + d3*d5
	"vpadd.f32 d0, d0 \n\t" //d0 = d[0] + d[1]
	:: "r"(v0), "r"(v1)
	: "d0", "d2", "d3", "d4", "d5", "memory"
	);
#endif
}
/*
 * Entry point for the 4-component dot product that hands the result back
 * through r0.
 *
 * With __MATH_NEON defined, dot4_neon_hfp() runs and s0 is copied into r0;
 * there is no C-level return statement on that path.
 * NOTE(review): presumably intended for the soft-float calling convention,
 * where the return value travels in r0 -- confirm against the build's ABI.
 *
 * Without __MATH_NEON the call is forwarded to dot4_c().
 */
float dot4_neon_sfp(float v0[4], float v1[4])
{
#ifdef __MATH_NEON
	dot4_neon_hfp(v0, v1);
	asm volatile ("vmov.f32 r0, s0 \n\t");
#else
	return dot4_c(v0, v1);
#endif
}

99
deps/vitaGL/.clang-format vendored Normal file
View File

@ -0,0 +1,99 @@
---
Language: Cpp
# BasedOnStyle: WebKit
AccessModifierOffset: -4
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '$'
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
TabWidth: 4
UseTab: Always
...

17
deps/vitaGL/.gitattributes vendored Normal file
View File

@ -0,0 +1,17 @@
# Auto detect text files and perform LF normalization
* text=auto
# Custom for Visual Studio
*.cs diff=csharp
# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

57
deps/vitaGL/.gitignore vendored Normal file
View File

@ -0,0 +1,57 @@
*.vpk
*.elf
*.velf
*.bin
*.sfo
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
# =========================
# Operating System Files
# =========================
# OSX
# =========================
.DS_Store
.AppleDouble
.LSOverride
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Vita build stuffs
*.a
*.o

165
deps/vitaGL/LICENSE.txt vendored Normal file
View File

@ -0,0 +1,165 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

69
deps/vitaGL/Makefile vendored Normal file
View File

@ -0,0 +1,69 @@
# Builds libvitaGL.a, a static library made from every .c file found in the
# SOURCES directories, using the arm-vita-eabi toolchain.
TARGET  := libvitaGL
SOURCES := source source/utils
SHADERS := shaders

LIBS = -lc -lm -lSceGxm_stub -lSceDisplay_stub

# Optional extra sources, compiled only when HAVE_SBRK=1 is passed in.
ifeq ($(HAVE_SBRK),1)
SOURCES += source/hacks
endif

CFILES  := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CGFILES := $(foreach dir,$(SHADERS), $(wildcard $(dir)/*.cg))
HEADERS := $(CGFILES:.cg=.h)
OBJS    := $(CFILES:.c=.o)

PREFIX  = arm-vita-eabi
CC      = $(PREFIX)-gcc
AR      = $(PREFIX)-gcc-ar
CFLAGS  = -g -Wl,-q -O2 -ffast-math -mtune=cortex-a9 -mfpu=neon -flto -ftree-vectorize -DTRANSPOSE_MATRICES
ASFLAGS = $(CFLAGS)

# None of these names is a file produced by this Makefile. "shaders" and
# "samples" are also directory names, so without this declaration make could
# consider them up to date and skip their recipes.
.PHONY: all shaders clean install samples

all: $(TARGET).a

$(TARGET).a: $(OBJS)
	$(AR) -rc $@ $^

# Fragment shaders: compile the .cg next to the requested header into a
# temporary .gxp, embed it as a C header with bin2c, then drop the .gxp.
# NOTE(review): the header is written under source/shaders/, not to $@.
%_f.h:
	psp2cgc -profile sce_fp_psp2 $(@:_f.h=_f.cg) -Wperf -fastprecision -O3 -o $(@:_f.h=_f.gxp)
	bin2c $(@:_f.h=_f.gxp) source/shaders/$(notdir $@) $(notdir $(@:_f.h=_f))
	@rm -rf $(@:_f.h=_f.gxp)

# Vertex shaders: same steps as above with the vertex profile.
%_v.h:
	psp2cgc -profile sce_vp_psp2 $(@:_v.h=_v.cg) -Wperf -fastprecision -O3 -o $(@:_v.h=_v.gxp)
	bin2c $(@:_v.h=_v.gxp) source/shaders/$(notdir $@) $(notdir $(@:_v.h=_v))
	@rm -rf $(@:_v.h=_v.gxp)

shaders: $(HEADERS)

# $(MAKE) instead of a literal "make" so that -j, -n and command-line
# variables are propagated to the sub-makes.
clean:
	@rm -rf $(TARGET).a $(TARGET).elf $(OBJS)
	@$(MAKE) -C samples/sample1 clean
	@$(MAKE) -C samples/sample2 clean
	@$(MAKE) -C samples/sample3 clean
	@$(MAKE) -C samples/sample4 clean
	@$(MAKE) -C samples/sample5 clean
	@$(MAKE) -C samples/sample6 clean
	@$(MAKE) -C samples/sample7 clean

# Copies the library and its public header into the VitaSDK tree.
install: $(TARGET).a
	@mkdir -p $(VITASDK)/$(PREFIX)/lib/
	cp $(TARGET).a $(VITASDK)/$(PREFIX)/lib/
	@mkdir -p $(VITASDK)/$(PREFIX)/include/
	cp source/vitaGL.h $(VITASDK)/$(PREFIX)/include/

# Builds every sample and copies the resulting .vpk next to this Makefile.
samples: $(TARGET).a
	@$(MAKE) -C samples/sample1
	cp "samples/sample1/vitaGL-Sample001.vpk" .
	@$(MAKE) -C samples/sample2
	cp "samples/sample2/vitaGL-Sample002.vpk" .
	@$(MAKE) -C samples/sample3
	cp "samples/sample3/vitaGL-Sample003.vpk" .
	@$(MAKE) -C samples/sample4
	cp "samples/sample4/vitaGL-Sample004.vpk" .
	@$(MAKE) -C samples/sample5
	cp "samples/sample5/vitaGL-Sample005.vpk" .
	@$(MAKE) -C samples/sample6
	cp "samples/sample6/vitaGL-Sample006.vpk" .
	@$(MAKE) -C samples/sample7
	cp "samples/sample7/vitaGL-Sample007.vpk" .

37
deps/vitaGL/README.md vendored Normal file
View File

@ -0,0 +1,37 @@
# vitaGL
vitaGL is a wrapper between OpenGL and sceGxm. It allows using a subset of OpenGL functions with full hardware acceleration by translating the calls to their sceGxm equivalents.
# Samples
You can find samples in the *samples* folder in this repository.
# Help and Troubleshooting
If you plan to use vitaGL for one of your projects, you can find an official channel to get help with it on the Vita Nuova Discord server: https://discord.gg/PyCaBx9
# Projects actually using vitaGL
Here you can find a list of projects using vitaGL:
Direct OpenGL Usage:<br>
[vitaQuake](https://vitadb.rinnegatamante.it/#/info/10) - Port of Quake I and mission packs<br>
[vitaQuakeII](https://vitadb.rinnegatamante.it/#/info/278) - Port of Quake II and mission packs<br>
[vitaQuakeIII](https://vitadb.rinnegatamante.it/#/info/375) - Port of ioquake3 (Quake III: Arena, Quake III: Team Arena, OpenArena)<br>
[vitaRTCW](https://vitadb.rinnegatamante.it/#/info/459) - Port of iortcw (Return to Castle Wolfenstein)<br>
[vitaHexenII](https://vitadb.rinnegatamante.it/#/info/196) - Port of Hexen II<br>
[vitaXash3D](https://vitadb.rinnegatamante.it/#/info/365) - Port of Xash3D (Half Life, Counter Strike 1.6)<br>
[Fade to Black](https://vitadb.rinnegatamante.it/#/info/367) - Port of Fade to Black<br>
[vitaVoyager](https://vitadb.rinnegatamante.it/#/info/367) - Port of lilium-voyager (Star Trek Voyager: Elite Force)<br>
Libraries:<br>
[sdl12_gl](https://github.com/Rinnegatamante/SDL-Vita/tree/sdl12_gl/src) - SDL 1.2 Vita port adapted to work with vitaGL as renderer<br>
[imgui_vita](https://github.com/Rinnegatamante/imgui-vita) - Port of Dear ImGui<br>
sdl12_gl Apps:<br>
[SuperMarioWar](https://vitadb.rinnegatamante.it/#/info/422) - Port of Super Mario War<br>
[ZeldaOLB](https://vitadb.rinnegatamante.it/#/info/265) - Port of Zelda: Oni Link Begins<br>
[ZeldaROTH](https://vitadb.rinnegatamante.it/#/info/109) - Port of Zelda: Return of the Hylian<br>
[Zelda3T](https://vitadb.rinnegatamante.it/#/info/334) - Port of Zelda: Time to Triumph<br>
[ZeldaNSQ](https://vitadb.rinnegatamante.it/#/info/350) - Port of Zelda: Navi's Quest<br>
[vitaWolfen](https://vitadb.rinnegatamante.it/#/info/31) - Port of Wolf4SDL<br>
[meritous](https://vitadb.rinnegatamante.it/#/info/411) - Port of meritous<br>

6
deps/vitaGL/format.bat vendored Normal file
View File

@ -0,0 +1,6 @@
@echo off
rem Runs clang-format in place on every .c and .h file found recursively
rem under source\, except for paths that the findstr filter on \shaders\ matches.
cd source
rem "delims=" hands the whole line to %%f, so a path containing spaces is not
rem cut at the first blank; the quotes pass it to clang-format as one argument.
for /f "delims=" %%f in ('dir *.c *.h /b/s') do (
	echo.%%f | findstr /C:"\\shaders\\">nul || (clang-format -i "%%f")
)
cd ..

40
deps/vitaGL/samples/sample1/Makefile vendored Normal file
View File

@ -0,0 +1,40 @@
# Builds vitaGL sample 001 and packages it, with its texture, as a .vpk.
SAMPLE_NUM := 001
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

# The last line must not end with a backslash: a dangling continuation would
# splice the next assignment into LIBS.
LIBS = -lvitaGL -lSceLibKernel_stub -lScePvf_stub -lmathneon \
       -lSceAppMgr_stub -lSceAppUtil_stub -lScePgf_stub \
       -ljpeg -lfreetype -lc -lSceCommonDialog_stub -lpng16 -lm -lz \
       -lSceGxm_stub -lSceDisplay_stub -lSceSysmodule_stub

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all clean

all: $(TARGET).vpk

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin -a texture.bmp=texture.bmp $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

%.velf: %.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

67
deps/vitaGL/samples/sample1/main.c vendored Normal file
View File

@ -0,0 +1,67 @@
// Drawing a fullscreen image on screen with glBegin/glEnd
#include <vitaGL.h>
#include <vita2d.h>
#include <stdlib.h>
// Pixel format handed to glTexImage2D in main, both as internal format and as source data format
GLenum texture_format = GL_RGB;
// OpenGL texture name; filled in by glGenTextures in main
GLuint texture = 0;
// Loads app0:texture.bmp, uploads it as an OpenGL texture and draws it as a
// fullscreen quad every frame. Returns -1 if the image cannot be loaded;
// otherwise the render loop never exits.
int main(){
	// Initializing graphics device
	vglInit(0x800000);

	// Loading BMP image to use as texture
	SceUID fd = sceIoOpen("app0:texture.bmp", SCE_O_RDONLY, 0777);
	if (fd < 0){
		// Nothing can be read from an invalid descriptor: bail out cleanly
		vglEnd();
		return -1;
	}

	// Zero-initialized so that a failed read is caught by the size check below
	uint16_t w = 0, h = 0;
	sceIoLseek(fd, 0x12, SCE_SEEK_SET);
	sceIoRead(fd, &w, sizeof(uint16_t));
	sceIoLseek(fd, 0x16, SCE_SEEK_SET);
	sceIoRead(fd, &h, sizeof(uint16_t));
	sceIoLseek(fd, 0x26, SCE_SEEK_SET);

	// 3 bytes per pixel; computed in size_t so large dimensions cannot overflow int
	size_t size = (size_t)w * h * 3;
	uint8_t *buffer = (size > 0) ? (uint8_t*)malloc(size) : NULL;
	if (buffer == NULL){
		sceIoClose(fd);
		vglEnd();
		return -1;
	}
	sceIoRead(fd, buffer, size);
	sceIoClose(fd);

	glClearColor(0.50, 0, 0, 0);
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(0, 960, 544, 0, -1, 1);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// Initializing openGL texture
	glGenTextures(1, &texture);
	glBindTexture(GL_TEXTURE_2D, texture);
	glTexImage2D(GL_TEXTURE_2D, 0, texture_format, w, h, 0, texture_format, GL_UNSIGNED_BYTE, buffer);
	glEnable(GL_TEXTURE_2D);

	// glTexImage2D copies the client pixels, so the staging buffer is no longer needed
	free(buffer);

	for (;;){
		vglStartRendering();
		glClear(GL_COLOR_BUFFER_BIT);
		glBindTexture(GL_TEXTURE_2D, texture);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glBegin(GL_QUADS);

		// Note: BMP images are vertically flipped
		glTexCoord2i(0, 1);
		glVertex3f(0, 0, 0);
		glTexCoord2i(1, 1);
		glVertex3f(960, 0, 0);
		glTexCoord2i(1, 0);
		glVertex3f(960, 544, 0);
		glTexCoord2i(0, 0);
		glVertex3f(0, 544, 0);

		glEnd();
		vglStopRendering();
		glLoadIdentity();
	}

	// Not reached: the loop above has no exit
	vglEnd();
}

BIN
deps/vitaGL/samples/sample1/texture.bmp vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

42
deps/vitaGL/samples/sample2/Makefile vendored Normal file
View File

@ -0,0 +1,42 @@
# Builds vitaGL sample 002 and packages it as a .vpk.
SAMPLE_NUM := 002
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

LIBS = -lvitaGL -lc -lSceCommonDialog_stub -lm -lSceGxm_stub -lSceDisplay_stub -lmathneon

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all debug clean

all: $(TARGET).vpk

# "make debug" is the normal build with DEBUG_BUILD defined for every object.
debug: CFLAGS += -DDEBUG_BUILD
debug: all

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

# A copy of the input .elf is kept beside it before vita-elf-create runs.
%.velf: %.elf
	cp $< $<.unstripped.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

39
deps/vitaGL/samples/sample2/main.c vendored Normal file
View File

@ -0,0 +1,39 @@
// Drawing a triangle on screen with vertex array
#include <vitaGL.h>
// RGB color of each of the three triangle vertices (3 floats per vertex)
float colors[] = {1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0};
// x, y, z position of each of the three triangle vertices
float vertices[] = {100, 100, 0, 150, 100, 0, 100, 150, 0};
// Draws one colored triangle per frame from client-side vertex and color
// arrays. The render loop has no exit.
int main(){
	// Bring up the graphics device before issuing any GL call
	vglInit(0x800000);

	// Orthographic projection over 0..960 x 0..544 with y growing downwards
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(0, 960, 544, 0, -1, 1);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// Background color used by glClear
	glClearColor (0.50f, 0.0f, 0.0f, 1.0f);

	while (1){
		vglStartRendering();
		glClear(GL_COLOR_BUFFER_BIT);

		// Point GL at the two arrays, draw, then switch them off again
		glEnableClientState(GL_VERTEX_ARRAY);
		glEnableClientState(GL_COLOR_ARRAY);
		glVertexPointer(3, GL_FLOAT, 0, vertices);
		glColorPointer(3, GL_FLOAT, 0, colors);
		glDrawArrays(GL_TRIANGLES, 0, 3);
		glDisableClientState(GL_VERTEX_ARRAY);
		glDisableClientState(GL_COLOR_ARRAY);

		vglStopRendering();
		glLoadIdentity();
	}

	// Not reached: the loop above has no exit
	vglEnd();
}

38
deps/vitaGL/samples/sample3/Makefile vendored Normal file
View File

@ -0,0 +1,38 @@
# Builds vitaGL sample 003 and packages it as a .vpk.
SAMPLE_NUM := 003
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

# The last line must not end with a backslash: a dangling continuation would
# splice the next assignment into LIBS.
LIBS = -lvitaGL -lSceLibKernel_stub -lSceAppMgr_stub -lSceAppUtil_stub -lmathneon \
       -lc -lSceCommonDialog_stub -lm -lSceGxm_stub -lSceDisplay_stub -lSceSysmodule_stub

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all clean

all: $(TARGET).vpk

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

%.velf: %.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

36
deps/vitaGL/samples/sample3/main.c vendored Normal file
View File

@ -0,0 +1,36 @@
// Drawing a colored quad with glBegin/glEnd
#include <vitaGL.h>
// Draws one quad per frame in immediate mode, with a different color at each
// corner. The render loop has no exit.
int main(){
	// Bring up the graphics device before issuing any GL call
	vglInit(0x800000);

	// Orthographic projection over 0..960 x 0..544 with y growing downwards
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(0, 960, 544, 0, -1, 1);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// Background color used by glClear
	glClearColor(0.0, 0.0, 0.0, 0.0);

	while (1){
		vglStartRendering();
		glClear(GL_COLOR_BUFFER_BIT);

		// Each glColor3f applies to the glVertex3f that follows it
		glBegin(GL_QUADS);
		glColor3f(1.0, 0.0, 0.0); glVertex3f(400, 0, 0);     // red
		glColor3f(1.0, 1.0, 0.0); glVertex3f(800, 0, 0);     // yellow
		glColor3f(0.0, 1.0, 0.0); glVertex3f(800, 400, 0);   // green
		glColor3f(1.0, 0.0, 1.0); glVertex3f(400, 400, 0);   // magenta
		glEnd();

		vglStopRendering();
		glLoadIdentity();
	}

	// Not reached: the loop above has no exit
	vglEnd();
}

37
deps/vitaGL/samples/sample4/Makefile vendored Normal file
View File

@ -0,0 +1,37 @@
# Builds vitaGL sample 004 and packages it as a .vpk.
SAMPLE_NUM := 004
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

LIBS = -lvitaGL -lc -lSceCommonDialog_stub -lm -lSceGxm_stub -lSceDisplay_stub -lmathneon

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all clean

all: $(TARGET).vpk

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

%.velf: %.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

38
deps/vitaGL/samples/sample4/main.c vendored Normal file
View File

@ -0,0 +1,38 @@
// Drawing a quad on screen with glDrawElements
#include <vitaGL.h>
// RGB color of each of the four quad vertices (3 floats per vertex)
float colors[] = {1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0};
// x, y, z position of each of the four quad vertices
float vertices[] = {100, 100, 0, 150, 100, 0, 100, 150, 0, 150, 150, 0};
// Two triangles (0,1,2) and (1,2,3) that together cover the quad
uint16_t indices[] = {0, 1, 2, 1, 2, 3};
// Draws one indexed quad (two triangles) per frame from client-side arrays
// with glDrawElements. The render loop has no exit.
int main(){
	// Bring up the graphics device before issuing any GL call
	vglInit(0x800000);

	// Orthographic projection over 0..960 x 0..544 with y growing downwards
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(0, 960, 544, 0, -1, 1);
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();

	// Background color used by glClear
	glClearColor (0.50f, 0.0f, 0.0f, 1.0f);

	while (1){
		vglStartRendering();
		glClear(GL_COLOR_BUFFER_BIT);

		// Point GL at the two arrays, draw the 6 indices, then switch them off
		glEnableClientState(GL_VERTEX_ARRAY);
		glEnableClientState(GL_COLOR_ARRAY);
		glVertexPointer(3, GL_FLOAT, 0, vertices);
		glColorPointer(3, GL_FLOAT, 0, colors);
		glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, indices);
		glDisableClientState(GL_VERTEX_ARRAY);
		glDisableClientState(GL_COLOR_ARRAY);

		vglStopRendering();
		glLoadIdentity();
	}

	// Not reached: the loop above has no exit
	vglEnd();
}

37
deps/vitaGL/samples/sample5/Makefile vendored Normal file
View File

@ -0,0 +1,37 @@
# Builds vitaGL sample 005 and packages it as a .vpk.
SAMPLE_NUM := 005
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

LIBS = -lvitaGL -lc -lSceCommonDialog_stub -lm -lSceGxm_stub -lSceDisplay_stub -lmathneon

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all clean

all: $(TARGET).vpk

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

%.velf: %.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

82
deps/vitaGL/samples/sample5/main.c vendored Normal file
View File

@ -0,0 +1,82 @@
// Drawing a rotating cube
#include <vitaGL.h>
#include <math.h>
// RGB colors for the 4 vertices of one face; main repeats them for all six faces
float colors[] = {1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0}; // Colors for a face
// Each vertices_* array holds the x, y, z positions of the 4 corners of one cube face
float vertices_front[] = {-0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f, -0.5f}; // Front Face
float vertices_back[] = {-0.5f, -0.5f, 0.5f, 0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // Back Face
float vertices_left[] = {-0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f}; // Left Face
float vertices_right[] = {0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // Right Face
float vertices_top[] = {-0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, 0.5f}; // Top Face
float vertices_bottom[] = {-0.5f, 0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // Bottom Face
// Two triangles per face; each row indexes the 4 vertices of that face in the
// combined vertex array that main builds in the same face order
uint16_t indices[] = {
0, 1, 2, 1, 2, 3, // Front
4, 5, 6, 5, 6, 7, // Back
8, 9,10, 9,10,11, // Left
12,13,14,13,14,15, // Right
16,17,18,17,18,19, // Top
20,21,22,21,22,23 // Bottom
};
// Multiplies the current matrix by a perspective frustum.
//   fov    - vertical field of view, in degrees
//   aspect - width / height ratio of the view
//   near   - distance to the near clipping plane
//   far    - distance to the far clipping plane
void init_perspective(float fov, float aspect, float near, float far){
	// Half of the field of view, converted from degrees to radians
	float half_fov = ((fov * M_PI) / 180.0f) * 0.5f;

	// Extents of the near plane around the view axis
	float top = near * tanf(half_fov);
	float right = top * aspect;

	glFrustum(-right, right, -top, top, near, far);
}
// Draws a cube that rotates a little more every frame, using client-side
// vertex and color arrays. The render loop has no exit.
int main(){
	// Bring up the graphics device and enable waiting for vblank
	vglInit(0x800000);
	vglWaitVblankStart(GL_TRUE);

	// Faces in the order expected by the indices array
	const float *faces[6] = {
		vertices_front, vertices_back, vertices_left,
		vertices_right, vertices_top, vertices_bottom
	};

	// 6 faces x 4 vertices x 3 floats, for both positions and colors
	float color_array[12*6];
	float vertex_array[12*6];
	int face, k;
	for (face = 0; face < 6; face++){
		// Every face reuses the same 4 vertex colors
		for (k = 0; k < 12; k++){
			color_array[12 * face + k] = colors[k];
		}
		memcpy(&vertex_array[12 * face], faces[face], sizeof(float) * 12);
	}

	glClearColor (0.0f, 0.0f, 0.0f, 0.0f);

	// 90 degree perspective projection at the 960x544 aspect ratio
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	init_perspective(90.0f, 960.f/544.0f, 0.01f, 100.0f);

	// Move the cube away from the camera so that it is in view
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();
	glTranslatef(0.0f, 0.0f, -3.0f);

	glEnable(GL_DEPTH_TEST);
	glDepthFunc(GL_LESS);

	while (1){
		vglStartRendering();
		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

		glEnableClientState(GL_VERTEX_ARRAY);
		glEnableClientState(GL_COLOR_ARRAY);
		glVertexPointer(3, GL_FLOAT, 0, vertex_array);
		glColorPointer(3, GL_FLOAT, 0, color_array);

		// The modelview matrix is never reset, so the rotations accumulate
		glRotatef(1.0f, 0.0f, 0.0f, 1.0f);
		glRotatef(0.5f, 0.0f, 1.0f, 0.0f);

		glDrawElements(GL_TRIANGLES, 6*6, GL_UNSIGNED_SHORT, indices);
		glDisableClientState(GL_VERTEX_ARRAY);
		glDisableClientState(GL_COLOR_ARRAY);

		vglStopRendering();
	}

	// Not reached: the loop above has no exit
	vglEnd();
}

37
deps/vitaGL/samples/sample6/Makefile vendored Normal file
View File

@ -0,0 +1,37 @@
# Builds vitaGL sample 006 and packages it as a .vpk.
SAMPLE_NUM := 006
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

LIBS = -lvitaGL -lc -lSceCommonDialog_stub -lm -lSceGxm_stub -lSceDisplay_stub -lmathneon

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all clean

all: $(TARGET).vpk

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

%.velf: %.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

96
deps/vitaGL/samples/sample6/main.c vendored Normal file
View File

@ -0,0 +1,96 @@
// Drawing a rotating cube with VBO
#include <vitaGL.h>
#include <math.h>
// Casts an integer byte offset to void*, the form in which main passes
// buffer offsets to glVertexPointer, glColorPointer and glDrawElements
#define BUF_OFFS(i) ((void*)(i))
// RGB colors for the 4 vertices of one face; main copies them once per face
float colors[] = {1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0}; // Colors for a face
// Each vertices_* array holds the x, y, z positions of the 4 corners of one cube face
float vertices_front[] = {-0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f, -0.5f}; // Front Face
float vertices_back[] = {-0.5f, -0.5f, 0.5f, 0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // Back Face
float vertices_left[] = {-0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f}; // Left Face
float vertices_right[] = {0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, 0.5f, -0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // Right Face
float vertices_top[] = {-0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, 0.5f}; // Top Face
float vertices_bottom[] = {-0.5f, 0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // Bottom Face
// Buffer object names: main binds [0] as the array buffer (vertices + colors)
// and [1] as the element array buffer (indices)
GLuint buffers[2];
// Two triangles per face; each row indexes the 4 vertices of that face in the
// vertex data that main uploads in the same face order
uint16_t indices[] = {
0, 1, 2, 1, 2, 3, // Front
4, 5, 6, 5, 6, 7, // Back
8, 9,10, 9,10,11, // Left
12,13,14,13,14,15, // Right
16,17,18,17,18,19, // Top
20,21,22,21,22,23 // Bottom
};
// Multiplies the current matrix by a perspective frustum.
//   fov    - vertical field of view, in degrees
//   aspect - width / height ratio of the view
//   near   - distance to the near clipping plane
//   far    - distance to the far clipping plane
void init_perspective(float fov, float aspect, float near, float far){
	// Half of the field of view, converted from degrees to radians
	float half_fov = ((fov * M_PI) / 180.0f) * 0.5f;

	// Extents of the near plane around the view axis
	float top = near * tanf(half_fov);
	float right = top * aspect;

	glFrustum(-right, right, -top, top, near, far);
}
// Draws a cube that rotates a little more every frame, with vertex, color and
// index data stored in buffer objects. The render loop has no exit.
int main(){
	// Bring up the graphics device and enable waiting for vblank
	// NOTE(review): the argument here is 0x80000 - confirm that this value is intended
	vglInit(0x80000);
	vglWaitVblankStart(GL_TRUE);

	// Faces in the order expected by the indices array
	const float *faces[6] = {
		vertices_front, vertices_back, vertices_left,
		vertices_right, vertices_top, vertices_bottom
	};

	// VBO layout: 6 blocks of face positions followed by 6 blocks of face
	// colors, each block being 4 vertices x 3 floats
	float vbo[12*12];
	int slot;
	for (slot = 0; slot < 6; slot++){
		memcpy(&vbo[12 * slot], faces[slot], sizeof(float) * 12);
		memcpy(&vbo[12 * (slot + 6)], &colors[0], sizeof(float) * 12);
	}

	// One buffer for positions + colors, one for the indices
	glGenBuffers(2, buffers);

	// Uploading the vertex data
	glBindBuffer(GL_ARRAY_BUFFER, buffers[0]);
	glBufferData(GL_ARRAY_BUFFER, sizeof(vbo), vbo, GL_STATIC_DRAW);

	// Uploading the index data
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffers[1]);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);

	glClearColor (0.0f, 0.0f, 0.0f, 0.0f);

	// 90 degree perspective projection at the 960x544 aspect ratio
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	init_perspective(90.0f, 960.f/544.0f, 0.01f, 100.0f);

	// Move the cube away from the camera so that it is in view
	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();
	glTranslatef(0.0f, 0.0f, -3.0f);

	glEnable(GL_DEPTH_TEST);
	glDepthFunc(GL_LESS);

	while (1){
		vglStartRendering();
		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

		glEnableClientState(GL_VERTEX_ARRAY);
		glEnableClientState(GL_COLOR_ARRAY);

		// Positions start at byte 0 of the VBO, colors after the 6 position blocks
		glVertexPointer(3, GL_FLOAT, 0, BUF_OFFS(0));
		glColorPointer(3, GL_FLOAT, 0, BUF_OFFS(12*6*sizeof(float)));

		// The modelview matrix is never reset, so the rotations accumulate
		glRotatef(1.0f, 0.0f, 0.0f, 1.0f);
		glRotatef(0.5f, 1.0f, 0.0f, 0.0f);

		glDrawElements(GL_TRIANGLES, 6*6, GL_UNSIGNED_SHORT, BUF_OFFS(0));
		glDisableClientState(GL_VERTEX_ARRAY);
		glDisableClientState(GL_COLOR_ARRAY);

		vglStopRendering();
	}

	// Not reached: the loop above has no exit
	vglEnd();
}

40
deps/vitaGL/samples/sample7/Makefile vendored Normal file
View File

@ -0,0 +1,40 @@
# Builds vitaGL sample 007 and packages it, with its texture, as a .vpk.
SAMPLE_NUM := 007
TARGET     := vitaGL-Sample$(SAMPLE_NUM)
SOURCES    := .
INCLUDES   := include

LIBS = -lvitaGL -lSceLibKernel_stub -lScePvf_stub \
       -lSceAppMgr_stub -lSceAppUtil_stub -lScePgf_stub \
       -ljpeg -lfreetype -lc -lSceCommonDialog_stub -lpng16 -lm -lz \
       -lSceGxm_stub -lSceDisplay_stub -lSceSysmodule_stub -lmathneon

# DATA is not set in this file, so BINFILES is empty unless it is supplied
# from the command line or the environment.
CFILES   := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.c))
CPPFILES := $(foreach dir,$(SOURCES), $(wildcard $(dir)/*.cpp))
BINFILES := $(foreach dir,$(DATA), $(wildcard $(dir)/*.bin))
OBJS     := $(addsuffix .o,$(BINFILES)) $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)

PREFIX   = arm-vita-eabi
CC       = $(PREFIX)-gcc
CXX      = $(PREFIX)-g++
CFLAGS   = -g -Wl,-q -O2 -ftree-vectorize
CXXFLAGS = $(CFLAGS) -fno-exceptions -std=gnu++11 -fpermissive
ASFLAGS  = $(CFLAGS)

# Command names, not files: always run them even if a file of that name exists.
.PHONY: all clean

all: $(TARGET).vpk

# Build chain: objects -> .elf -> .velf -> eboot.bin -> .vpk
$(TARGET).vpk: eboot.bin
	vita-mksfoex -s TITLE_ID=VITAGL$(SAMPLE_NUM) "$(TARGET)" param.sfo
	vita-pack-vpk -s param.sfo -b eboot.bin -a texture.bmp=texture.bmp $@

eboot.bin: $(TARGET).velf
	vita-make-fself -s $< eboot.bin

%.velf: %.elf
	vita-elf-create $< $@

$(TARGET).elf: $(OBJS)
	$(CC) $(CFLAGS) $^ $(LIBS) -o $@

clean:
	@rm -rf *.velf *.elf *.vpk $(OBJS) param.sfo eboot.bin

91
deps/vitaGL/samples/sample7/main.c vendored Normal file
View File

@ -0,0 +1,91 @@
// Drawing a triangle into a texture through a framebuffer, then drawing that texture fullscreen with glBegin/glEnd
#include <vitaGL.h>
#include <vita2d.h>
#include <stdlib.h>
// Pixel format handed to glTexImage2D as both the internal and the source format
GLenum texture_format = GL_RGB;
// Texture object name; filled in by glGenTextures in main()
GLuint texture = 0;
// Per-vertex colors of the triangle drawn into the texture (three floats per vertex)
float colors[] = {0.4, 0.1, 0.3, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0};
// Triangle vertex positions as x, y, z triples (three floats per vertex)
float vertices[] = {100, 100, 0, 150, 100, 0, 100, 150, 0};
// Sample entry point: loads app0:texture.bmp into a GL texture, renders a
// colored triangle into that texture through a framebuffer object, then
// draws the texture as a fullscreen quad forever.
// Returns 1 when the texture cannot be loaded; otherwise it never leaves
// the render loop.
int main(){
    // Initializing graphics device
    vglInit(0x800000);

    // Loading BMP image to use as texture
    SceUID fd = sceIoOpen("app0:texture.bmp", SCE_O_RDONLY, 0777);
    if (fd < 0) {
        // Reading from an invalid descriptor would leave w/h undefined
        vglEnd();
        return 1;
    }

    // Zero-initialized so a short read yields an empty image, not garbage sizes
    uint16_t w = 0, h = 0;
    sceIoLseek(fd, 0x12, SCE_SEEK_SET);
    sceIoRead(fd, &w, sizeof(uint16_t));
    sceIoLseek(fd, 0x16, SCE_SEEK_SET);
    sceIoRead(fd, &h, sizeof(uint16_t));
    // NOTE(review): pixel data is read from offset 0x26; a BMP with the common
    // 54-byte header stores its pixels at 0x36 - confirm against texture.bmp
    sceIoLseek(fd, 0x26, SCE_SEEK_SET);
    uint8_t *buffer = (uint8_t*)malloc(w * h * 3);
    if (buffer == NULL) {
        sceIoClose(fd);
        vglEnd();
        return 1;
    }
    sceIoRead(fd, buffer, w * h * 3);
    sceIoClose(fd);

    glClearColor(0.50, 0, 0, 0);

    // Pixel-space orthographic projection with the origin at the top-left
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0, 960, 544, 0, -1, 1);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    // Initializing openGL texture
    glGenTextures(1, &texture);
    glBindTexture(GL_TEXTURE_2D, texture);
    glTexImage2D(GL_TEXTURE_2D, 0, texture_format, w, h, 0, texture_format, GL_UNSIGNED_BYTE, buffer);
    glEnable(GL_TEXTURE_2D);

    // Initializing framebuffer
    GLuint fb;
    glGenFramebuffers(1, &fb);
    glBindFramebuffer(GL_FRAMEBUFFER, fb);

    // Binding texture to framebuffer
    glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, texture, 0);

    // Drawing on texture
    vglStartRendering();
    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_COLOR_ARRAY);
    glVertexPointer(3, GL_FLOAT, 0, vertices);
    glColorPointer(3, GL_FLOAT, 0, colors);
    glDrawArrays(GL_TRIANGLES, 0, 3);
    glDisableClientState(GL_VERTEX_ARRAY);
    glDisableClientState(GL_COLOR_ARRAY);
    vglStopRendering();
    glFinish();
    glLoadIdentity();

    // Back to the display framebuffer for the main loop
    glBindFramebuffer(GL_FRAMEBUFFER, 0);
    for (;;){
        vglStartRendering();
        glClear(GL_COLOR_BUFFER_BIT);
        glBindTexture(GL_TEXTURE_2D, texture);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        glBegin(GL_QUADS);

        // Note: BMP images are vertically flipped
        glTexCoord2i(0, 1);
        glVertex3f(0, 0, 0);
        glTexCoord2i(1, 1);
        glVertex3f(960, 0, 0);
        glTexCoord2i(1, 0);
        glVertex3f(960, 544, 0);
        glTexCoord2i(0, 0);
        glVertex3f(0, 544, 0);
        glEnd();
        vglStopRendering();
        glLoadIdentity();
    }

    // Not reached: the loop above never exits
    vglEnd();
    return 0;
}

BIN
deps/vitaGL/samples/sample7/texture.bmp vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

4
deps/vitaGL/shaders/clear_f.cg vendored Normal file
View File

@ -0,0 +1,4 @@
// Fragment shader: outputs the u_clear_color uniform unchanged as the fragment color.
float4 main(uniform float4 u_clear_color) : COLOR
{
return u_clear_color;
}

5
deps/vitaGL/shaders/clear_v.cg vendored Normal file
View File

@ -0,0 +1,5 @@
// Vertex shader: passes the 2D input position through untransformed,
// with z and w both fixed to 1.
float4 main(float2 position) : POSITION
{
return float4(position, 1.f, 1.f);
}

View File

@ -0,0 +1,5 @@
// Fragment shader: writes the depth_clear uniform to the DEPTH output
// and produces no color output.
float main(
uniform float depth_clear) : DEPTH
{
return depth_clear;
}

11
deps/vitaGL/shaders/rgb_v.cg vendored Normal file
View File

@ -0,0 +1,11 @@
// Vertex shader for positions with per-vertex RGB colors.
//   aPosition - object-space vertex position
//   aColor    - vertex color without alpha
//   wvp       - combined transform applied to the position
// Outputs the transformed position and the color with alpha forced to 1.
void main(
    float3 aPosition,
    float3 aColor,
    column_major uniform float4x4 wvp,
    float4 out vPosition: POSITION,
    float4 out vColor: COLOR)
{
    // Promote the position to homogeneous coordinates before transforming it
    float4 homogeneous = float4(aPosition, 1.f);
    vPosition = mul(homogeneous, wvp);

    // The input carries no alpha, so the output is fully opaque
    float4 opaque = float4(aColor, 1.0);
    vColor = opaque;
}

4
deps/vitaGL/shaders/rgba_f.cg vendored Normal file
View File

@ -0,0 +1,4 @@
// Fragment shader: outputs the interpolated vertex color unchanged.
float4 main(float4 vColor : COLOR) : COLOR
{
return vColor;
}

11
deps/vitaGL/shaders/rgba_v.cg vendored Normal file
View File

@ -0,0 +1,11 @@
// Vertex shader for positions with per-vertex RGBA colors.
//   aPosition - object-space vertex position
//   aColor    - vertex color including alpha
//   wvp       - combined transform applied to the position
// Outputs the transformed position and the color unchanged.
void main(
    float3 aPosition,
    float4 aColor,
    column_major uniform float4x4 wvp,
    float4 out vPosition: POSITION,
    float4 out vColor: COLOR)
{
    // Color needs no processing; forward it first
    vColor = aColor;

    // Promote the position to homogeneous coordinates before transforming it
    float4 homogeneous = float4(aPosition, 1.f);
    vPosition = mul(homogeneous, wvp);
}

69
deps/vitaGL/shaders/texture2d_f.cg vendored Normal file
View File

@ -0,0 +1,69 @@
// Fragment shader for textured geometry tinted by a single uniform color.
// Stages, in order: texture environment, alpha test, fog.
//   texEnv   - 0 modulate, 1 decal, 2 blend, any other value below 4 add;
//              4 or above leaves the sampled color untouched
//   alphaOp  - 0..5 pick the comparison against alphaCut that rejects the
//              fragment, any other value below 7 rejects every fragment,
//              7 or above disables the test
//   fog_mode - values below 3 blend towards fogColor using vFog
float4 main(
    float2 vTexcoord : TEXCOORD0,
    float vFog : FOG,
    uniform sampler2D tex,
    uniform float alphaCut,
    uniform int alphaOp,
    uniform float4 tintColor,
    uniform int texEnv,
    uniform int fog_mode,
    uniform float4 fogColor,
    uniform float4 texEnvColor
    )
{
    float4 shaded = tex2D(tex, vTexcoord);

    // Texture Environment
    if (texEnv == 0) { // GL_MODULATE
        shaded = shaded * tintColor;
    } else if (texEnv == 1) { // GL_DECAL
        shaded.rgb = lerp(tintColor.rgb, shaded.rgb, shaded.a);
        shaded.a = tintColor.a;
    } else if (texEnv == 2) { // GL_BLEND
        shaded.rgb = lerp(tintColor.rgb, texEnvColor.rgb, shaded.rgb);
        shaded.a = shaded.a * tintColor.a;
    } else if (texEnv < 4) { // GL_ADD
        shaded.rgb = clamp(shaded.rgb + tintColor.rgb, 0.0, 1.0);
        shaded.a = shaded.a * tintColor.a;
    }

    // Alpha Test: each mode names the condition under which the fragment is rejected
    if (alphaOp == 0) {
        if (shaded.a < alphaCut)
            discard;
    } else if (alphaOp == 1) {
        if (shaded.a <= alphaCut)
            discard;
    } else if (alphaOp == 2) {
        if (shaded.a == alphaCut)
            discard;
    } else if (alphaOp == 3) {
        if (shaded.a != alphaCut)
            discard;
    } else if (alphaOp == 4) {
        if (shaded.a > alphaCut)
            discard;
    } else if (alphaOp == 5) {
        if (shaded.a >= alphaCut)
            discard;
    } else if (alphaOp < 7) {
        // Remaining enabled mode rejects unconditionally
        discard;
    }

    // Fogging: vFog of 1 keeps the shaded color, 0 gives pure fog color
    if (fog_mode < 3) {
        shaded.rgb = lerp(fogColor.rgb, shaded.rgb, vFog);
    }

    return shaded;
}

69
deps/vitaGL/shaders/texture2d_rgba_f.cg vendored Normal file
View File

@ -0,0 +1,69 @@
// Fragment shader for textured geometry tinted by the interpolated vertex color.
// Stages, in order: texture environment, alpha test, fog.
//   texEnv   - 0 modulate, 1 decal, 2 blend, any other value below 4 add;
//              4 or above leaves the sampled color untouched
//   alphaOp  - 0..5 pick the comparison against alphaCut that rejects the
//              fragment, any other value below 7 rejects every fragment,
//              7 or above disables the test
//   fog_mode - values below 3 blend towards fogColor using vFog
float4 main(
    float2 vTexcoord : TEXCOORD0,
    float4 vColor : COLOR,
    float vFog : FOG,
    uniform sampler2D tex,
    uniform float alphaCut,
    uniform int alphaOp,
    uniform int texEnv,
    uniform int fog_mode,
    uniform float4 fogColor,
    uniform float4 texEnvColor
    )
{
    float4 shaded = tex2D(tex, vTexcoord);

    // Texture Environment
    if (texEnv == 0) { // GL_MODULATE
        shaded = shaded * vColor;
    } else if (texEnv == 1) { // GL_DECAL
        shaded.rgb = lerp(vColor.rgb, shaded.rgb, shaded.a);
        shaded.a = vColor.a;
    } else if (texEnv == 2) { // GL_BLEND
        shaded.rgb = lerp(vColor.rgb, texEnvColor.rgb, shaded.rgb);
        shaded.a = shaded.a * vColor.a;
    } else if (texEnv < 4) { // GL_ADD
        shaded.rgb = clamp(shaded.rgb + vColor.rgb, 0.0, 1.0);
        shaded.a = shaded.a * vColor.a;
    }

    // Alpha Test: each mode names the condition under which the fragment is rejected
    if (alphaOp == 0) {
        if (shaded.a < alphaCut)
            discard;
    } else if (alphaOp == 1) {
        if (shaded.a <= alphaCut)
            discard;
    } else if (alphaOp == 2) {
        if (shaded.a == alphaCut)
            discard;
    } else if (alphaOp == 3) {
        if (shaded.a != alphaCut)
            discard;
    } else if (alphaOp == 4) {
        if (shaded.a > alphaCut)
            discard;
    } else if (alphaOp == 5) {
        if (shaded.a >= alphaCut)
            discard;
    } else if (alphaOp < 7) {
        // Remaining enabled mode rejects unconditionally
        discard;
    }

    // Fogging: vFog of 1 keeps the shaded color, 0 gives pure fog color
    if (fog_mode < 3) {
        shaded.rgb = lerp(fogColor.rgb, shaded.rgb, vFog);
    }

    return shaded;
}

47
deps/vitaGL/shaders/texture2d_rgba_v.cg vendored Normal file
View File

@ -0,0 +1,47 @@
// Vertex shader for textured geometry with per-vertex RGBA colors.
// Transforms the position, evaluates one user clip plane, computes the fog
// factor and forwards texture coordinates and color.
//   fog_mode - 0 linear, 1 exponential, 2 squared exponential; above 2 disables fog
//   clip_plane0 - values below 1 disable the user clip plane
void main(
    float3 position,
    float2 texcoord,
    float4 color,
    column_major uniform float4x4 wvp,
    uniform int fog_mode,
    uniform int clip_plane0, // In the future this can turn into an array to support other planes
    uniform float4 clip_plane0_eq, // In the future this can turn into an array to support other planes
    uniform float4x4 modelview,
    uniform float fog_near,
    uniform float fog_far,
    uniform float fog_density,
    float4 out vPosition : POSITION,
    float out vFog : FOG,
    float2 out vTexcoord : TEXCOORD0,
    float4 out vColor : COLOR,
    float out vClip : CLP0)
{
    float4 pos4 = float4(position, 1.f);

    // User clip planes
    if (clip_plane0 < 1) {
        // Plane disabled: a constant positive distance never clips
        vClip = 1.f;
    } else {
        float4 modelpos = mul(modelview, pos4);
        vClip = dot(modelpos, clip_plane0_eq);
    }

    vPosition = mul(pos4, wvp);

    if (fog_mode > 2) { // Fogging disabled
        vFog = 1.0f;
    } else {
        // NOTE(review): distance is taken from the transformed vPosition, not
        // from an eye-space position - confirm this is the intended metric
        float dist = length(vPosition.xyz);
        if (fog_mode == 0) { // GL_LINEAR
            vFog = (fog_far - dist) / (fog_far - fog_near);
        } else if (fog_mode == 1) { // GL_EXP
            vFog = exp(-fog_density * dist);
        } else { // GL_EXP2
            // f = e^(-(density * dist)^2). The previous code multiplied by
            // -1/ln(2), which is only correct together with exp2(), while
            // still calling exp(), making the fog about 1.44x too dense.
            float d = fog_density * dist;
            vFog = exp(-(d * d));
        }
        vFog = clamp(vFog, 0.0, 1.0);
    }

    vTexcoord = texcoord;
    vColor = color;
}

44
deps/vitaGL/shaders/texture2d_v.cg vendored Normal file
View File

@ -0,0 +1,44 @@
// Vertex shader for textured geometry without per-vertex colors.
// Transforms the position, evaluates one user clip plane, computes the fog
// factor and forwards the texture coordinates.
//   fog_mode - 0 linear, 1 exponential, 2 squared exponential; above 2 disables fog
//   clip_plane0 - values below 1 disable the user clip plane
void main(
    float3 position,
    float2 texcoord,
    column_major uniform float4x4 wvp,
    uniform int fog_mode,
    uniform int clip_plane0, // In the future this can turn into an array to support other planes
    uniform float4 clip_plane0_eq, // In the future this can turn into an array to support other planes
    uniform float4x4 modelview,
    uniform float fog_near,
    uniform float fog_far,
    uniform float fog_density,
    float4 out vPosition : POSITION,
    float out vFog : FOG,
    float2 out vTexcoord : TEXCOORD0,
    float out vClip : CLP0)
{
    float4 pos4 = float4(position, 1.f);

    // User clip planes
    if (clip_plane0 < 1) {
        // Plane disabled: a constant positive distance never clips
        vClip = 1.f;
    } else {
        float4 modelpos = mul(modelview, pos4);
        vClip = dot(modelpos, clip_plane0_eq);
    }

    vPosition = mul(pos4, wvp);

    if (fog_mode > 2) { // Fogging disabled
        vFog = 1.0f;
    } else {
        // NOTE(review): distance is taken from the transformed vPosition, not
        // from an eye-space position - confirm this is the intended metric
        float dist = length(vPosition.xyz);
        if (fog_mode == 0) { // GL_LINEAR
            vFog = (fog_far - dist) / (fog_far - fog_near);
        } else if (fog_mode == 1) { // GL_EXP
            vFog = exp(-fog_density * dist);
        } else { // GL_EXP2
            // f = e^(-(density * dist)^2). The previous code multiplied by
            // -1/ln(2), which is only correct together with exp2(), while
            // still calling exp(), making the fog about 1.44x too dense.
            float d = fog_density * dist;
            vFog = exp(-(d * d));
        }
        vFog = clamp(vFog, 0.0, 1.0);
    }

    vTexcoord = texcoord;
}

429
deps/vitaGL/source/custom_shaders.c vendored Normal file
View File

@ -0,0 +1,429 @@
/*
* custom_shaders.c:
* Implementation for custom shaders feature
*/
#include "shared.h"
#define MAX_CUSTOM_SHADERS 32 // Maximum number of linkable custom shaders
// NOTE(review): not referenced in the visible part of this file; the attribute
// and stream arrays in `program` are sized with a literal 16 instead
#define MAX_SHADER_PARAMS 16 // Maximum number of parameters per custom shader
// Default uniform buffers, reserved lazily by the glUniform* calls below
// whenever the pointer is still NULL
void *frag_uniforms = NULL;
void *vert_uniforms = NULL;
GLuint cur_program = 0; // Current in use custom program (0 = No custom program)
// Uniform struct: one node per glGetUniformLocation call, linked into a
// singly linked list owned by the program it was queried from
typedef struct uniform {
// GL_TRUE when the parameter was found in the vertex program, GL_FALSE otherwise
GLboolean isVertex;
// sceGxm parameter handle; NULL when the name was found in neither program
const SceGxmProgramParameter *ptr;
// Next node of the list (stored as void *), NULL for the last node
void *chain;
} uniform;
// Generic shader struct: one slot of the internal shaders array
typedef struct shader {
// GL_VERTEX_SHADER or GL_FRAGMENT_SHADER, set by glCreateShader
GLenum type;
// Non-zero while the slot is reserved
GLboolean valid;
// Id returned by sceGxmShaderPatcherRegisterProgram
SceGxmShaderPatcherId id;
// Compiled program as returned by sceGxmShaderPatcherGetProgramFromId
const SceGxmProgram *prog;
} shader;
// Program struct holding vertex/fragment shader info
typedef struct program {
// Shaders attached through glAttachShader
shader *vshader;
shader *fshader;
// Non-zero while the slot is reserved
GLboolean valid;
// Vertex attribute and stream descriptions filled in by vglBindAttribLocation
SceGxmVertexAttribute attr[16];
SceGxmVertexStream stream[16];
// Patched programs created by glLinkProgram
SceGxmVertexProgram *vprog;
SceGxmFragmentProgram *fprog;
// Highest bound attribute index plus one
GLuint attr_num;
// Cached handle of the "wvp" vertex parameter, looked up on first use
const SceGxmProgramParameter *wvp;
// Head and tail of the list of uniform nodes handed out for this program
uniform *uniforms;
uniform *last_uniform;
} program;
// Internal shaders array; public shader handles are the slot index plus one
static shader shaders[MAX_CUSTOM_SHADERS];
// Internal programs array; public program handles are the slot index plus one
static program progs[MAX_CUSTOM_SHADERS / 2];
// Marks every shader slot and every program slot as free.
void resetCustomShaders(void) {
    int slot;

    for (slot = 0; slot < MAX_CUSTOM_SHADERS; slot++)
        shaders[slot].valid = 0;

    // There are half as many program slots as shader slots
    for (slot = 0; slot < MAX_CUSTOM_SHADERS / 2; slot++)
        progs[slot].valid = 0;
}
// Recreates the fragment program of every reserved custom program so that
// it uses the given blend settings.
//   blend_info - blend configuration handed to the shader patcher
void changeCustomShadersBlend(SceGxmBlendInfo *blend_info) {
    int j;
    for (j = 0; j < MAX_CUSTOM_SHADERS / 2; j++) {
        program *p = &progs[j];

        // A program can be reserved before any shader is attached to it;
        // dereferencing its shader pointers would then crash.
        if (!p->valid || p->vshader == NULL || p->fshader == NULL)
            continue;

        // NOTE(review): the fragment program previously stored in p->fprog is
        // not released here - confirm whether that leaks a patcher reference
        sceGxmShaderPatcherCreateFragmentProgram(gxm_shader_patcher,
            p->fshader->id,
            SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4,
            msaa_mode,
            blend_info,
            p->vshader->prog,
            &p->fprog);
    }
}
// Re-applies the vertex and fragment programs of the current custom program
// to the sceGxm context. Does nothing when no custom program is in use.
void reloadCustomShader(void) {
    if (cur_program != 0) {
        // Program handles are 1-based
        program *active = &progs[cur_program - 1];
        sceGxmSetVertexProgram(gxm_context, active->vprog);
        sceGxmSetFragmentProgram(gxm_context, active->fprog);
    }
}
// Draw-time hook for custom shaders. When implicit_wvp is set, uploads the
// current model-view-projection matrix to the program's "wvp" vertex uniform;
// otherwise it does nothing. mode and count are not used here.
void _vglDrawObjects_CustomShadersIMPL(GLenum mode, GLsizei count, GLboolean implicit_wvp) {
    if (!implicit_wvp)
        return;

    // Program handles are 1-based
    program *active = &progs[cur_program - 1];

    // Rebuild the combined matrix only if one of its inputs changed
    if (mvp_modified) {
        matrix4x4_multiply(mvp_matrix, projection_matrix, modelview_matrix);
        mvp_modified = GL_FALSE;
    }

    if (vert_uniforms == NULL)
        sceGxmReserveVertexDefaultUniformBuffer(gxm_context, &vert_uniforms);

    // Look the parameter up once and cache it on the program
    if (active->wvp == NULL)
        active->wvp = sceGxmProgramFindParameterByName(active->vshader->prog, "wvp");

    sceGxmSetUniformDataF(vert_uniforms, active->wvp, 0, 16, (const float *)mvp_matrix);
}
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Reserves a shader slot of the requested type.
//   shaderType - GL_VERTEX_SHADER or GL_FRAGMENT_SHADER
// Returns the 1-based shader handle, or 0 when the type is unsupported
// (error is set to GL_INVALID_ENUM) or every slot is in use.
GLuint glCreateShader(GLenum shaderType) {
    GLuint i, res = 0;

    // Reject unknown types before touching any slot; previously the slot was
    // still reserved and returned with an unset type.
    if (shaderType != GL_FRAGMENT_SHADER && shaderType != GL_VERTEX_SHADER) {
        error = GL_INVALID_ENUM;
        return 0;
    }

    // Looking for a free shader slot
    for (i = 1; i <= MAX_CUSTOM_SHADERS; i++) {
        if (!(shaders[i - 1].valid)) {
            res = i;
            break;
        }
    }

    // All shader slots are busy, exiting call
    if (res == 0)
        return res;

    // Reserving and initializing shader slot
    shaders[res - 1].type = shaderType;
    // A reused slot must not keep the program pointer of its previous owner
    shaders[res - 1].prog = NULL;
    shaders[res - 1].valid = GL_TRUE;

    return res;
}
// Loads a precompiled shader binary into the first shader of `handles`.
//   count        - number of handles (only handles[0] is used)
//   handles      - shader handles returned by glCreateShader
//   binaryFormat - not used
//   binary       - compiled shader program
//   length       - size of `binary` in bytes
// Sets error to GL_INVALID_VALUE when the handle or the binary is unusable.
void glShaderBinary(GLsizei count, const GLuint *handles, GLenum binaryFormat, const void *binary, GLsizei length) {
    // Validate before indexing: handles are 1-based slot numbers
    if (handles == NULL || binary == NULL || length <= 0 ||
        handles[0] == 0 || handles[0] > MAX_CUSTOM_SHADERS) {
        error = GL_INVALID_VALUE;
        return;
    }

    // Grabbing passed shader
    shader *s = &shaders[handles[0] - 1];

    // Allocating compiled shader on RAM
    void *copy = malloc(length);
    if (copy == NULL) {
        // Out of host memory: leave the shader without a program rather
        // than copying into a NULL pointer
        return;
    }
    memcpy(copy, binary, length);

    // Registering it into sceGxmShaderPatcher
    s->prog = (SceGxmProgram *)copy;
    sceGxmShaderPatcherRegisterProgram(gxm_shader_patcher, s->prog, &s->id);
    s->prog = sceGxmShaderPatcherGetProgramFromId(s->id);
}
// Releases a shader slot and the program binary it owns.
//   shad - shader handle returned by glCreateShader; 0 is ignored
// Sets error to GL_INVALID_VALUE for a handle outside the slot range.
void glDeleteShader(GLuint shad) {
    // Handles are 1-based; check before indexing the slot array
    if (shad == 0)
        return;
    if (shad > MAX_CUSTOM_SHADERS) {
        error = GL_INVALID_VALUE;
        return;
    }

    // Grabbing passed shader
    shader *s = &shaders[shad - 1];

    // Deallocating shader and unregistering it from sceGxmShaderPatcher.
    // Only shaders that actually received a binary have anything registered.
    if (s->valid && s->prog != NULL) {
        sceGxmShaderPatcherForceUnregisterProgram(gxm_shader_patcher, s->id);
        free((void *)s->prog);
        // Clear the pointer so a later reuse of this slot cannot free it again
        s->prog = NULL;
    }
    s->valid = GL_FALSE;
}
// Attaches a shader to a program, replacing any shader of the same type.
//   prog - program handle returned by glCreateProgram
//   shad - shader handle returned by glCreateShader
// Sets error to GL_INVALID_VALUE when either handle is out of range or not
// currently reserved.
void glAttachShader(GLuint prog, GLuint shad) {
    // Handles are 1-based; reject anything that would index outside the arrays
    if (prog == 0 || prog > (MAX_CUSTOM_SHADERS / 2) ||
        shad == 0 || shad > MAX_CUSTOM_SHADERS) {
        error = GL_INVALID_VALUE;
        return;
    }

    // Grabbing passed shader and program
    shader *s = &shaders[shad - 1];
    program *p = &progs[prog - 1];

    // Attaching shader to desired program
    if (p->valid && s->valid) {
        switch (s->type) {
        case GL_VERTEX_SHADER:
            p->vshader = s;
            break;
        case GL_FRAGMENT_SHADER:
            p->fshader = s;
            break;
        default:
            break;
        }
    } else
        error = GL_INVALID_VALUE;
}
GLuint glCreateProgram(void) {
// Looking for a free program slot
GLuint i, res = 0;
for (i = 1; i <= (MAX_CUSTOM_SHADERS / 2); i++) {
// Program slot found, reserving and initializing it
if (!(progs[i - 1].valid)) {
res = i;
progs[i - 1].valid = GL_TRUE;
progs[i - 1].attr_num = 0;
progs[i - 1].wvp = NULL;
progs[i - 1].uniforms = NULL;
progs[i - 1].last_uniform = NULL;
break;
}
}
return res;
}
// Releases a program slot: drops the patcher references to its vertex and
// fragment programs and frees its list of uniform nodes.
//   prog - program handle returned by glCreateProgram
void glDeleteProgram(GLuint prog) {
    // Program handles are 1-based
    program *target = &progs[prog - 1];

    if (target->valid) {
        unsigned int refs;
        uniform *node;

        // Both programs are released once per outstanding fragment-program reference
        sceGxmShaderPatcherGetFragmentProgramRefCount(gxm_shader_patcher, target->fprog, &refs);
        while (refs > 0) {
            sceGxmShaderPatcherReleaseFragmentProgram(gxm_shader_patcher, target->fprog);
            sceGxmShaderPatcherReleaseVertexProgram(gxm_shader_patcher, target->vprog);
            refs--;
        }

        // Walk the uniform list, fetching the successor before freeing a node
        node = target->uniforms;
        while (node != NULL) {
            uniform *following = (uniform *)node->chain;
            free(node);
            node = following;
        }
        target->uniforms = NULL;
    }

    target->valid = GL_FALSE;
}
// Creates the patched vertex and fragment programs for a custom program from
// its attached shaders and bound vertex attributes.
//   progr - program handle returned by glCreateProgram
void glLinkProgram(GLuint progr) {
    // Program handles are 1-based
    program *linked = &progs[progr - 1];
    shader *vertex_stage = linked->vshader;
    shader *fragment_stage = linked->fshader;

    // One vertex stream per bound attribute
    sceGxmShaderPatcherCreateVertexProgram(gxm_shader_patcher,
        vertex_stage->id,
        linked->attr, linked->attr_num,
        linked->stream, linked->attr_num,
        &linked->vprog);

    // No blend info at link time
    sceGxmShaderPatcherCreateFragmentProgram(gxm_shader_patcher,
        fragment_stage->id,
        SCE_GXM_OUTPUT_REGISTER_FORMAT_UCHAR4,
        msaa_mode,
        NULL,
        vertex_stage->prog,
        &linked->fprog);
}
// Makes `prog` the current custom program and applies its vertex and
// fragment programs to the sceGxm context. With prog == 0 only the current
// program is cleared, since reloadCustomShader() returns early in that case.
void glUseProgram(GLuint prog) {
// Setting current custom program to passed program
cur_program = prog;
// Setting in-use vertex and fragment program in sceGxm
reloadCustomShader();
}
// Looks up a uniform by name in the vertex program, then in the fragment
// program, of a custom program.
//   prog - program handle returned by glCreateProgram
//   name - uniform name as written in the shader
// Returns an opaque location for the glUniform* calls (the address of a
// heap-allocated uniform node cast to GLint), or -1 when the handle is out
// of range (error is set to GL_INVALID_VALUE) or memory is exhausted.
// Every call allocates a new node; nodes are released by glDeleteProgram.
GLint glGetUniformLocation(GLuint prog, const GLchar *name) {
    // Handles are 1-based; check before indexing the program array
    if (prog == 0 || prog > (MAX_CUSTOM_SHADERS / 2)) {
        error = GL_INVALID_VALUE;
        return -1;
    }

    // Grabbing passed program
    program *p = &progs[prog - 1];

    uniform *res = (uniform *)malloc(sizeof(uniform));
    if (res == NULL)
        return -1;
    res->chain = NULL;

    // Append to the program's list. The head must be recorded for the first
    // node, otherwise glDeleteProgram finds an empty list and every node leaks.
    if (p->uniforms == NULL)
        p->uniforms = res;
    if (p->last_uniform != NULL)
        p->last_uniform->chain = (void *)res;
    p->last_uniform = res;

    // Checking if parameter is a vertex or fragment related one
    res->ptr = sceGxmProgramFindParameterByName(p->vshader->prog, name);
    res->isVertex = GL_TRUE;
    if (res->ptr == NULL) {
        res->ptr = sceGxmProgramFindParameterByName(p->fshader->prog, name);
        res->isVertex = GL_FALSE;
    }

    return (GLint)res;
}
// Shared implementation of the glUniform* entry points: writes
// `component_count` floats from `values` into the uniform behind `location`.
// Locations of -1 or 0, and uniforms that were not found in either program,
// are ignored.
static void set_uniform_floats(GLint location, unsigned int component_count, const GLfloat *values) {
    // -1 is the "no such uniform" location and 0 would be a NULL node
    if (location == -1 || location == 0)
        return;

    // Grabbing passed uniform
    uniform *u = (uniform *)location;
    if (u->ptr == NULL)
        return;

    // Setting passed value to desired uniform, reserving the matching
    // default uniform buffer on first use
    if (u->isVertex) {
        if (vert_uniforms == NULL)
            sceGxmReserveVertexDefaultUniformBuffer(gxm_context, &vert_uniforms);
        sceGxmSetUniformDataF(vert_uniforms, u->ptr, 0, component_count, values);
    } else {
        if (frag_uniforms == NULL)
            sceGxmReserveFragmentDefaultUniformBuffer(gxm_context, &frag_uniforms);
        sceGxmSetUniformDataF(frag_uniforms, u->ptr, 0, component_count, values);
    }
}

// Sets a single float uniform.
void glUniform1f(GLint location, GLfloat v0) {
    set_uniform_floats(location, 1, &v0);
}

// Sets `count` two-component float uniforms from `value`.
void glUniform2fv(GLint location, GLsizei count, const GLfloat *value) {
    set_uniform_floats(location, 2 * count, value);
}

// Sets `count` four-component float uniforms from `value`.
void glUniform4fv(GLint location, GLsizei count, const GLfloat *value) {
    set_uniform_floats(location, 4 * count, value);
}

// Sets `count` 4x4 float matrices from `value`.
// NOTE(review): `transpose` is not used - the data is uploaded as given.
void glUniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value) {
    set_uniform_floats(location, 16 * count, value);
}
/*
* ------------------------------
* - VGL_EXT_gxp_shaders -
* ------------------------------
*/
// Equivalent of glBindAttribLocation but for sceGxm architecture.
// Describes vertex attribute `index` of a custom program: which shader
// parameter it feeds, how many components it has and their data type.
//   prog  - program handle returned by glCreateProgram
//   index - attribute/stream slot, must fit the program's attribute array
//   name  - vertex shader parameter name
//   num   - number of components per vertex
//   type  - GL_FLOAT or GL_UNSIGNED_BYTE
// Sets error to GL_INVALID_VALUE for an out-of-range index and to
// GL_INVALID_ENUM for an unsupported type; nothing is modified in either case.
void vglBindAttribLocation(GLuint prog, GLuint index, const GLchar *name, const GLuint num, const GLenum type) {
    // Grabbing passed program
    program *p = &progs[prog - 1];

    // The attribute and stream arrays have a fixed size
    if (index >= sizeof(p->attr) / sizeof(p->attr[0])) {
        error = GL_INVALID_VALUE;
        return;
    }

    SceGxmVertexAttribute *attributes = &p->attr[index];
    SceGxmVertexStream *streams = &p->stream[index];

    // Detecting attribute format and size
    int bpe;
    switch (type) {
    case GL_FLOAT:
        attributes->format = SCE_GXM_ATTRIBUTE_FORMAT_F32;
        bpe = sizeof(float);
        break;
    case GL_UNSIGNED_BYTE:
        attributes->format = SCE_GXM_ATTRIBUTE_FORMAT_U8N;
        bpe = sizeof(uint8_t);
        break;
    default:
        // Bail out: continuing would compute the stride from an
        // uninitialized element size
        error = GL_INVALID_ENUM;
        return;
    }

    // Looking for desired parameter in requested program
    const SceGxmProgramParameter *param = sceGxmProgramFindParameterByName(p->vshader->prog, name);

    // Setting stream index and offset values
    attributes->streamIndex = index;
    attributes->offset = 0;

    // Setting various info about the stream
    attributes->componentCount = num;
    attributes->regIndex = sceGxmProgramParameterGetResourceIndex(param);
    streams->stride = bpe * num;
    streams->indexSource = SCE_GXM_INDEX_SOURCE_INDEX_16BIT;

    // Track the highest bound attribute so linking covers all of them
    if (index >= p->attr_num)
        p->attr_num = index + 1;
}
// Equivalent of glVertexAttribPointer but for sceGxm architecture.
// Copies `count` vertices into memory from the vitaGL pool, tightly packed,
// and sets that copy as vertex stream `index`.
//   index      - vertex stream to set
//   size       - components per vertex
//   type       - GL_FLOAT or GL_SHORT
//   normalized - not used
//   stride     - byte distance between source vertices, 0 for tightly packed
//   count      - number of vertices to copy
//   pointer    - source vertex data
// Sets error to GL_INVALID_VALUE for a negative stride (unless
// SKIP_ERROR_HANDLING is defined) and to GL_INVALID_ENUM for an unsupported
// type; no stream is set in either case.
void vglVertexAttribPointer(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, GLuint count, const GLvoid *pointer) {
#ifndef SKIP_ERROR_HANDLING
    // Error handling
    if (stride < 0) {
        error = GL_INVALID_VALUE;
        return;
    }
#endif

    // Detecting type size
    int bpe;
    switch (type) {
    case GL_FLOAT:
        bpe = sizeof(GLfloat);
        break;
    case GL_SHORT:
        bpe = sizeof(GLshort);
        break;
    default:
        // Bail out: continuing would size the allocation from an
        // uninitialized element size
        error = GL_INVALID_ENUM;
        return;
    }

    // Allocating enough memory on vitaGL mempool
    void *ptr = gpu_pool_memalign(count * bpe * size, bpe * size);

    // Copying passed data to vitaGL mempool
    if (stride == 0)
        memcpy(ptr, pointer, count * bpe * size); // Faster if stride == 0
    else {
        GLuint i; // same type as count, avoids a signed/unsigned comparison
        uint8_t *dst = (uint8_t *)ptr;
        uint8_t *src = (uint8_t *)pointer;
        for (i = 0; i < count; i++) {
            memcpy(dst, src, bpe * size);
            dst += (bpe * size);
            src += stride;
        }
    }

    // Setting vertex stream to passed index in sceGxm
    sceGxmSetVertexStream(gxm_context, index, ptr);
}
// Sets vertex stream `index` to `pointer` directly, without copying the data.
// NOTE(review): presumably the memory must already be mapped for GPU access,
// as the name suggests - confirm against callers.
void vglVertexAttribPointerMapped(GLuint index, const GLvoid *pointer) {
// Setting vertex stream to passed index in sceGxm
sceGxmSetVertexStream(gxm_context, index, pointer);
}

166
deps/vitaGL/source/framebuffers.c vendored Normal file
View File

@ -0,0 +1,166 @@
/*
* framebuffers.c:
* Implementation for framebuffers related functions
*/
#include "shared.h"
// Public framebuffer handles are the addresses of entries of this array cast to GLuint
static framebuffer framebuffers[BUFFERS_NUM]; // Framebuffers array
// Both pointers are set by glBindFramebuffer; they are NULL when handle 0 is bound
framebuffer *active_read_fb = NULL; // Current readback framebuffer in use
framebuffer *active_write_fb = NULL; // Current write framebuffer in use
// Maps a GL texture format to the sceGxm color surface format used when the
// texture becomes a render target.
//   type - GL_RGB, GL_RGBA, GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_INTENSITY or GL_ALPHA
// Returns the SCE_GXM_COLOR_FORMAT_* value, or 0 with error set to
// GL_INVALID_ENUM for any other format.
uint32_t get_color_from_texture(uint32_t type) {
    switch (type) {
    case GL_RGB:
        return SCE_GXM_COLOR_FORMAT_U8U8U8_BGR;
    case GL_RGBA:
        return SCE_GXM_COLOR_FORMAT_U8U8U8U8_ABGR;
    case GL_LUMINANCE:
    case GL_INTENSITY:
        // Both single-channel formats use the same surface format
        return SCE_GXM_COLOR_FORMAT_U8_R;
    case GL_LUMINANCE_ALPHA:
        return SCE_GXM_COLOR_FORMAT_U8U8_GR;
    case GL_ALPHA:
        return SCE_GXM_COLOR_FORMAT_U8_A;
    default:
        break;
    }

    // Unsupported format
    error = GL_INVALID_ENUM;
    return 0;
}
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Reserves up to `n` free framebuffer objects.
//   n   - number of handles requested
//   ids - receives the handles; entries that could not be served because no
//         free framebuffer is left are set to 0
// Sets error to GL_INVALID_VALUE for a negative n (unless SKIP_ERROR_HANDLING
// is defined).
void glGenFramebuffers(GLsizei n, GLuint *ids) {
    int i = 0, j = 0;
#ifndef SKIP_ERROR_HANDLING
    if (n < 0) {
        error = GL_INVALID_VALUE;
        return;
    }
#endif

    // Checking j < n before each slot keeps n == 0 from writing ids[0]
    for (i = 0; i < BUFFERS_NUM && j < n; i++) {
        if (!framebuffers[i].active) {
            // The handle is the address of the slot
            ids[j++] = (GLuint)&framebuffers[i];
            framebuffers[i].active = 1;
            framebuffers[i].depth_buffer_addr = NULL;
            framebuffers[i].stencil_buffer_addr = NULL;
        }
    }

    // Out of free slots: never hand back uninitialized values, which would
    // later be cast to pointers. 0 is the default framebuffer.
    for (; j < n; j++)
        ids[j] = 0;
}
// Releases `n` framebuffer objects together with their render target and
// depth/stencil memory.
//   n            - number of handles in the array
//   framebuffers - handles returned by glGenFramebuffers; 0 entries are ignored
// A framebuffer that is currently bound is unbound first.
// Sets error to GL_INVALID_VALUE for a negative n (unless SKIP_ERROR_HANDLING
// is defined).
void glDeleteFramebuffers(GLsizei n, GLuint *framebuffers) {
#ifndef SKIP_ERROR_HANDLING
    if (n < 0) {
        error = GL_INVALID_VALUE;
        return;
    }
#endif
    while (n > 0) {
        // Pre-decrement: valid indices are n-1 .. 0. The previous post-decrement
        // read one element past the end and never reached element 0.
        framebuffer *fb = (framebuffer *)framebuffers[--n];

        // Handle 0 names the default framebuffer and is silently skipped
        if (fb == NULL)
            continue;

        // Do not leave a binding pointing at a released framebuffer
        if (active_read_fb == fb)
            active_read_fb = NULL;
        if (active_write_fb == fb)
            active_write_fb = NULL;

        fb->active = 0;
        if (fb->target) {
            sceGxmDestroyRenderTarget(fb->target);
            fb->target = NULL;
        }
        if (fb->depth_buffer_addr) {
            mempool_free(fb->depth_buffer_addr, fb->depth_buffer_mem_type);
            mempool_free(fb->stencil_buffer_addr, fb->stencil_buffer_mem_type);
            fb->depth_buffer_addr = NULL;
            fb->stencil_buffer_addr = NULL;
        }
    }
}
// Binds a framebuffer object for drawing, reading, or both.
//   target - GL_DRAW_FRAMEBUFFER, GL_READ_FRAMEBUFFER or GL_FRAMEBUFFER
//   fb     - handle returned by glGenFramebuffers
// Sets error to GL_INVALID_ENUM for any other target.
void glBindFramebuffer(GLenum target, GLuint fb) {
    // Handles are framebuffer addresses
    framebuffer *requested = (framebuffer *)fb;

    if (target == GL_FRAMEBUFFER) {
        // Generic target binds both directions
        active_read_fb = requested;
        active_write_fb = requested;
    } else if (target == GL_DRAW_FRAMEBUFFER) {
        active_write_fb = requested;
    } else if (target == GL_READ_FRAMEBUFFER) {
        active_read_fb = requested;
    } else {
        error = GL_INVALID_ENUM;
    }
}
// Attaches a texture to the framebuffer bound to `target` so that rendering
// goes into the texture's storage.
//   target     - GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER or GL_READ_FRAMEBUFFER
//   attachment - only GL_COLOR_ATTACHMENT0 is supported
//   tex_id     - texture index inside the current server texture unit
//   level      - not used
// Sets error to GL_INVALID_ENUM for an unsupported target or attachment and
// to GL_INVALID_VALUE when no framebuffer object is bound to the target.
void glFramebufferTexture(GLenum target, GLenum attachment, GLuint tex_id, GLint level) {
    // Detecting requested framebuffer
    framebuffer *fb = NULL;
    switch (target) {
    case GL_DRAW_FRAMEBUFFER:
    case GL_FRAMEBUFFER:
        fb = active_write_fb;
        break;
    case GL_READ_FRAMEBUFFER:
        fb = active_read_fb;
        break;
    default:
        // Nothing to attach to; continuing would dereference a NULL framebuffer
        error = GL_INVALID_ENUM;
        return;
    }

    // The bindings are NULL while the default framebuffer is bound
    if (fb == NULL) {
        error = GL_INVALID_VALUE;
        return;
    }

    // Aliasing to make code more readable
    // NOTE(review): tex_id is used as an array index without a range check
    texture_unit *tex_unit = &texture_units[server_texture_unit];
    texture *tex = &tex_unit->textures[tex_id];

    // Extracting texture sizes
    int tex_w = sceGxmTextureGetWidth(&tex->gxm_tex);
    int tex_h = sceGxmTextureGetHeight(&tex->gxm_tex);

    // Detecting requested attachment
    switch (attachment) {
    case GL_COLOR_ATTACHMENT0: {
        // Allocating colorbuffer: the surface aliases the texture's own data
        sceGxmColorSurfaceInit(
            &fb->colorbuffer,
            get_color_from_texture(tex->type),
            SCE_GXM_COLOR_SURFACE_LINEAR,
            msaa_mode == SCE_GXM_MULTISAMPLE_NONE ? SCE_GXM_COLOR_SURFACE_SCALE_NONE : SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE,
            SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT,
            tex_w,
            tex_h,
            tex_w,
            sceGxmTextureGetData(&tex->gxm_tex));

        // Allocating depth and stencil buffer (FIXME: This probably shouldn't be here)
        // NOTE(review): attaching twice to the same framebuffer overwrites the
        // previous buffers and render target without releasing them - confirm
        initDepthStencilBuffer(tex_w, tex_h, &fb->depthbuffer, &fb->depth_buffer_addr, &fb->stencil_buffer_addr, &fb->depth_buffer_mem_type, &fb->stencil_buffer_mem_type);

        // Creating rendertarget
        SceGxmRenderTargetParams renderTargetParams;
        memset(&renderTargetParams, 0, sizeof(SceGxmRenderTargetParams));
        renderTargetParams.flags = 0;
        renderTargetParams.width = tex_w;
        renderTargetParams.height = tex_h;
        renderTargetParams.scenesPerFrame = 1;
        renderTargetParams.multisampleMode = msaa_mode;
        renderTargetParams.multisampleLocations = 0;
        renderTargetParams.driverMemBlock = -1;
        sceGxmCreateRenderTarget(&renderTargetParams, &fb->target);
        break;
    }
    default:
        error = GL_INVALID_ENUM;
        break;
    }
}

170
deps/vitaGL/source/get_info.c vendored Normal file
View File

@ -0,0 +1,170 @@
/*
* get_info.c:
* Implementation for functions returning info to end user
*/
#include "shared.h"
// Constants returned by glGetString.
// String literals are arrays of char, so they are cast explicitly to the
// GLubyte pointer type glGetString returns; the pointers themselves are
// const because they are never reassigned.
static const GLubyte *const vendor = (const GLubyte *)"Rinnegatamante";
static const GLubyte *const renderer = (const GLubyte *)"SGX543MP4+";
static const GLubyte *const version = (const GLubyte *)"VitaGL 1.0";
static const GLubyte *const extensions = (const GLubyte *)"VGL_EXT_gpu_objects_array VGL_EXT_gxp_shaders";
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Returns a static string describing the implementation.
//   name - GL_VENDOR, GL_RENDERER, GL_VERSION or GL_EXTENSIONS
// Returns NULL with error set to GL_INVALID_ENUM for any other name.
const GLubyte *glGetString(GLenum name) {
    const GLubyte *text = NULL;

    if (name == GL_VENDOR) // Vendor
        text = vendor;
    else if (name == GL_RENDERER) // Renderer
        text = renderer;
    else if (name == GL_VERSION) // openGL Version
        text = version;
    else if (name == GL_EXTENSIONS) // Supported extensions
        text = extensions;
    else
        error = GL_INVALID_ENUM;

    return text;
}
// Reads a piece of GL state as a boolean.
//   pname  - state to query
//   params - receives the value; left untouched for unsupported names
// Blend factors report GL_FALSE only when the factor is zero.
// Sets error to GL_INVALID_ENUM for unsupported names.
void glGetBooleanv(GLenum pname, GLboolean *params) {
    switch (pname) {
    case GL_BLEND: // Blending feature state
        *params = blend_state;
        return;
    case GL_DEPTH_TEST: // Depth test state
        *params = depth_test_state;
        return;
    case GL_BLEND_DST_ALPHA: // Blend Alpha Factor for Destination
        *params = (blend_dfactor_a != SCE_GXM_BLEND_FACTOR_ZERO) ? GL_TRUE : GL_FALSE;
        return;
    case GL_BLEND_DST_RGB: // Blend RGB Factor for Destination
        *params = (blend_dfactor_rgb != SCE_GXM_BLEND_FACTOR_ZERO) ? GL_TRUE : GL_FALSE;
        return;
    case GL_BLEND_SRC_ALPHA: // Blend Alpha Factor for Source
        *params = (blend_sfactor_a != SCE_GXM_BLEND_FACTOR_ZERO) ? GL_TRUE : GL_FALSE;
        return;
    case GL_BLEND_SRC_RGB: // Blend RGB Factor for Source
        *params = (blend_sfactor_rgb != SCE_GXM_BLEND_FACTOR_ZERO) ? GL_TRUE : GL_FALSE;
        return;
    case GL_ACTIVE_TEXTURE: // Active texture
        *params = GL_FALSE;
        return;
    default:
        error = GL_INVALID_ENUM;
        return;
    }
}
// Reads a piece of GL state as floating point values.
//   pname - state to query
//   data  - receives the value(s); GL_MODELVIEW_MATRIX writes a whole
//           matrix4x4, every other name writes a single float
// Sets error to GL_INVALID_ENUM for unsupported names.
void glGetFloatv(GLenum pname, GLfloat *data) {
    switch (pname) {
    case GL_MODELVIEW_MATRIX: // Modelview matrix
        memcpy(data, &modelview_matrix, sizeof(matrix4x4));
        return;
    case GL_POLYGON_OFFSET_FACTOR: // Polygon offset factor
        *data = pol_factor;
        return;
    case GL_POLYGON_OFFSET_UNITS: // Polygon offset units
        *data = pol_units;
        return;
    case GL_ACTIVE_TEXTURE: // Active texture
        *data = (1.0f * (server_texture_unit + GL_TEXTURE0));
        return;
    case GL_MAX_MODELVIEW_STACK_DEPTH: // Max modelview stack depth
        *data = MODELVIEW_STACK_DEPTH;
        return;
    case GL_MAX_PROJECTION_STACK_DEPTH: // Max projection stack depth
    case GL_MAX_TEXTURE_STACK_DEPTH: // Max texture stack depth
        // Both stacks share the generic depth limit
        *data = GENERIC_STACK_DEPTH;
        return;
    default:
        error = GL_INVALID_ENUM;
        return;
    }
}
// Reads a piece of GL state as integer values.
//   pname - state to query
//   data  - receives the value(s): two for GL_POLYGON_MODE, four (x, y,
//           width, height) for GL_SCISSOR_BOX and GL_VIEWPORT, one otherwise
// Sets error to GL_INVALID_ENUM for unsupported names.
void glGetIntegerv(GLenum pname, GLint *data) {
    if (pname == GL_POLYGON_MODE) {
        data[0] = gl_polygon_mode_front;
        data[1] = gl_polygon_mode_back;
    } else if (pname == GL_SCISSOR_BOX) {
        data[0] = region.x;
        data[1] = region.y;
        data[2] = region.w;
        data[3] = region.h;
    } else if (pname == GL_TEXTURE_BINDING_2D) {
        // Texture bound on the current server texture unit
        *data = texture_units[server_texture_unit].tex_id;
    } else if (pname == GL_MAX_TEXTURE_SIZE) {
        *data = 1024;
    } else if (pname == GL_VIEWPORT) {
        data[0] = gl_viewport.x;
        data[1] = gl_viewport.y;
        data[2] = gl_viewport.w;
        data[3] = gl_viewport.h;
    } else {
        error = GL_INVALID_ENUM;
    }
}
// Reports whether a capability is currently enabled.
//   cap - capability to query
// Returns the stored state of the capability, or GL_FALSE with error set to
// GL_INVALID_ENUM for unsupported capabilities.
GLboolean glIsEnabled(GLenum cap) {
    switch (cap) {
    case GL_DEPTH_TEST:
        return depth_test_state;
    case GL_STENCIL_TEST:
        return stencil_test_state;
    case GL_BLEND:
        return blend_state;
    case GL_SCISSOR_TEST:
        return scissor_test_state;
    case GL_CULL_FACE:
        return cull_face_state;
    case GL_POLYGON_OFFSET_FILL:
        return pol_offset_fill;
    case GL_POLYGON_OFFSET_LINE:
        return pol_offset_line;
    case GL_POLYGON_OFFSET_POINT:
        return pol_offset_point;
    default:
        break;
    }

    // Unsupported capability
    error = GL_INVALID_ENUM;
    return GL_FALSE;
}
GLenum glGetError(void) {
GLenum ret = error;
error = GL_NO_ERROR;
return ret;
}

384
deps/vitaGL/source/gxm.c vendored Normal file
View File

@ -0,0 +1,384 @@
/*
* gxm.c:
* Implementation for setup and cleanup for sceGxm specific stuffs
*/
#include "shared.h"
// Ring buffers handed to the sceGxm context in initGxmContext()
static void *vdm_ring_buffer_addr; // VDM ring buffer memblock starting address
static void *vertex_ring_buffer_addr; // vertex ring buffer memblock starting address
static void *fragment_ring_buffer_addr; // fragment ring buffer memblock starting address
static void *fragment_usse_ring_buffer_addr; // fragment USSE ring buffer memblock starting address
static SceGxmRenderTarget *gxm_render_target; // Display render target
static SceGxmColorSurface gxm_color_surfaces[DISPLAY_BUFFER_COUNT]; // Display color surfaces
static void *gxm_color_surfaces_addr[DISPLAY_BUFFER_COUNT]; // Display color surfaces memblock starting addresses
static SceGxmSyncObject *gxm_sync_objects[DISPLAY_BUFFER_COUNT]; // Display sync objects
static unsigned int gxm_front_buffer_index; // Display front buffer id
static unsigned int gxm_back_buffer_index; // Display back buffer id
static unsigned int gxm_scene_flags = 0; // Current gxm scene flags
static void *gxm_shader_patcher_buffer_addr; // Shader Patcher buffer memblock starting address
static void *gxm_shader_patcher_vertex_usse_addr; // Shader Patcher vertex USSE memblock starting address
static void *gxm_shader_patcher_fragment_usse_addr; // Shader Patcher fragment USSE memblock starting address
static void *gxm_depth_surface_addr; // Depth surface memblock starting address
static void *gxm_stencil_surface_addr; // Stencil surface memblock starting address
static SceGxmDepthStencilSurface gxm_depth_stencil_surface; // Depth/Stencil surfaces setup for sceGxm
// Non-static state below is shared with the other vitaGL source files
SceGxmContext *gxm_context; // sceGxm context instance
GLenum error = GL_NO_ERROR; // Error returned by glGetError
SceGxmShaderPatcher *gxm_shader_patcher; // sceGxmShaderPatcher shader patcher instance
matrix4x4 mvp_matrix; // ModelViewProjection Matrix
matrix4x4 projection_matrix; // Projection Matrix
matrix4x4 modelview_matrix; // ModelView Matrix
int DISPLAY_WIDTH; // Display width in pixels
int DISPLAY_HEIGHT; // Display height in pixels
int DISPLAY_STRIDE; // Display stride in pixels
float DISPLAY_WIDTH_FLOAT; // Display width in pixels (float)
float DISPLAY_HEIGHT_FLOAT; // Display height in pixels (float)
// sceDisplay callback data: payload passed to display_queue_callback for
// each queued frame
struct display_queue_callback_data {
// Base address of the color buffer to show
void *addr;
};
// sceGxmShaderPatcher custom allocator: serves host memory requests from the
// C heap. user_data is not used.
static void *shader_patcher_host_alloc_cb(void *user_data, unsigned int size) {
return malloc(size);
}
// sceGxmShaderPatcher custom deallocator: returns host memory obtained from
// shader_patcher_host_alloc_cb to the C heap. user_data is not used.
static void shader_patcher_host_free_cb(void *user_data, void *mem) {
    // A void function must not return an expression, so free() is called
    // as a plain statement
    free(mem);
}
// sceDisplay callback: shows the color buffer carried by the queued callback
// data on the next frame and, when vsync is enabled, waits for the start of
// the vertical blank.
//   callbackData - points to a struct display_queue_callback_data
static void display_queue_callback(const void *callbackData) {
    const struct display_queue_callback_data *queued = callbackData;
    SceDisplayFrameBuf frame;

    // Populating sceDisplay framebuffer parameters; unset fields stay zero
    memset(&frame, 0, sizeof(frame));
    frame.size = sizeof(frame);
    frame.base = queued->addr;
    frame.pixelformat = SCE_DISPLAY_PIXELFORMAT_A8B8G8R8;
    frame.width = DISPLAY_WIDTH;
    frame.height = DISPLAY_HEIGHT;
    frame.pitch = DISPLAY_STRIDE;

    // Setting sceDisplay framebuffer
    sceDisplaySetFrameBuf(&frame, SCE_DISPLAY_SETBUF_NEXTFRAME);

    // Performing VSync if enabled
    if (vblank) {
        sceDisplayWaitVblankStart();
    }
}
// Initializes sceGxm with a display queue that can hold all but one of the
// display buffers and calls display_queue_callback for each queued frame.
void initGxm(void) {
    SceGxmInitializeParams setup;

    // Start from an all-zero parameter block; flags stays 0
    memset(&setup, 0, sizeof(setup));
    setup.flags = 0;
    setup.parameterBufferSize = SCE_GXM_DEFAULT_PARAMETER_BUFFER_SIZE;

    // Display queue configuration
    setup.displayQueueMaxPendingCount = DISPLAY_BUFFER_COUNT - 1;
    setup.displayQueueCallback = display_queue_callback;
    setup.displayQueueCallbackDataSize = sizeof(struct display_queue_callback_data);

    // Initializing sceGxm
    sceGxmInitialize(&setup);
}
// Allocates the four ring buffers and the host memory needed by a sceGxm
// context, then creates the context in gxm_context.
void initGxmContext(void) {
    // The same variable is handed to each allocation in turn.
    // NOTE(review): it is passed by address, so the allocator may change it -
    // confirm whether the buffers can end up outside VRAM
    vglMemType mem_kind = VGL_MEM_VRAM;
    unsigned int usse_offset;
    SceGxmContextParams ctx_params;

    // Ring buffers, allocated in this order: VDM, vertex, fragment, fragment USSE
    vdm_ring_buffer_addr = gpu_alloc_mapped(SCE_GXM_DEFAULT_VDM_RING_BUFFER_SIZE, &mem_kind);
    vertex_ring_buffer_addr = gpu_alloc_mapped(SCE_GXM_DEFAULT_VERTEX_RING_BUFFER_SIZE, &mem_kind);
    fragment_ring_buffer_addr = gpu_alloc_mapped(SCE_GXM_DEFAULT_FRAGMENT_RING_BUFFER_SIZE, &mem_kind);
    fragment_usse_ring_buffer_addr = gpu_fragment_usse_alloc_mapped(
        SCE_GXM_DEFAULT_FRAGMENT_USSE_RING_BUFFER_SIZE, &usse_offset);

    // Setting sceGxm context parameters
    memset(&ctx_params, 0, sizeof(ctx_params));

    // Host-side working memory comes from the C heap
    ctx_params.hostMem = malloc(SCE_GXM_MINIMUM_CONTEXT_HOST_MEM_SIZE);
    ctx_params.hostMemSize = SCE_GXM_MINIMUM_CONTEXT_HOST_MEM_SIZE;

    ctx_params.vdmRingBufferMem = vdm_ring_buffer_addr;
    ctx_params.vdmRingBufferMemSize = SCE_GXM_DEFAULT_VDM_RING_BUFFER_SIZE;

    ctx_params.vertexRingBufferMem = vertex_ring_buffer_addr;
    ctx_params.vertexRingBufferMemSize = SCE_GXM_DEFAULT_VERTEX_RING_BUFFER_SIZE;

    ctx_params.fragmentRingBufferMem = fragment_ring_buffer_addr;
    ctx_params.fragmentRingBufferMemSize = SCE_GXM_DEFAULT_FRAGMENT_RING_BUFFER_SIZE;

    ctx_params.fragmentUsseRingBufferMem = fragment_usse_ring_buffer_addr;
    ctx_params.fragmentUsseRingBufferMemSize = SCE_GXM_DEFAULT_FRAGMENT_USSE_RING_BUFFER_SIZE;
    ctx_params.fragmentUsseRingBufferOffset = usse_offset;

    // Initializing sceGxm context
    sceGxmCreateContext(&ctx_params, &gxm_context);
}
void termGxmContext(void) {
// Deallocating ring buffers
mempool_free(vdm_ring_buffer_addr, VGL_MEM_VRAM);
mempool_free(vertex_ring_buffer_addr, VGL_MEM_VRAM);
mempool_free(fragment_ring_buffer_addr, VGL_MEM_VRAM);
gpu_fragment_usse_free_mapped(fragment_usse_ring_buffer_addr);
// Destroying sceGxm context
sceGxmDestroyContext(gxm_context);
}
// Creates the render target used for the display (stored in gxm_render_target),
// sized DISPLAY_WIDTH x DISPLAY_HEIGHT with the current msaa_mode.
void createDisplayRenderTarget(void) {
	SceGxmRenderTargetParams target;

	memset(&target, 0, sizeof(target));
	target.flags = 0;
	// Geometry
	target.width = DISPLAY_WIDTH;
	target.height = DISPLAY_HEIGHT;
	// One scene per frame, multisampling as configured
	target.scenesPerFrame = 1;
	target.multisampleMode = msaa_mode;
	target.multisampleLocations = 0;
	// No caller-provided driver memblock
	target.driverMemBlock = -1;

	sceGxmCreateRenderTarget(&target, &gxm_render_target);
}
// Counterpart of createDisplayRenderTarget: releases gxm_render_target.
void destroyDisplayRenderTarget(void) {
// Destroying render target for the display
sceGxmDestroyRenderTarget(gxm_render_target);
}
// Allocates and clears one A8B8G8R8 color surface per display buffer,
// initializes its sceGxm color surface and creates the matching sync object.
void initDisplayColorSurfaces(void) {
	vglMemType type = VGL_MEM_VRAM;
	// A8B8G8R8 is 4 bytes per pixel
	const unsigned int surface_bytes = 4 * DISPLAY_STRIDE * DISPLAY_HEIGHT;
	int i;
	for (i = 0; i < DISPLAY_BUFFER_COUNT; i++) {
		// Allocating color surface memblock (size rounded up to 1 MiB)
		gxm_color_surfaces_addr[i] = gpu_alloc_mapped(
			ALIGN(surface_bytes, 1 * 1024 * 1024),
			&type);
		// Initializing allocated color surface: clear every pixel, not just
		// the first DISPLAY_STRIDE * DISPLAY_HEIGHT bytes (a quarter of it)
		memset(gxm_color_surfaces_addr[i], 0, surface_bytes);
		sceGxmColorSurfaceInit(&gxm_color_surfaces[i],
			SCE_GXM_COLOR_FORMAT_A8B8G8R8,
			SCE_GXM_COLOR_SURFACE_LINEAR,
			msaa_mode == SCE_GXM_MULTISAMPLE_NONE ? SCE_GXM_COLOR_SURFACE_SCALE_NONE : SCE_GXM_COLOR_SURFACE_SCALE_MSAA_DOWNSCALE,
			SCE_GXM_OUTPUT_REGISTER_SIZE_32BIT,
			DISPLAY_WIDTH,
			DISPLAY_HEIGHT,
			DISPLAY_STRIDE,
			gxm_color_surfaces_addr[i]);
		// Creating a display sync object for the allocated color surface
		sceGxmSyncObjectCreate(&gxm_sync_objects[i]);
	}
}
// Tears down what initDisplayColorSurfaces created: for every display buffer,
// frees the color surface memory and destroys its sync object.
void termDisplayColorSurfaces(void) {
	int buffer = 0;
	while (buffer < DISPLAY_BUFFER_COUNT) {
		mempool_free(gxm_color_surfaces_addr[buffer], VGL_MEM_VRAM);
		sceGxmSyncObjectDestroy(gxm_sync_objects[buffer]);
		buffer++;
	}
}
// Allocates a depth buffer (4 bytes per sample) and a stencil buffer (1 byte
// per sample) for a w x h target and initializes `surface` over them.
// w/h are rounded up to the sceGxm tile size; the sample count is scaled by
// the current msaa_mode. *depth_type / *stencil_type are set to VGL_MEM_VRAM
// and then passed by pointer to gpu_alloc_mapped.
void initDepthStencilBuffer(uint32_t w, uint32_t h, SceGxmDepthStencilSurface *surface, void **depth_buffer, void **stencil_buffer, vglMemType *depth_type, vglMemType *stencil_type) {
	const unsigned int aligned_w = ALIGN(w, SCE_GXM_TILE_SIZEX);
	const unsigned int aligned_h = ALIGN(h, SCE_GXM_TILE_SIZEY);
	unsigned int sample_count = aligned_w * aligned_h;
	unsigned int stride_in_samples = aligned_w;

	// Multisampling multiplies the sample count; only 4X also doubles the stride
	if (msaa_mode == SCE_GXM_MULTISAMPLE_4X) {
		sample_count *= 4;
		stride_in_samples *= 2;
	} else if (msaa_mode == SCE_GXM_MULTISAMPLE_2X) {
		sample_count *= 2;
	}

	// Depth plane
	*depth_type = VGL_MEM_VRAM;
	*depth_buffer = gpu_alloc_mapped(4 * sample_count, depth_type);

	// Stencil plane
	*stencil_type = VGL_MEM_VRAM;
	*stencil_buffer = gpu_alloc_mapped(1 * sample_count, stencil_type);

	// Bind both planes to the surface
	sceGxmDepthStencilSurfaceInit(surface,
		SCE_GXM_DEPTH_STENCIL_FORMAT_DF32M_S8,
		SCE_GXM_DEPTH_STENCIL_SURFACE_TILED,
		stride_in_samples,
		*depth_buffer,
		*stencil_buffer);
}
// Creates the display-sized depth/stencil surface (gxm_depth_stencil_surface)
// and records its buffers in gxm_depth_surface_addr / gxm_stencil_surface_addr.
void initDepthStencilSurfaces(void) {
// Memory types reported back by initDepthStencilBuffer; not kept afterwards
vglMemType t1, t2;
initDepthStencilBuffer(DISPLAY_WIDTH, DISPLAY_HEIGHT, &gxm_depth_stencil_surface, &gxm_depth_surface_addr, &gxm_stencil_surface_addr, &t1, &t2);
}
// Releases the display depth and stencil buffers.
// NOTE(review): both are freed as VGL_MEM_VRAM, while the memory types written
// back during allocation are discarded by initDepthStencilSurfaces - confirm
// gpu_alloc_mapped cannot place them in a different memory type.
void termDepthStencilSurfaces(void) {
// Deallocating depth and stencil surfaces memblocks
mempool_free(gxm_depth_surface_addr, VGL_MEM_VRAM);
mempool_free(gxm_stencil_surface_addr, VGL_MEM_VRAM);
}
void startShaderPatcher(void) {
// Constants for shader patcher buffers
static const unsigned int shader_patcher_buffer_size = 1024 * 1024;
static const unsigned int shader_patcher_vertex_usse_size = 1024 * 1024;
static const unsigned int shader_patcher_fragment_usse_size = 1024 * 1024;
vglMemType type = VGL_MEM_VRAM;
// Allocating Shader Patcher buffer
gxm_shader_patcher_buffer_addr = gpu_alloc_mapped(
shader_patcher_buffer_size, &type);
// Allocating Shader Patcher vertex USSE buffer
unsigned int shader_patcher_vertex_usse_offset;
gxm_shader_patcher_vertex_usse_addr = gpu_vertex_usse_alloc_mapped(
shader_patcher_vertex_usse_size, &shader_patcher_vertex_usse_offset);
// Allocating Shader Patcher fragment USSE buffer
unsigned int shader_patcher_fragment_usse_offset;
gxm_shader_patcher_fragment_usse_addr = gpu_fragment_usse_alloc_mapped(
shader_patcher_fragment_usse_size, &shader_patcher_fragment_usse_offset);
// Populating shader patcher parameters
SceGxmShaderPatcherParams shader_patcher_params;
memset(&shader_patcher_params, 0, sizeof(SceGxmShaderPatcherParams));
shader_patcher_params.userData = NULL;
shader_patcher_params.hostAllocCallback = shader_patcher_host_alloc_cb;
shader_patcher_params.hostFreeCallback = shader_patcher_host_free_cb;
shader_patcher_params.bufferAllocCallback = NULL;
shader_patcher_params.bufferFreeCallback = NULL;
shader_patcher_params.bufferMem = gxm_shader_patcher_buffer_addr;
shader_patcher_params.bufferMemSize = shader_patcher_buffer_size;
shader_patcher_params.vertexUsseAllocCallback = NULL;
shader_patcher_params.vertexUsseFreeCallback = NULL;
shader_patcher_params.vertexUsseMem = gxm_shader_patcher_vertex_usse_addr;
shader_patcher_params.vertexUsseMemSize = shader_patcher_vertex_usse_size;
shader_patcher_params.vertexUsseOffset = shader_patcher_vertex_usse_offset;
shader_patcher_params.fragmentUsseAllocCallback = NULL;
shader_patcher_params.fragmentUsseFreeCallback = NULL;
shader_patcher_params.fragmentUsseMem = gxm_shader_patcher_fragment_usse_addr;
shader_patcher_params.fragmentUsseMemSize = shader_patcher_fragment_usse_size;
shader_patcher_params.fragmentUsseOffset = shader_patcher_fragment_usse_offset;
// Creating shader patcher instance
sceGxmShaderPatcherCreate(&shader_patcher_params, &gxm_shader_patcher);
}
// Counterpart of startShaderPatcher: destroys the patcher first, then frees
// the three memory pools it was created with.
void stopShaderPatcher(void) {
// Destroying shader patcher instance
sceGxmShaderPatcherDestroy(gxm_shader_patcher);
// Freeing shader patcher buffers
mempool_free(gxm_shader_patcher_buffer_addr, VGL_MEM_VRAM);
gpu_vertex_usse_free_mapped(gxm_shader_patcher_vertex_usse_addr);
gpu_fragment_usse_free_mapped(gxm_shader_patcher_fragment_usse_addr);
}
// Blocks until the display queue and the sceGxm context have both finished.
void waitRenderingDone(void) {
// Wait for rendering to be finished
sceGxmDisplayQueueFinish();
sceGxmFinish(gxm_context);
}
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Begins a sceGxm scene on the active target (the default back buffer, or
// active_write_fb when one is bound), updates gxm_scene_flags accordingly and
// re-applies the current viewport / region clip.
void vglStartRendering(void) {
	if (active_write_fb != NULL) {
		// Rendering into a user framebuffer: this scene sets the fragment
		// dependency; afterwards the flags ask for a vertex wait instead
		gxm_scene_flags |= SCE_GXM_SCENE_FRAGMENT_SET_DEPENDENCY;
		sceGxmBeginScene(gxm_context, gxm_scene_flags, active_write_fb->target,
			NULL, NULL, NULL,
			&active_write_fb->colorbuffer,
			&active_write_fb->depthbuffer);
		gxm_scene_flags |= SCE_GXM_SCENE_VERTEX_WAIT_FOR_DEPENDENCY;
		gxm_scene_flags &= ~SCE_GXM_SCENE_FRAGMENT_SET_DEPENDENCY;
	} else {
		// Default framebuffer: render into the current back buffer
		sceGxmBeginScene(gxm_context, gxm_scene_flags, gxm_render_target,
			NULL, NULL,
			gxm_sync_objects[gxm_back_buffer_index],
			&gxm_color_surfaces[gxm_back_buffer_index],
			&gxm_depth_stencil_surface);
		gxm_scene_flags &= ~SCE_GXM_SCENE_VERTEX_WAIT_FOR_DEPENDENCY;
	}

	// Setting back current viewport if enabled cause sceGxm will reset it at sceGxmEndScene call
	if (viewport_mode) {
		sceGxmSetViewport(gxm_context, x_port, x_scale, y_port, y_scale, z_port, z_scale);
	}
	if (scissor_test_state) {
		// Scissor rectangle takes precedence for the region clip
		sceGxmSetRegionClip(gxm_context, SCE_GXM_REGION_CLIP_OUTSIDE, region.x, region.y, region.x + region.w, region.y + region.h);
	} else if (viewport_mode) {
		// NOTE(review): yMin is flipped against DISPLAY_HEIGHT but yMax is not - confirm this is intended
		sceGxmSetRegionClip(gxm_context, SCE_GXM_REGION_CLIP_OUTSIDE, gl_viewport.x, DISPLAY_HEIGHT - gl_viewport.y - gl_viewport.h, gl_viewport.x + gl_viewport.w, gl_viewport.y + gl_viewport.h);
	}
}
// First half of vglStopRendering: closes the scene opened by vglStartRendering.
void vglStopRenderingInit(void) {
// Ending drawing scene
sceGxmEndScene(gxm_context, NULL, NULL);
}
void vglStopRenderingTerm(void) {
// Waiting GPU to complete its work
sceGxmFinish(gxm_context);
if (active_write_fb == NULL) { // Default framebuffer is used
// Properly requesting a display update
struct display_queue_callback_data queue_cb_data;
queue_cb_data.addr = gxm_color_surfaces_addr[gxm_back_buffer_index];
sceGxmDisplayQueueAddEntry(gxm_sync_objects[gxm_front_buffer_index],
gxm_sync_objects[gxm_back_buffer_index], &queue_cb_data);
gxm_front_buffer_index = gxm_back_buffer_index;
gxm_back_buffer_index = (gxm_back_buffer_index + 1) % DISPLAY_BUFFER_COUNT;
}
// Resetting vitaGL mempool
gpu_pool_reset();
}
// Ends the current frame: runs vglStopRenderingInit followed by vglStopRenderingTerm.
void vglStopRendering() {
// Ending drawing scene
vglStopRenderingInit();
// Updating display and resetting vitaGL mempool
vglStopRenderingTerm();
}
// Hands the current back buffer (color surface, depth surface and sync
// object) to sceCommonDialogUpdate.
void vglUpdateCommonDialog() {
	SceCommonDialogUpdateParam dialog;
	memset(&dialog, 0, sizeof(dialog));

	// Surface layout
	dialog.renderTarget.colorFormat = SCE_GXM_COLOR_FORMAT_A8B8G8R8;
	dialog.renderTarget.surfaceType = SCE_GXM_COLOR_SURFACE_LINEAR;
	dialog.renderTarget.width = DISPLAY_WIDTH;
	dialog.renderTarget.height = DISPLAY_HEIGHT;
	dialog.renderTarget.strideInPixels = DISPLAY_STRIDE;

	// Surface memory and synchronization for the back buffer
	dialog.renderTarget.colorSurfaceData = gxm_color_surfaces_addr[gxm_back_buffer_index];
	dialog.renderTarget.depthSurfaceData = gxm_depth_surface_addr;
	dialog.displaySyncObject = gxm_sync_objects[gxm_back_buffer_index];

	sceCommonDialogUpdate(&dialog);
}
// glFinish: returns once sceGxmFinish reports the context has finished.
void glFinish(void) {
// Waiting for GPU to finish drawing jobs
sceGxmFinish(gxm_context);
}

67
deps/vitaGL/source/hacks/sbrk.c vendored Normal file
View File

@ -0,0 +1,67 @@
#include <errno.h>
#include <reent.h>
#include <vitasdk.h>
// Optional heap size override; weak, so its address is NULL when no definition is linked in
extern unsigned int _newlib_heap_size_user __attribute__((weak));
// Memblock id returned by sceKernelAllocMemBlock for the heap (0 when unset)
int _newlib_heap_memblock;
// Heap size in bytes chosen by _init_vita_heap
unsigned _newlib_heap_size;
// Heap start, one-past-the-end, and current break used by _sbrk_r
static char *_newlib_heap_base, *_newlib_heap_end, *_newlib_heap_cur;
// Storage for the lightweight mutex guarding _sbrk_r (cast to SceKernelLwMutexWork at use)
static char _newlib_sbrk_mutex[32] __attribute__((aligned(8)));
void *_sbrk_r(struct _reent *reent, ptrdiff_t incr) {
if (sceKernelLockLwMutex((SceKernelLwMutexWork *)_newlib_sbrk_mutex, 1, 0) < 0)
goto fail;
if (!_newlib_heap_base || _newlib_heap_cur + incr >= _newlib_heap_end) {
sceKernelUnlockLwMutex((SceKernelLwMutexWork *)_newlib_sbrk_mutex, 1);
fail:
reent->_errno = ENOMEM;
return (void *)-1;
}
char *prev_heap_end = _newlib_heap_cur;
_newlib_heap_cur += incr;
sceKernelUnlockLwMutex((SceKernelLwMutexWork *)_newlib_sbrk_mutex, 1);
return (void *)prev_heap_end;
}
// Sets up the newlib heap: creates the _sbrk_r mutex, picks the heap size
// (_newlib_heap_size_user when that weak symbol is linked in, 32MB otherwise),
// allocates the backing memblock and initializes the base/end/cur pointers.
// On any failure the heap pointers are cleared so that _sbrk_r reports ENOMEM.
void _init_vita_heap(void) {
	// Create a mutex to use inside _sbrk_r
	if (sceKernelCreateLwMutex((SceKernelLwMutexWork *)_newlib_sbrk_mutex, "sbrk mutex", 0, 0, 0) < 0) {
		goto failure;
	}
	if (&_newlib_heap_size_user != NULL) {
		_newlib_heap_size = _newlib_heap_size_user;
	} else {
		// Create a memblock for the heap memory, 32MB
		_newlib_heap_size = 32 * 1024 * 1024;
	}
	_newlib_heap_memblock = sceKernelAllocMemBlock("Newlib heap", 0x0c20d060, _newlib_heap_size, 0);
	if (_newlib_heap_memblock < 0) {
		goto failure;
	}
	if (sceKernelGetMemBlockBase(_newlib_heap_memblock, (void *)&_newlib_heap_base) < 0) {
		// The block was allocated but is unusable: give it back instead of leaking it
		sceKernelFreeMemBlock(_newlib_heap_memblock);
		goto failure;
	}
	_newlib_heap_end = _newlib_heap_base + _newlib_heap_size;
	_newlib_heap_cur = _newlib_heap_base;
	return;
failure:
	_newlib_heap_memblock = 0;
	_newlib_heap_base = 0;
	_newlib_heap_end = 0;
	_newlib_heap_cur = 0;
}
void _free_vita_heap(void) {
// Destroy the sbrk mutex
sceKernelDeleteLwMutex((SceKernelLwMutexWork *)_newlib_sbrk_mutex);
// Free the heap memblock to avoid memory leakage.
sceKernelFreeMemBlock(_newlib_heap_memblock);
_newlib_heap_memblock = 0;
_newlib_heap_base = 0;
_newlib_heap_cur = 0;
}

584
deps/vitaGL/source/legacy.c vendored Normal file
View File

@ -0,0 +1,584 @@
/*
* legacy.c:
* Implementation for legacy openGL 1.0 rendering method
*/
#include "shared.h"
// Vertex list struct
// Singly linked node holding one vertex position queued during model creation
typedef struct vertexList {
vector3f v; // Vertex position
void *next; // Next vertexList node, NULL on the last one
} vertexList;
// Color vertex list struct
// Singly linked node holding the color of one queued vertex
typedef struct rgbaList {
vector4f v; // Vertex color
void *next; // Next rgbaList node, NULL on the last one
} rgbaList;
// Texture coord list struct
// Singly linked node holding one queued texture coordinate
typedef struct uvList {
vector2f v; // Texture coordinate
void *next; // Next uvList node, NULL on the last one
} uvList;
// Immediate-mode state: the vertex and color lists grow together (one node
// each per submitted vertex); the texcoord list grows only on glTexCoord* calls
static vertexList *model_vertices = NULL; // Pointer to vertex list
static vertexList *last_vert = NULL; // Pointer to last element in vertex list
static rgbaList *model_color = NULL; // Pointer to color vertex list
static rgbaList *last_clr = NULL; // Pointer to last element in color vertex list
static uvList *model_uv = NULL; // Pointer to texcoord list
static uvList *last_uv = NULL; // Pointer to last element in texcoord list
static uint64_t vertex_count = 0; // Vertex counter for vertex list
static SceGxmPrimitiveType prim; // Current in use primitive for rendering
static SceGxmPrimitiveTypeExtra prim_extra = SCE_GXM_PRIMITIVE_NONE; // Current in use non native primitive for rendering
static uint8_t np = 0xFF; // Number of expected vertices per element for current in use primitive (set by glBegin)
vector4f current_color = { 1.0f, 1.0f, 1.0f, 1.0f }; // Current in use color
static void purge_vertex_list() {
vertexList *old;
rgbaList *old2;
uvList *old3;
// Purging color and vertex lists
while (model_vertices != NULL) {
old = model_vertices;
old2 = model_color;
model_vertices = model_vertices->next;
model_color = model_color->next;
free(old);
free(old2);
}
// Purging texcoord list
while (model_uv != NULL) {
old3 = model_uv;
model_uv = model_uv->next;
free(old3);
}
}
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Queues one vertex (x, y, z) together with the current color.
// Only valid in the MODEL_CREATION phase; otherwise records
// GL_INVALID_OPERATION (unless SKIP_ERROR_HANDLING is defined).
void glVertex3f(GLfloat x, GLfloat y, GLfloat z) {
#ifndef SKIP_ERROR_HANDLING
	if (phase != MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	// One node for the position, one for the color
	vertexList *vert_node = (vertexList *)malloc(sizeof(vertexList));
	rgbaList *clr_node = (rgbaList *)malloc(sizeof(rgbaList));

	// Link both nodes at the tail of their lists (or start the lists)
	if (model_vertices == NULL) {
		model_vertices = vert_node;
		model_color = clr_node;
	} else {
		last_vert->next = vert_node;
		last_clr->next = clr_node;
	}
	last_vert = vert_node;
	last_clr = clr_node;

	// Fill in the payload
	vert_node->v.x = x;
	vert_node->v.y = y;
	vert_node->v.z = z;
	memcpy(&clr_node->v, &current_color.r, sizeof(vector4f));
	vert_node->next = NULL;
	clr_node->next = NULL;

	vertex_count++;
}
void glVertex3fv(const GLfloat *v) {
#ifndef SKIP_ERROR_HANDLING
// Error handling
if (phase != MODEL_CREATION) {
error = GL_INVALID_OPERATION;
return;
}
#endif
// Adding a new element to color and vertex lists
if (model_vertices == NULL) {
model_vertices = last_vert = (vertexList *)malloc(sizeof(vertexList));
model_color = last_clr = (rgbaList *)malloc(sizeof(rgbaList));
} else {
last_vert->next = (vertexList *)malloc(sizeof(vertexList));
last_clr->next = (rgbaList *)malloc(sizeof(rgbaList));
last_vert = last_vert->next;
last_clr = last_clr->next;
}
// Properly populating the new element
memcpy(&last_vert->v, v, sizeof(vector3f));
memcpy(&last_clr->v, &current_color.r, sizeof(vector4f));
last_clr->next = last_vert->next = NULL;
// Increasing vertex counter
vertex_count++;
}
// 2D convenience wrapper: queues the vertex (x, y) with z fixed to 0.
void glVertex2f(GLfloat x, GLfloat y) {
glVertex3f(x, y, 0.0f);
}
// Sets the current color from three float components; alpha becomes 1.0.
void glColor3f(GLfloat red, GLfloat green, GLfloat blue) {
	// Same as the four-component call with an opaque alpha
	glColor4f(red, green, blue, 1.0f);
}
// Sets the current color from the float array v; alpha becomes 1.0.
void glColor3fv(const GLfloat *v) {
// Setting current color value
// Copies sizeof(vector3f) bytes from v over the components starting at .r
memcpy(&current_color.r, v, sizeof(vector3f));
current_color.a = 1.0f;
}
// Sets the current color from three 0-255 byte components; alpha becomes 1.0.
void glColor3ub(GLubyte red, GLubyte green, GLubyte blue) {
	// An alpha byte of 255 normalizes to exactly 1.0f
	glColor4ub(red, green, blue, 255);
}
// Sets the current color from the first three bytes of c (each 0-255);
// alpha becomes 1.0.
void glColor3ubv(const GLubyte *c) {
	// An alpha byte of 255 normalizes to exactly 1.0f
	glColor4ub(c[0], c[1], c[2], 255);
}
// Sets the current color from four float components (stored as given).
void glColor4f(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) {
// Setting current color value
current_color.r = red;
current_color.g = green;
current_color.b = blue;
current_color.a = alpha;
}
// Sets the current color from the float array v.
void glColor4fv(const GLfloat *v) {
// Setting current color value
// Copies sizeof(vector4f) bytes from v, starting at the .r component
memcpy(&current_color.r, v, sizeof(vector4f));
}
// Sets the current color from four 0-255 byte components, each normalized
// to the 0.0-1.0 range by dividing by 255.
void glColor4ub(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha) {
	// Setting current color value
	current_color.r = (float)red / 255.0f;
	current_color.g = (float)green / 255.0f;
	current_color.b = (float)blue / 255.0f;
	current_color.a = (float)alpha / 255.0f;
}
// Sets the current color from the four bytes of c (each 0-255), normalized
// to the 0.0-1.0 range.
void glColor4ubv(const GLubyte *c) {
	glColor4ub(c[0], c[1], c[2], c[3]);
}
// Queues the texture coordinate (f[0], f[1]).
// Only valid in the MODEL_CREATION phase; otherwise records
// GL_INVALID_OPERATION (unless SKIP_ERROR_HANDLING is defined).
void glTexCoord2fv(GLfloat *f) {
#ifndef SKIP_ERROR_HANDLING
	if (phase != MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	uvList *uv_node = (uvList *)malloc(sizeof(uvList));

	// Append at the tail, or start the list
	if (model_uv == NULL)
		model_uv = uv_node;
	else
		last_uv->next = uv_node;
	last_uv = uv_node;

	// Fill in the payload
	uv_node->v.x = f[0];
	uv_node->v.y = f[1];
	uv_node->next = NULL;
}
// Queues the texture coordinate (s, t).
// Only valid in the MODEL_CREATION phase; otherwise records
// GL_INVALID_OPERATION (unless SKIP_ERROR_HANDLING is defined).
void glTexCoord2f(GLfloat s, GLfloat t) {
#ifndef SKIP_ERROR_HANDLING
	if (phase != MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	uvList *uv_node = (uvList *)malloc(sizeof(uvList));

	// Append at the tail, or start the list
	if (model_uv == NULL)
		model_uv = uv_node;
	else
		last_uv->next = uv_node;
	last_uv = uv_node;

	// Fill in the payload
	uv_node->v.x = s;
	uv_node->v.y = t;
	uv_node->next = NULL;
}
// Integer flavour of glTexCoord2f: queues the texture coordinate (s, t),
// converted to the stored component type on assignment.
// Only valid in the MODEL_CREATION phase; otherwise records
// GL_INVALID_OPERATION (unless SKIP_ERROR_HANDLING is defined).
void glTexCoord2i(GLint s, GLint t) {
#ifndef SKIP_ERROR_HANDLING
	if (phase != MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	uvList *uv_node = (uvList *)malloc(sizeof(uvList));

	// Append at the tail, or start the list
	if (model_uv == NULL)
		model_uv = uv_node;
	else
		last_uv->next = uv_node;
	last_uv = uv_node;

	// Fill in the payload
	uv_node->v.x = s;
	uv_node->v.y = t;
	uv_node->next = NULL;
}
// Queues element i of the client-side arrays of the current client texture
// unit as one immediate-mode vertex: position from the vertex array, color
// from the color array (or the current color when that array is disabled)
// and, when enabled, a texture coordinate from the texcoord array.
// Does nothing when the vertex array is disabled. A negative i records
// GL_INVALID_VALUE (unless SKIP_ERROR_HANDLING is defined).
// Element data is copied as raw bytes, without type conversion.
void glArrayElement(GLint i) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (i < 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	// Aliasing client texture unit for better code readability
	texture_unit *tex_unit = &texture_units[client_texture_unit];
	// Checking if current texture unit has GL_VERTEX_ARRAY enabled
	if (!tex_unit->vertex_array_state)
		return;
	// Calculating offset of requested element
	uint8_t *ptr;
	if (tex_unit->vertex_array.stride == 0)
		ptr = ((uint8_t *)tex_unit->vertex_array.pointer) + (i * (tex_unit->vertex_array.num * tex_unit->vertex_array.size));
	else
		ptr = ((uint8_t *)tex_unit->vertex_array.pointer) + (i * tex_unit->vertex_array.stride);
	// Adding a new element to vertex and color lists
	if (model_vertices == NULL) {
		model_vertices = last_vert = (vertexList *)malloc(sizeof(vertexList));
		model_color = last_clr = (rgbaList *)malloc(sizeof(rgbaList));
	} else {
		last_vert->next = (vertexList *)malloc(sizeof(vertexList));
		last_clr->next = (rgbaList *)malloc(sizeof(rgbaList));
		last_vert = last_vert->next;
		last_clr = last_clr->next;
	}
	last_vert->next = NULL;
	last_clr->next = NULL;
	// Populating new vertex element. The copy is clamped to the node's storage
	// so a 4-component array cannot overwrite the `next` pointer, and the node
	// is zeroed first so components the array does not provide read as 0
	size_t vert_bytes = (size_t)tex_unit->vertex_array.size * (size_t)tex_unit->vertex_array.num;
	if (vert_bytes > sizeof(vector3f))
		vert_bytes = sizeof(vector3f);
	memset(&last_vert->v, 0, sizeof(vector3f));
	memcpy(&last_vert->v, ptr, vert_bytes);
	// Checking if current texture unit has GL_COLOR_ARRAY enabled
	if (tex_unit->color_array_state) {
		// Calculating offset of requested element
		uint8_t *ptr_clr;
		if (tex_unit->color_array.stride == 0)
			ptr_clr = ((uint8_t *)tex_unit->color_array.pointer) + (i * (tex_unit->color_array.num * tex_unit->color_array.size));
		else
			ptr_clr = ((uint8_t *)tex_unit->color_array.pointer) + (i * tex_unit->color_array.stride);
		// Populating new color element (alpha defaults to 1 for 3-component colors)
		size_t clr_bytes = (size_t)tex_unit->color_array.size * (size_t)tex_unit->color_array.num;
		if (clr_bytes > sizeof(vector4f))
			clr_bytes = sizeof(vector4f);
		last_clr->v.a = 1.0f;
		memcpy(&last_clr->v, ptr_clr, clr_bytes);
	} else {
		// Populating new color element with current color
		memcpy(&last_clr->v, &current_color.r, sizeof(vector4f));
	}
	// Checking if current texture unit has GL_TEXTURE_COORD_ARRAY enabled
	if (tex_unit->texture_array_state) {
		// Calculating offset of requested element
		uint8_t *ptr_tex;
		if (tex_unit->texture_array.stride == 0)
			ptr_tex = ((uint8_t *)tex_unit->texture_array.pointer) + (i * (tex_unit->texture_array.num * tex_unit->texture_array.size));
		else
			ptr_tex = ((uint8_t *)tex_unit->texture_array.pointer) + (i * tex_unit->texture_array.stride);
		// Adding a new element to texcoord list
		if (model_uv == NULL) {
			model_uv = last_uv = (uvList *)malloc(sizeof(uvList));
		} else {
			last_uv->next = (uvList *)malloc(sizeof(uvList));
			last_uv = last_uv->next;
		}
		// Populating new texcoord element: two components sized by the
		// texcoord array's own element size (not the vertex array's)
		size_t uv_bytes = (size_t)tex_unit->texture_array.size * 2;
		if (uv_bytes > sizeof(vector2f))
			uv_bytes = sizeof(vector2f);
		memset(&last_uv->v, 0, sizeof(vector2f));
		memcpy(&last_uv->v, ptr_tex, uv_bytes);
		last_uv->next = NULL;
	}
}
// Starts immediate-mode primitive specification.
// Translates `mode` to a sceGxm primitive, records how many vertices each
// element needs (np), enters the MODEL_CREATION phase and resets the vertex
// counter.
// Errors: GL_INVALID_OPERATION when already in MODEL_CREATION (unless
// SKIP_ERROR_HANDLING is defined); GL_INVALID_ENUM for an unsupported mode.
// In both cases the call has no other effect.
void glBegin(GLenum mode) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	// Translating primitive to sceGxm one. State is only committed once the
	// mode is known to be valid, so a bad mode cannot start a model with a
	// stale primitive type.
	SceGxmPrimitiveType new_prim;
	SceGxmPrimitiveTypeExtra new_prim_extra = SCE_GXM_PRIMITIVE_NONE;
	uint8_t new_np;
	switch (mode) {
	case GL_POINTS:
		new_prim = SCE_GXM_PRIMITIVE_POINTS;
		new_np = 1;
		break;
	case GL_LINES:
		new_prim = SCE_GXM_PRIMITIVE_LINES;
		new_np = 2;
		break;
	case GL_TRIANGLES:
		new_prim = SCE_GXM_PRIMITIVE_TRIANGLES;
		new_np = 3;
		break;
	case GL_TRIANGLE_STRIP:
		new_prim = SCE_GXM_PRIMITIVE_TRIANGLE_STRIP;
		new_np = 1;
		break;
	case GL_TRIANGLE_FAN:
		new_prim = SCE_GXM_PRIMITIVE_TRIANGLE_FAN;
		new_np = 1;
		break;
	case GL_QUADS:
		// Quads are not native: drawn as triangles through generated indices
		new_prim = SCE_GXM_PRIMITIVE_TRIANGLES;
		new_prim_extra = SCE_GXM_PRIMITIVE_QUADS;
		new_np = 4;
		break;
	default:
		error = GL_INVALID_ENUM;
		return;
	}
	// Changing current openGL machine state
	phase = MODEL_CREATION;
	prim = new_prim;
	prim_extra = new_prim_extra;
	np = new_np;
	// Resetting vertex count
	vertex_count = 0;
}
// Ends immediate-mode primitive specification and draws what was queued.
// Leaves the MODEL_CREATION phase, builds vertex / attribute / index buffers
// from the queued lists out of the GPU pool, issues one sceGxmDraw and then
// purges the lists. The textured path is taken when any texcoord was queued,
// otherwise the per-vertex color path is used.
// Errors: GL_INVALID_OPERATION when not inside glBegin (unless
// SKIP_ERROR_HANDLING is defined). An empty or incomplete vertex set is
// discarded without drawing.
void glEnd(void) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (phase != MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	// Changing current openGL machine state
	phase = NONE;
#ifndef SKIP_ERROR_HANDLING
	// Integrity checks. These run only after the phase has been reset and they
	// drop whatever was queued: bailing out while still in MODEL_CREATION
	// would make every later glBegin fail with GL_INVALID_OPERATION.
	if (vertex_count == 0 || ((vertex_count % np) != 0)) {
		purge_vertex_list();
		vertex_count = 0;
		return;
	}
#endif
	// Checking if we can totally skip drawing cause of culling mode
	if (no_polygons_mode && ((prim == SCE_GXM_PRIMITIVE_TRIANGLES) || (prim >= SCE_GXM_PRIMITIVE_TRIANGLE_STRIP))) {
		purge_vertex_list();
		vertex_count = 0;
		return;
	}
	// Aliasing server texture unit and texture id for better code readability
	texture_unit *tex_unit = &texture_units[server_texture_unit];
	int texture2d_idx = tex_unit->tex_id;
	// Calculating mvp matrix
	if (mvp_modified) {
		matrix4x4_multiply(mvp_matrix, projection_matrix, modelview_matrix);
		mvp_modified = GL_FALSE;
	}
	// Checking if we have to write a texture
	// NOTE(review): programs are chosen with the full condition below, while the
	// buffer/uniform setup further down only tests model_uv != NULL - confirm
	// the two cannot disagree (texcoords queued with texturing disabled).
	if ((server_texture_unit >= 0) && (tex_unit->enabled) && (model_uv != NULL) && (tex_unit->textures[texture2d_idx].valid)) {
		// Setting proper vertex and fragment programs
		sceGxmSetVertexProgram(gxm_context, texture2d_vertex_program_patched);
		sceGxmSetFragmentProgram(gxm_context, texture2d_fragment_program_patched);
		// Setting fragment uniforms for alpha test and texture environment
		void *alpha_buffer;
		sceGxmReserveFragmentDefaultUniformBuffer(gxm_context, &alpha_buffer);
		sceGxmSetUniformDataF(alpha_buffer, texture2d_alpha_cut, 0, 1, &alpha_ref);
		float alpha_operation = (float)alpha_op;
		sceGxmSetUniformDataF(alpha_buffer, texture2d_alpha_op, 0, 1, &alpha_operation);
		sceGxmSetUniformDataF(alpha_buffer, texture2d_tint_color, 0, 4, &current_color.r);
		float tex_env = (float)tex_unit->env_mode;
		sceGxmSetUniformDataF(alpha_buffer, texture2d_tex_env, 0, 1, &tex_env);
		float fogmode = (float)internal_fog_mode;
		sceGxmSetUniformDataF(alpha_buffer, texture2d_fog_mode, 0, 1, &fogmode);
		sceGxmSetUniformDataF(alpha_buffer, texture2d_fog_color, 0, 4, &fog_color.r);
		sceGxmSetUniformDataF(alpha_buffer, texture2d_tex_env_color, 0, 4, &texenv_color.r);
	} else {
		// Setting proper vertex and fragment programs
		sceGxmSetVertexProgram(gxm_context, rgba_vertex_program_patched);
		sceGxmSetFragmentProgram(gxm_context, rgba_fragment_program_patched);
	}
	// Reserving default uniform buffer for wvp
	int i, j;
	void *vertex_wvp_buffer;
	sceGxmReserveVertexDefaultUniformBuffer(gxm_context, &vertex_wvp_buffer);
	// Checking if we have to write a texture
	if (model_uv != NULL) {
		// Setting wvp matrix
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_wvp, 0, 16, (const float *)mvp_matrix);
		// Setting fogging uniforms
		float fogmode = (float)internal_fog_mode;
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_fog_mode2, 0, 1, (const float *)&fogmode);
		float clipplane0 = (float)clip_plane0;
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_clip_plane0, 0, 1, &clipplane0);
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_clip_plane0_eq, 0, 4, &clip_plane0_eq.x);
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_mv, 0, 16, (const float *)modelview_matrix);
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_fog_near, 0, 1, (const float *)&fog_near);
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_fog_far, 0, 1, (const float *)&fog_far);
		sceGxmSetUniformDataF(vertex_wvp_buffer, texture2d_fog_density, 0, 1, (const float *)&fog_density);
		// Setting in use texture
		sceGxmSetFragmentTexture(gxm_context, 0, &tex_unit->textures[texture2d_idx].gxm_tex);
		// Properly generating vertices, uv map and indices buffers
		// (walks the uv list once per vertex, i.e. expects one texcoord per vertex)
		vector3f *vertices;
		vector2f *uv_map;
		uint16_t *indices;
		int n = 0, quad_n = 0;
		vertexList *object = model_vertices;
		uvList *object_uv = model_uv;
		uint64_t idx_count = vertex_count;
		switch (prim_extra) {
		case SCE_GXM_PRIMITIVE_NONE:
			vertices = (vector3f *)gpu_pool_memalign(vertex_count * sizeof(vector3f), sizeof(vector3f));
			uv_map = (vector2f *)gpu_pool_memalign(vertex_count * sizeof(vector2f), sizeof(vector2f));
			memset(vertices, 0, (vertex_count * sizeof(vector3f)));
			indices = (uint16_t *)gpu_pool_memalign(idx_count * sizeof(uint16_t), sizeof(uint16_t));
			for (i = 0; i < vertex_count; i++) {
				memcpy(&vertices[n], &object->v, sizeof(vector3f));
				memcpy(&uv_map[n], &object_uv->v, sizeof(vector2f));
				indices[n] = n;
				object = object->next;
				object_uv = object_uv->next;
				n++;
			}
			break;
		case SCE_GXM_PRIMITIVE_QUADS:
			// Each quad (v0, v1, v2, v3) becomes triangles (v0, v1, v3) and (v1, v2, v3)
			quad_n = vertex_count >> 2;
			idx_count = quad_n * 6;
			vertices = (vector3f *)gpu_pool_memalign(vertex_count * sizeof(vector3f), sizeof(vector3f));
			uv_map = (vector2f *)gpu_pool_memalign(vertex_count * sizeof(vector2f), sizeof(vector2f));
			memset(vertices, 0, (vertex_count * sizeof(vector3f)));
			indices = (uint16_t *)gpu_pool_memalign(idx_count * sizeof(uint16_t), sizeof(uint16_t));
			for (i = 0; i < quad_n; i++) {
				indices[i * 6] = i * 4;
				indices[i * 6 + 1] = i * 4 + 1;
				indices[i * 6 + 2] = i * 4 + 3;
				indices[i * 6 + 3] = i * 4 + 1;
				indices[i * 6 + 4] = i * 4 + 2;
				indices[i * 6 + 5] = i * 4 + 3;
			}
			for (j = 0; j < vertex_count; j++) {
				memcpy(&vertices[j], &object->v, sizeof(vector3f));
				memcpy(&uv_map[j], &object_uv->v, sizeof(vector2f));
				object = object->next;
				object_uv = object_uv->next;
			}
			break;
		}
		// Performing the requested draw call
		sceGxmSetVertexStream(gxm_context, 0, vertices);
		sceGxmSetVertexStream(gxm_context, 1, uv_map);
		sceGxmDraw(gxm_context, prim, SCE_GXM_INDEX_FORMAT_U16, indices, idx_count);
	} else {
		// Setting wvp matrix
		sceGxmSetUniformDataF(vertex_wvp_buffer, rgba_wvp, 0, 16, (const float *)mvp_matrix);
		// Properly generating vertices, colors and indices buffers
		vector3f *vertices;
		vector4f *colors;
		uint16_t *indices;
		int n = 0, quad_n = 0;
		vertexList *object = model_vertices;
		rgbaList *object_clr = model_color;
		uint64_t idx_count = vertex_count;
		switch (prim_extra) {
		case SCE_GXM_PRIMITIVE_NONE:
			vertices = (vector3f *)gpu_pool_memalign(vertex_count * sizeof(vector3f), sizeof(vector3f));
			colors = (vector4f *)gpu_pool_memalign(vertex_count * sizeof(vector4f), sizeof(vector4f));
			memset(vertices, 0, (vertex_count * sizeof(vector3f)));
			indices = (uint16_t *)gpu_pool_memalign(idx_count * sizeof(uint16_t), sizeof(uint16_t));
			for (i = 0; i < vertex_count; i++) {
				memcpy(&vertices[n], &object->v, sizeof(vector3f));
				memcpy(&colors[n], &object_clr->v, sizeof(vector4f));
				indices[n] = n;
				object = object->next;
				object_clr = object_clr->next;
				n++;
			}
			break;
		case SCE_GXM_PRIMITIVE_QUADS:
			// Each quad (v0, v1, v2, v3) becomes triangles (v0, v1, v3) and (v1, v2, v3)
			quad_n = vertex_count >> 2;
			idx_count = quad_n * 6;
			vertices = (vector3f *)gpu_pool_memalign(vertex_count * sizeof(vector3f), sizeof(vector3f));
			colors = (vector4f *)gpu_pool_memalign(vertex_count * sizeof(vector4f), sizeof(vector4f));
			memset(vertices, 0, (vertex_count * sizeof(vector3f)));
			indices = (uint16_t *)gpu_pool_memalign(idx_count * sizeof(uint16_t), sizeof(uint16_t));
			for (i = 0; i < quad_n; i++) {
				indices[i * 6] = i * 4;
				indices[i * 6 + 1] = i * 4 + 1;
				indices[i * 6 + 2] = i * 4 + 3;
				indices[i * 6 + 3] = i * 4 + 1;
				indices[i * 6 + 4] = i * 4 + 2;
				indices[i * 6 + 5] = i * 4 + 3;
			}
			for (j = 0; j < vertex_count; j++) {
				memcpy(&vertices[j], &object->v, sizeof(vector3f));
				memcpy(&colors[j], &object_clr->v, sizeof(vector4f));
				object = object->next;
				object_clr = object_clr->next;
			}
			break;
		}
		// Performing the requested draw call
		sceGxmSetVertexStream(gxm_context, 0, vertices);
		sceGxmSetVertexStream(gxm_context, 1, colors);
		sceGxmDraw(gxm_context, prim, SCE_GXM_INDEX_FORMAT_U16, indices, idx_count);
	}
	// Purging vertex, colors and texcoord lists
	purge_vertex_list();
	vertex_count = 0;
}

213
deps/vitaGL/source/matrices.c vendored Normal file
View File

@ -0,0 +1,213 @@
/*
* matrices.c:
* Implementation for matrices related functions
*/
#include "shared.h"
// Matrix state. `matrix` is NULL until glMatrixMode selects a target; the
// matrix functions in this file dereference it without a NULL check.
matrix4x4 *matrix = NULL; // Current in-use matrix mode
static matrix4x4 modelview_matrix_stack[MODELVIEW_STACK_DEPTH]; // Modelview matrices stack
static uint8_t modelview_stack_counter = 0; // Modelview matrices stack counter
static matrix4x4 projection_matrix_stack[GENERIC_STACK_DEPTH]; // Projection matrices stack
static uint8_t projection_stack_counter = 0; // Projection matrices stack counter
GLboolean mvp_modified = GL_TRUE; // Check if ModelViewProjection matrix needs to be recreated
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Selects which matrix the following matrix calls operate on.
// GL_MODELVIEW and GL_PROJECTION are supported; any other mode records
// GL_INVALID_ENUM and leaves the selection unchanged.
void glMatrixMode(GLenum mode) {
	if (mode == GL_MODELVIEW) {
		// Modelview matrix
		matrix = &modelview_matrix;
	} else if (mode == GL_PROJECTION) {
		// Projection matrix
		matrix = &projection_matrix;
	} else {
		error = GL_INVALID_ENUM;
	}
}
// Initializes the current matrix as an orthographic projection with the
// given clipping planes and flags the MVP matrix as stale.
// Errors (unless SKIP_ERROR_HANDLING is defined): GL_INVALID_OPERATION inside
// glBegin/glEnd; GL_INVALID_VALUE when left == right, bottom == top or
// nearVal == farVal.
void glOrtho(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble nearVal, GLdouble farVal) {
#ifndef SKIP_ERROR_HANDLING
	// Not allowed while a model is being specified
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
	// Degenerate volumes are rejected
	if ((left == right) || (bottom == top) || (nearVal == farVal)) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	matrix4x4_init_orthographic(*matrix, left, right, bottom, top, nearVal, farVal);
	mvp_modified = GL_TRUE;
}
// Sets up the currently selected matrix as a perspective frustum through
// matrix4x4_init_frustum and flags the MVP matrix for regeneration.
// With error handling enabled:
//  - GL_INVALID_OPERATION is recorded while phase is MODEL_CREATION;
//  - GL_INVALID_VALUE is recorded when the volume is empty on any axis or when
//    either clip distance is not strictly positive.
// In both cases the current matrix is left unchanged.
void glFrustum(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble nearVal, GLdouble farVal) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	} else if ((left == right) || (bottom == top) || (nearVal == farVal) || (nearVal <= 0) || (farVal <= 0)) {
		// The GL specification requires strictly positive near/far distances and
		// near != far; zero or equal distances would yield a degenerate frustum
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	// Initializing frustum matrix with requested parameters
	matrix4x4_init_frustum(*matrix, left, right, bottom, top, nearVal, farVal);
	mvp_modified = GL_TRUE;
}
// Resets the currently selected matrix to identity and flags the MVP matrix
// for regeneration.
void glLoadIdentity(void) {
	matrix4x4_identity(*matrix);
	mvp_modified = GL_TRUE;
}
// Multiplies the currently selected matrix by the 16 floats in m and stores the
// product back into it. When TRANSPOSE_MATRICES is defined, m is transposed into
// a temporary before the multiplication.
void glMultMatrixf(const GLfloat *m) {
	matrix4x4 product;
#ifdef TRANSPOSE_MATRICES
	// Element [row][col] of the operand comes from m[col * 4 + row]
	matrix4x4 transposed;
	int row, col;
	for (row = 0; row < 4; row++)
		for (col = 0; col < 4; col++)
			transposed[row][col] = m[col * 4 + row];
	matrix4x4_multiply(product, *matrix, transposed);
#else
	// m is handed to the multiply helper as-is
	matrix4x4_multiply(product, *matrix, m);
#endif
	// The helper writes to a scratch matrix, so copy the result back
	matrix4x4_copy(*matrix, product);
	mvp_modified = GL_TRUE;
}
// Replaces the currently selected matrix with the 16 floats in m and flags the
// MVP matrix for regeneration. When TRANSPOSE_MATRICES is defined the values are
// transposed while being stored; otherwise they are copied verbatim.
void glLoadMatrixf(const GLfloat *m) {
#ifdef TRANSPOSE_MATRICES
	// Properly ordering matrix: element [i][j] comes from m[j * 4 + i]
	int i, j;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			(*matrix)[i][j] = m[j * 4 + i];
		}
	}
#else
	memcpy(*matrix, m, sizeof(matrix4x4));
#endif
	mvp_modified = GL_TRUE;
}
// Applies a translation by (x, y, z) to the currently selected matrix and flags
// the MVP matrix for regeneration.
void glTranslatef(GLfloat x, GLfloat y, GLfloat z) {
	matrix4x4_translate(*matrix, x, y, z);
	mvp_modified = GL_TRUE;
}
// Applies a scale by (x, y, z) to the currently selected matrix and flags the
// MVP matrix for regeneration.
void glScalef(GLfloat x, GLfloat y, GLfloat z) {
	matrix4x4_scale(*matrix, x, y, z);
	mvp_modified = GL_TRUE;
}
// Rotates the currently selected matrix by angle (degrees, converted with
// DEG_TO_RAD) around each principal axis whose component equals exactly 1.0f,
// in x, y, z order.
// NOTE(review): axis components other than exactly 1.0f are ignored, so arbitrary
// or non-normalized axes produce no rotation - confirm this limitation is intended.
void glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) {
#ifndef SKIP_ERROR_HANDLING
	// Not allowed while phase is MODEL_CREATION
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	const float radians = DEG_TO_RAD(angle);
	if (x == 1.0f)
		matrix4x4_rotate_x(*matrix, radians);
	if (y == 1.0f)
		matrix4x4_rotate_y(*matrix, radians);
	if (z == 1.0f)
		matrix4x4_rotate_z(*matrix, radians);
	// Flagged even when no axis matched
	mvp_modified = GL_TRUE;
}
// Saves a copy of the currently selected matrix on the stack that belongs to it
// (modelview or projection). With error handling enabled, a full stack records
// GL_STACK_OVERFLOW and nothing is pushed.
void glPushMatrix(void) {
#ifndef SKIP_ERROR_HANDLING
	// Not allowed while phase is MODEL_CREATION
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	if (matrix == &modelview_matrix) {
#ifndef SKIP_ERROR_HANDLING
		if (modelview_stack_counter >= MODELVIEW_STACK_DEPTH) {
			error = GL_STACK_OVERFLOW;
			return;
		}
#endif
		// Store at the current top slot, then advance the counter
		matrix4x4_copy(modelview_matrix_stack[modelview_stack_counter], *matrix);
		modelview_stack_counter++;
	} else if (matrix == &projection_matrix) {
#ifndef SKIP_ERROR_HANDLING
		if (projection_stack_counter >= GENERIC_STACK_DEPTH) {
			error = GL_STACK_OVERFLOW;
			return;
		}
#endif
		// Store at the current top slot, then advance the counter
		matrix4x4_copy(projection_matrix_stack[projection_stack_counter], *matrix);
		projection_stack_counter++;
	}
}
// Restores the currently selected matrix from the top of its stack (modelview or
// projection). With error handling enabled, an empty stack records
// GL_STACK_UNDERFLOW and the matrix is left as it was. The MVP matrix is flagged
// for regeneration whenever the MODEL_CREATION check is passed.
void glPopMatrix(void) {
#ifndef SKIP_ERROR_HANDLING
	// Not allowed while phase is MODEL_CREATION
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif
	if (matrix == &modelview_matrix) {
#ifndef SKIP_ERROR_HANDLING
		if (modelview_stack_counter == 0) {
			error = GL_STACK_UNDERFLOW;
		} else
#endif
		{
			// Step back to the last saved slot, then read it
			modelview_stack_counter--;
			matrix4x4_copy(*matrix, modelview_matrix_stack[modelview_stack_counter]);
		}
	} else if (matrix == &projection_matrix) {
#ifndef SKIP_ERROR_HANDLING
		if (projection_stack_counter == 0) {
			error = GL_STACK_UNDERFLOW;
		} else
#endif
		{
			// Step back to the last saved slot, then read it
			projection_stack_counter--;
			matrix4x4_copy(*matrix, projection_matrix_stack[projection_stack_counter]);
		}
	}
	mvp_modified = GL_TRUE;
}

536
deps/vitaGL/source/misc.c vendored Normal file
View File

@ -0,0 +1,536 @@
/*
* misc.c:
* Implementation for miscellaneous functions
*/
#include "shared.h"
// Derives internal_fog_mode from the fogging flag and the GL fog_mode value:
// DISABLED when fogging is off, otherwise LINEAR / EXP for the matching GL modes
// and EXP2 for anything else.
static void update_fogging_state() {
	if (!fogging) {
		internal_fog_mode = DISABLED;
		return;
	}
	if (fog_mode == GL_LINEAR)
		internal_fog_mode = LINEAR;
	else if (fog_mode == GL_EXP)
		internal_fog_mode = EXP;
	else
		internal_fog_mode = EXP2;
}
// Re-applies the depth bias for front and back faces. For each face, the offset
// flag matching that face's polygon mode (line / point / fill) decides whether
// the stored factor and units (truncated to int) or a zero bias is sent to sceGxm.
// A polygon mode outside those three leaves that face's bias untouched.
static void update_polygon_offset() {
	// Front faces
	switch (polygon_mode_front) {
	case SCE_GXM_POLYGON_MODE_TRIANGLE_LINE:
		sceGxmSetFrontDepthBias(gxm_context, pol_offset_line ? (int)pol_factor : 0, pol_offset_line ? (int)pol_units : 0);
		break;
	case SCE_GXM_POLYGON_MODE_TRIANGLE_POINT:
		sceGxmSetFrontDepthBias(gxm_context, pol_offset_point ? (int)pol_factor : 0, pol_offset_point ? (int)pol_units : 0);
		break;
	case SCE_GXM_POLYGON_MODE_TRIANGLE_FILL:
		sceGxmSetFrontDepthBias(gxm_context, pol_offset_fill ? (int)pol_factor : 0, pol_offset_fill ? (int)pol_units : 0);
		break;
	}
	// Back faces
	switch (polygon_mode_back) {
	case SCE_GXM_POLYGON_MODE_TRIANGLE_LINE:
		sceGxmSetBackDepthBias(gxm_context, pol_offset_line ? (int)pol_factor : 0, pol_offset_line ? (int)pol_units : 0);
		break;
	case SCE_GXM_POLYGON_MODE_TRIANGLE_POINT:
		sceGxmSetBackDepthBias(gxm_context, pol_offset_point ? (int)pol_factor : 0, pol_offset_point ? (int)pol_units : 0);
		break;
	case SCE_GXM_POLYGON_MODE_TRIANGLE_FILL:
		sceGxmSetBackDepthBias(gxm_context, pol_offset_fill ? (int)pol_factor : 0, pol_offset_fill ? (int)pol_units : 0);
		break;
	}
}
static void change_cull_mode() {
// Setting proper cull mode in sceGxm depending to current openGL machine state
if (cull_face_state) {
if ((gl_front_face == GL_CW) && (gl_cull_mode == GL_BACK))
sceGxmSetCullMode(gxm_context, SCE_GXM_CULL_CCW);
else if ((gl_front_face == GL_CCW) && (gl_cull_mode == GL_BACK))
sceGxmSetCullMode(gxm_context, SCE_GXM_CULL_CW);
else if ((gl_front_face == GL_CCW) && (gl_cull_mode == GL_FRONT))
sceGxmSetCullMode(gxm_context, SCE_GXM_CULL_CCW);
else if ((gl_front_face == GL_CW) && (gl_cull_mode == GL_FRONT))
sceGxmSetCullMode(gxm_context, SCE_GXM_CULL_CW);
else if (gl_cull_mode == GL_FRONT_AND_BACK)
no_polygons_mode = GL_TRUE;
} else
sceGxmSetCullMode(gxm_context, SCE_GXM_CULL_NONE);
}
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Sets how polygons are rasterized (points, lines or filled) for the requested
// face(s), mirrors the choice into the GL-side and sceGxm-side state, and
// refreshes the polygon offset, which depends on the polygon mode.
// An unsupported face or mode records GL_INVALID_ENUM and changes nothing.
void glPolygonMode(GLenum face, GLenum mode) {
	SceGxmPolygonMode new_mode;
	switch (mode) {
	case GL_POINT:
		new_mode = SCE_GXM_POLYGON_MODE_TRIANGLE_POINT;
		break;
	case GL_LINE:
		new_mode = SCE_GXM_POLYGON_MODE_TRIANGLE_LINE;
		break;
	case GL_FILL:
		new_mode = SCE_GXM_POLYGON_MODE_TRIANGLE_FILL;
		break;
	default:
		// Stop here: carrying on would store and submit an uninitialized new_mode
		error = GL_INVALID_ENUM;
		return;
	}
	switch (face) {
	case GL_FRONT:
		polygon_mode_front = new_mode;
		gl_polygon_mode_front = mode;
		sceGxmSetFrontPolygonMode(gxm_context, new_mode);
		break;
	case GL_BACK:
		polygon_mode_back = new_mode;
		gl_polygon_mode_back = mode;
		sceGxmSetBackPolygonMode(gxm_context, new_mode);
		break;
	case GL_FRONT_AND_BACK:
		polygon_mode_front = polygon_mode_back = new_mode;
		gl_polygon_mode_front = gl_polygon_mode_back = mode;
		sceGxmSetFrontPolygonMode(gxm_context, new_mode);
		sceGxmSetBackPolygonMode(gxm_context, new_mode);
		break;
	default:
		error = GL_INVALID_ENUM;
		return;
	}
	update_polygon_offset();
}
// Stores the polygon offset factor and units, then re-applies the depth bias.
void glPolygonOffset(GLfloat factor, GLfloat units) {
	pol_units = units;
	pol_factor = factor;
	update_polygon_offset();
}
// Records which faces should be culled. The value is stored without validation;
// the sceGxm cull mode is only refreshed while face culling is enabled.
void glCullFace(GLenum mode) {
	gl_cull_mode = mode;
	if (!cull_face_state)
		return;
	change_cull_mode();
}
// Records the winding order that marks a front face. The value is stored without
// validation; the sceGxm cull mode is only refreshed while face culling is enabled.
void glFrontFace(GLenum mode) {
	gl_front_face = mode;
	if (!cull_face_state)
		return;
	change_cull_mode();
}
// Sets the viewport rectangle. The GL rectangle is converted into the
// offset/scale pairs passed to sceGxmSetViewport, with the vertical axis flipped
// against DISPLAY_HEIGHT; the raw GL values are kept in gl_viewport.
void glViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
#ifndef SKIP_ERROR_HANDLING
	// Negative sizes are rejected
	if (width < 0 || height < 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	// Remember the rectangle exactly as requested
	gl_viewport.x = x;
	gl_viewport.y = y;
	gl_viewport.w = width;
	gl_viewport.h = height;

	// Horizontal: scale is half the width, offset is the rectangle's center
	x_scale = width >> 1;
	x_port = x + x_scale;
	// Vertical: negative scale, offset measured from DISPLAY_HEIGHT
	y_scale = -(height >> 1);
	y_port = DISPLAY_HEIGHT - y + y_scale;

	sceGxmSetViewport(gxm_context, x_port, x_scale, y_port, y_scale, z_port, z_scale);
	viewport_mode = 1;
}
// Sets the depth range: z_port becomes the midpoint of [nearVal, farVal] and
// z_scale half its extent, then the viewport is re-submitted to sceGxm.
// The inputs are used as given (no clamping is performed here).
void glDepthRange(GLdouble nearVal, GLdouble farVal) {
	z_scale = (farVal - nearVal) / 2.0f;
	z_port = (farVal + nearVal) / 2.0f;
	sceGxmSetViewport(gxm_context, x_port, x_scale, y_port, y_scale, z_port, z_scale);
	viewport_mode = 1;
}
// Single-precision variant of glDepthRange: z_port becomes the midpoint of
// [nearVal, farVal] and z_scale half its extent, then the viewport is
// re-submitted to sceGxm. The inputs are used as given (no clamping here).
void glDepthRangef(GLfloat nearVal, GLfloat farVal) {
	z_scale = (farVal - nearVal) / 2.0f;
	z_port = (farVal + nearVal) / 2.0f;
	sceGxmSetViewport(gxm_context, x_port, x_scale, y_port, y_scale, z_port, z_scale);
	viewport_mode = 1;
}
// Enables the capability selected by cap: sets the matching state flag and, where
// one exists, calls the helper that applies that state. An unsupported cap
// records GL_INVALID_ENUM.
void glEnable(GLenum cap) {
#ifndef SKIP_ERROR_HANDLING
// Not allowed while phase is MODEL_CREATION
if (phase == MODEL_CREATION) {
error = GL_INVALID_OPERATION;
return;
}
#endif
switch (cap) {
case GL_DEPTH_TEST:
depth_test_state = GL_TRUE;
change_depth_func();
break;
case GL_STENCIL_TEST:
stencil_test_state = GL_TRUE;
change_stencil_settings();
break;
case GL_BLEND:
// The blend factor is only re-applied on a disabled -> enabled transition,
// and before the flag itself is updated
if (!blend_state)
change_blend_factor();
blend_state = GL_TRUE;
break;
case GL_SCISSOR_TEST:
scissor_test_state = GL_TRUE;
update_scissor_test();
break;
case GL_CULL_FACE:
cull_face_state = GL_TRUE;
change_cull_mode();
break;
case GL_POLYGON_OFFSET_FILL:
pol_offset_fill = GL_TRUE;
update_polygon_offset();
break;
case GL_POLYGON_OFFSET_LINE:
pol_offset_line = GL_TRUE;
update_polygon_offset();
break;
case GL_POLYGON_OFFSET_POINT:
pol_offset_point = GL_TRUE;
update_polygon_offset();
break;
case GL_TEXTURE_2D:
// Only the texture unit selected by server_texture_unit is affected
texture_units[server_texture_unit].enabled = GL_TRUE;
break;
case GL_ALPHA_TEST:
alpha_test_state = GL_TRUE;
update_alpha_test_settings();
break;
case GL_FOG:
fogging = GL_TRUE;
update_fogging_state();
break;
case GL_CLIP_PLANE0:
// Flag only; no helper is called here
clip_plane0 = GL_TRUE;
break;
default:
error = GL_INVALID_ENUM;
break;
}
}
// Disables the capability selected by cap: clears the matching state flag and,
// where one exists, calls the helper that applies that state. An unsupported cap
// records GL_INVALID_ENUM.
void glDisable(GLenum cap) {
#ifndef SKIP_ERROR_HANDLING
// Not allowed while phase is MODEL_CREATION
if (phase == MODEL_CREATION) {
error = GL_INVALID_OPERATION;
return;
}
#endif
switch (cap) {
case GL_DEPTH_TEST:
depth_test_state = GL_FALSE;
change_depth_func();
break;
case GL_STENCIL_TEST:
stencil_test_state = GL_FALSE;
change_stencil_settings();
break;
case GL_BLEND:
// disable_blend is only called on an enabled -> disabled transition,
// and before the flag itself is updated
if (blend_state)
disable_blend();
blend_state = GL_FALSE;
break;
case GL_SCISSOR_TEST:
scissor_test_state = GL_FALSE;
update_scissor_test();
break;
case GL_CULL_FACE:
cull_face_state = GL_FALSE;
change_cull_mode();
break;
case GL_POLYGON_OFFSET_FILL:
pol_offset_fill = GL_FALSE;
update_polygon_offset();
break;
case GL_POLYGON_OFFSET_LINE:
pol_offset_line = GL_FALSE;
update_polygon_offset();
break;
case GL_POLYGON_OFFSET_POINT:
pol_offset_point = GL_FALSE;
update_polygon_offset();
break;
case GL_TEXTURE_2D:
// Only the texture unit selected by server_texture_unit is affected
texture_units[server_texture_unit].enabled = GL_FALSE;
break;
case GL_ALPHA_TEST:
alpha_test_state = GL_FALSE;
update_alpha_test_settings();
break;
case GL_FOG:
fogging = GL_FALSE;
update_fogging_state();
break;
case GL_CLIP_PLANE0:
// Flag only; no helper is called here
clip_plane0 = GL_FALSE;
break;
default:
error = GL_INVALID_ENUM;
break;
}
}
// Clears the buffers selected by mask. Each requested buffer (color, depth,
// stencil) is cleared by drawing a 4-index triangle fan from clear_vertices with
// a dedicated shader pair, after temporarily overriding depth test/write state.
// Depth test/write (and, for color, polygon mode; for stencil, stencil settings)
// are restored after each pass.
// NOTE(review): the vertex/fragment programs set here are not restored in this
// function - presumably draw calls rebind their own programs; confirm.
void glClear(GLbitfield mask) {
// Snapshot used when restoring the depth-write state after each pass
GLenum orig_depth_test = depth_test_state;
if ((mask & GL_COLOR_BUFFER_BIT) == GL_COLOR_BUFFER_BIT) {
// Color pass: depth writes off, both faces forced to filled triangles
invalidate_depth_test();
change_depth_write(SCE_GXM_DEPTH_WRITE_DISABLED);
sceGxmSetFrontPolygonMode(gxm_context, SCE_GXM_POLYGON_MODE_TRIANGLE_FILL);
sceGxmSetBackPolygonMode(gxm_context, SCE_GXM_POLYGON_MODE_TRIANGLE_FILL);
sceGxmSetVertexProgram(gxm_context, clear_vertex_program_patched);
sceGxmSetFragmentProgram(gxm_context, clear_fragment_program_patched);
// Upload the 4 floats starting at clear_rgba_val.r as the clear color uniform
void *color_buffer;
sceGxmReserveFragmentDefaultUniformBuffer(gxm_context, &color_buffer);
sceGxmSetUniformDataF(color_buffer, clear_color, 0, 4, &clear_rgba_val.r);
sceGxmSetVertexStream(gxm_context, 0, clear_vertices);
sceGxmDraw(gxm_context, SCE_GXM_PRIMITIVE_TRIANGLE_FAN, SCE_GXM_INDEX_FORMAT_U16, depth_clear_indices, 4);
// Restore depth and polygon mode state
validate_depth_test();
change_depth_write((depth_mask_state && orig_depth_test) ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
sceGxmSetFrontPolygonMode(gxm_context, polygon_mode_front);
sceGxmSetBackPolygonMode(gxm_context, polygon_mode_back);
}
if ((mask & GL_DEPTH_BUFFER_BIT) == GL_DEPTH_BUFFER_BIT) {
// Depth pass: depth writes on, fragment program that disables the color buffer
invalidate_depth_test();
change_depth_write(SCE_GXM_DEPTH_WRITE_ENABLED);
sceGxmSetVertexProgram(gxm_context, clear_vertex_program_patched);
sceGxmSetFragmentProgram(gxm_context, disable_color_buffer_fragment_program_patched);
void *depth_buffer;
sceGxmReserveFragmentDefaultUniformBuffer(gxm_context, &depth_buffer);
// depth_value is copied into a float so its address can be handed to sceGxmSetUniformDataF
float temp = depth_value;
sceGxmSetUniformDataF(depth_buffer, clear_depth, 0, 1, &temp);
sceGxmSetVertexStream(gxm_context, 0, clear_vertices);
sceGxmDraw(gxm_context, SCE_GXM_PRIMITIVE_TRIANGLE_FAN, SCE_GXM_INDEX_FORMAT_U16, depth_clear_indices, 4);
// Restore depth state
validate_depth_test();
change_depth_write((depth_mask_state && orig_depth_test) ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
}
if ((mask & GL_STENCIL_BUFFER_BIT) == GL_STENCIL_BUFFER_BIT) {
// Stencil pass: depth writes off, color buffer disabled; the stencil function is
// NEVER with REPLACE for all three operations on both faces
invalidate_depth_test();
change_depth_write(SCE_GXM_DEPTH_WRITE_DISABLED);
sceGxmSetVertexProgram(gxm_context, clear_vertex_program_patched);
sceGxmSetFragmentProgram(gxm_context, disable_color_buffer_fragment_program_patched);
sceGxmSetFrontStencilFunc(gxm_context,
SCE_GXM_STENCIL_FUNC_NEVER,
SCE_GXM_STENCIL_OP_REPLACE,
SCE_GXM_STENCIL_OP_REPLACE,
SCE_GXM_STENCIL_OP_REPLACE,
0, stencil_value * 0xFF);
sceGxmSetBackStencilFunc(gxm_context,
SCE_GXM_STENCIL_FUNC_NEVER,
SCE_GXM_STENCIL_OP_REPLACE,
SCE_GXM_STENCIL_OP_REPLACE,
SCE_GXM_STENCIL_OP_REPLACE,
0, stencil_value * 0xFF);
void *depth_buffer;
sceGxmReserveFragmentDefaultUniformBuffer(gxm_context, &depth_buffer);
// The depth uniform is fixed at 1.0 for this pass
float temp = 1.0f;
sceGxmSetUniformDataF(depth_buffer, clear_depth, 0, 1, &temp);
sceGxmSetVertexStream(gxm_context, 0, clear_vertices);
sceGxmDraw(gxm_context, SCE_GXM_PRIMITIVE_TRIANGLE_FAN, SCE_GXM_INDEX_FORMAT_U16, depth_clear_indices, 4);
// Restore depth state and the user's stencil configuration
validate_depth_test();
change_depth_write((depth_mask_state && orig_depth_test) ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
change_stencil_settings();
}
}
// Stores the color used by glClear for color buffer clears. Values are stored
// as given (no clamping is performed here).
void glClearColor(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) {
	clear_rgba_val.a = alpha;
	clear_rgba_val.b = blue;
	clear_rgba_val.g = green;
	clear_rgba_val.r = red;
}
// Copies a width x height rectangle out of the frame buffer reported by
// sceDisplayGetFrameBuf into data, flipping it vertically. Only GL_RGBA and
// GL_RGB with GL_UNSIGNED_BYTE are supported; anything else records
// GL_INVALID_ENUM and writes nothing.
// NOTE(review): the source is read as 4 bytes per pixel with pitch counted in
// pixels, and the rectangle is not bounds-checked against the frame buffer -
// confirm both against the display configuration.
void glReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *data) {
	SceDisplayFrameBuf fb;
	fb.size = sizeof(SceDisplayFrameBuf);
	sceDisplayGetFrameBuf(&fb, SCE_DISPLAY_SETBUF_NEXTFRAME);

	if (type != GL_UNSIGNED_BYTE || (format != GL_RGBA && format != GL_RGB)) {
		error = GL_INVALID_ENUM;
		return;
	}

	// Convert the GL bottom-left origin into a top-left frame buffer row
	y = DISPLAY_HEIGHT - (height + y);

	int row, col;
	if (format == GL_RGBA) {
		// Whole 32-bit pixels are copied unchanged
		uint32_t *dst = (uint32_t *)data;
		uint32_t *src = (uint32_t *)fb.base;
		src += (x + y * fb.pitch);
		for (row = 0; row < height; row++) {
			for (col = 0; col < width; col++) {
				// Source row `row` lands on destination row (height - 1 - row)
				dst[(height - (row + 1)) * width + col] = src[col];
			}
			src += fb.pitch;
		}
	} else {
		// GL_RGB: keep the first three bytes of every 4-byte source pixel
		uint8_t *dst = (uint8_t *)data;
		uint8_t *src = (uint8_t *)fb.base;
		src += (x * 4 + y * fb.pitch * 4);
		for (row = 0; row < height; row++) {
			for (col = 0; col < width; col++) {
				dst[((height - (row + 1)) * width + col) * 3] = src[col * 4];
				dst[((height - (row + 1)) * width + col) * 3 + 1] = src[col * 4 + 1];
				dst[((height - (row + 1)) * width + col) * 3 + 2] = src[col * 4 + 2];
			}
			src += fb.pitch * 4;
		}
	}
}
// Sets the rasterized line width. sceGxm exposes a single point/line width per
// face, so the same value is applied to front and back faces.
void glLineWidth(GLfloat width) {
#ifndef SKIP_ERROR_HANDLING
	// Widths must be strictly positive
	if (width <= 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	sceGxmSetBackPointLineWidth(gxm_context, width);
	sceGxmSetFrontPointLineWidth(gxm_context, width);
}
// Sets the rasterized point size. It goes through the same sceGxm point/line
// width setters that glLineWidth uses, applied to front and back faces.
void glPointSize(GLfloat size) {
#ifndef SKIP_ERROR_HANDLING
	// Sizes must be strictly positive
	if (size <= 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	sceGxmSetBackPointLineWidth(gxm_context, size);
	sceGxmSetFrontPointLineWidth(gxm_context, size);
}
// Sets one scalar fog parameter (mode, density, start or end). Changing the mode
// also refreshes the internal fog mode. Any other pname records GL_INVALID_ENUM.
void glFogf(GLenum pname, GLfloat param) {
	if (pname == GL_FOG_MODE) {
		fog_mode = param;
		update_fogging_state();
	} else if (pname == GL_FOG_DENSITY) {
		fog_density = param;
	} else if (pname == GL_FOG_START) {
		fog_near = param;
	} else if (pname == GL_FOG_END) {
		fog_far = param;
	} else {
		error = GL_INVALID_ENUM;
	}
}
// Sets one fog parameter from an array. Scalar parameters (mode, density, start,
// end) read params[0]; GL_FOG_COLOR copies sizeof(vector4f) bytes from params
// into fog_color. Changing the mode also refreshes the internal fog mode.
// Any other pname records GL_INVALID_ENUM without reading params.
void glFogfv(GLenum pname, const GLfloat *params) {
	if (pname == GL_FOG_MODE) {
		fog_mode = params[0];
		update_fogging_state();
	} else if (pname == GL_FOG_DENSITY) {
		fog_density = params[0];
	} else if (pname == GL_FOG_START) {
		fog_near = params[0];
	} else if (pname == GL_FOG_END) {
		fog_far = params[0];
	} else if (pname == GL_FOG_COLOR) {
		memcpy(&fog_color.r, params, sizeof(vector4f));
	} else {
		error = GL_INVALID_ENUM;
	}
}
// Integer variant of glFogf: sets one scalar fog parameter (mode, density, start
// or end). Changing the mode also refreshes the internal fog mode. Any other
// pname records GL_INVALID_ENUM.
void glFogi(GLenum pname, const GLint param) {
	if (pname == GL_FOG_MODE) {
		fog_mode = param;
		update_fogging_state();
	} else if (pname == GL_FOG_DENSITY) {
		fog_density = param;
	} else if (pname == GL_FOG_START) {
		fog_near = param;
	} else if (pname == GL_FOG_END) {
		fog_far = param;
	} else {
		error = GL_INVALID_ENUM;
	}
}
// Defines the equation of a user clip plane. Only GL_CLIP_PLANE0 is supported;
// any other plane records GL_INVALID_ENUM. The four coefficients are stored and
// then multiplied by the transposed inverse of the current modelview matrix,
// with the result written back into clip_plane0_eq.
void glClipPlane(GLenum plane, const GLdouble *equation) {
	if (plane != GL_CLIP_PLANE0) {
		error = GL_INVALID_ENUM;
		return;
	}

	// Raw coefficients as supplied by the caller
	clip_plane0_eq.x = equation[0];
	clip_plane0_eq.y = equation[1];
	clip_plane0_eq.z = equation[2];
	clip_plane0_eq.w = equation[3];

	// Build the inverse-transpose of the modelview matrix
	matrix4x4 mv_inverse, mv_inverse_transposed;
	matrix4x4_invert(mv_inverse, modelview_matrix);
	matrix4x4_transpose(mv_inverse_transposed, mv_inverse);

	// Transform into a scratch vector, then copy it over the stored equation
	vector4f transformed;
	vector4f_matrix4x4_mult(&transformed, mv_inverse_transposed, &clip_plane0_eq);
	memcpy(&clip_plane0_eq.x, &transformed.x, sizeof(vector4f));
}

89
deps/vitaGL/source/shaders.h vendored Normal file
View File

@ -0,0 +1,89 @@
/*
*shaders.h:
*Header file for default shaders related stuffs
*/
#ifndef _SHADERS_H_
#define _SHADERS_H_
// Each group below holds, for one built-in shader: the shader patcher ids, the
// program parameter handles, and the patched vertex/fragment programs.
// NOTE(review): these are object declarations without `extern` in a header, i.e.
// tentative definitions in every translation unit that includes it. That links
// only when the toolchain merges common symbols (e.g. -fcommon) or the header is
// included from a single .c file - confirm against the build flags.
// Disable color buffer shader
SceGxmShaderPatcherId disable_color_buffer_fragment_id;
const SceGxmProgramParameter *disable_color_buffer_position;
SceGxmFragmentProgram *disable_color_buffer_fragment_program_patched;
const SceGxmProgramParameter *clear_depth;
// Clear shader
SceGxmShaderPatcherId clear_vertex_id;
SceGxmShaderPatcherId clear_fragment_id;
const SceGxmProgramParameter *clear_position;
const SceGxmProgramParameter *clear_color;
SceGxmVertexProgram *clear_vertex_program_patched;
SceGxmFragmentProgram *clear_fragment_program_patched;
// Color (RGBA/RGB) shader
SceGxmShaderPatcherId rgba_vertex_id;
SceGxmShaderPatcherId rgb_vertex_id;
SceGxmShaderPatcherId rgba_fragment_id;
const SceGxmProgramParameter *rgba_position;
const SceGxmProgramParameter *rgba_color;
const SceGxmProgramParameter *rgba_wvp;
const SceGxmProgramParameter *rgb_position;
const SceGxmProgramParameter *rgb_color;
const SceGxmProgramParameter *rgb_wvp;
SceGxmVertexProgram *rgba_vertex_program_patched;
SceGxmVertexProgram *rgba_u8n_vertex_program_patched;
SceGxmVertexProgram *rgb_vertex_program_patched;
SceGxmVertexProgram *rgb_u8n_vertex_program_patched;
SceGxmFragmentProgram *rgba_fragment_program_patched;
const SceGxmProgram *rgba_fragment_program;
// Texture2D shader
SceGxmShaderPatcherId texture2d_vertex_id;
SceGxmShaderPatcherId texture2d_fragment_id;
const SceGxmProgramParameter *texture2d_position;
const SceGxmProgramParameter *texture2d_texcoord;
const SceGxmProgramParameter *texture2d_wvp;
const SceGxmProgramParameter *texture2d_alpha_cut;
const SceGxmProgramParameter *texture2d_alpha_op;
const SceGxmProgramParameter *texture2d_tint_color;
const SceGxmProgramParameter *texture2d_tex_env;
const SceGxmProgramParameter *texture2d_clip_plane0;
const SceGxmProgramParameter *texture2d_clip_plane0_eq;
const SceGxmProgramParameter *texture2d_mv;
const SceGxmProgramParameter *texture2d_fog_mode;
const SceGxmProgramParameter *texture2d_fog_mode2;
const SceGxmProgramParameter *texture2d_fog_near;
const SceGxmProgramParameter *texture2d_fog_far;
const SceGxmProgramParameter *texture2d_fog_density;
const SceGxmProgramParameter *texture2d_fog_color;
const SceGxmProgramParameter *texture2d_tex_env_color;
SceGxmVertexProgram *texture2d_vertex_program_patched;
SceGxmFragmentProgram *texture2d_fragment_program_patched;
const SceGxmProgram *texture2d_fragment_program;
// Texture2D+RGBA shader
SceGxmShaderPatcherId texture2d_rgba_vertex_id;
SceGxmShaderPatcherId texture2d_rgba_fragment_id;
const SceGxmProgramParameter *texture2d_rgba_position;
const SceGxmProgramParameter *texture2d_rgba_texcoord;
const SceGxmProgramParameter *texture2d_rgba_wvp;
const SceGxmProgramParameter *texture2d_rgba_alpha_cut;
const SceGxmProgramParameter *texture2d_rgba_alpha_op;
const SceGxmProgramParameter *texture2d_rgba_color;
const SceGxmProgramParameter *texture2d_rgba_tex_env;
const SceGxmProgramParameter *texture2d_rgba_clip_plane0;
const SceGxmProgramParameter *texture2d_rgba_clip_plane0_eq;
const SceGxmProgramParameter *texture2d_rgba_mv;
const SceGxmProgramParameter *texture2d_rgba_fog_mode;
const SceGxmProgramParameter *texture2d_rgba_fog_mode2;
const SceGxmProgramParameter *texture2d_rgba_fog_near;
const SceGxmProgramParameter *texture2d_rgba_fog_far;
const SceGxmProgramParameter *texture2d_rgba_fog_density;
const SceGxmProgramParameter *texture2d_rgba_fog_color;
const SceGxmProgramParameter *texture2d_rgba_tex_env_color;
SceGxmVertexProgram *texture2d_rgba_vertex_program_patched;
SceGxmVertexProgram *texture2d_rgba_u8n_vertex_program_patched;
SceGxmFragmentProgram *texture2d_rgba_fragment_program_patched;
const SceGxmProgram *texture2d_rgba_fragment_program;
#endif

23
deps/vitaGL/source/shaders/clear_f.h vendored Normal file
View File

@ -0,0 +1,23 @@
#ifndef __clear_f__
#define __clear_f__
// Embedded binary for the clear fragment shader.
// size_clear_f is the byte length of clear_f and must match the 240 initializer
// bytes below. The data starts with the ASCII bytes "GXP\0" and ends with the
// parameter name string "u_clear_color".
// NOTE(review): presumably a compiled GXP program registered with the shader
// patcher - do not edit by hand; confirm against the shader sources.
static unsigned int size_clear_f = 240;
static unsigned char clear_f[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0xee, 0x00, 0x00, 0x00, 0xd2, 0x36, 0xf6, 0xf2,
0x44, 0xaa, 0x22, 0x14, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xa8, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00,
0x78, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x80, 0x19, 0xa0,
0x7e, 0x0d, 0x84, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x44, 0xfa,
0x00, 0x00, 0x00, 0xc5, 0x22, 0x04, 0x80, 0x38, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
0x10, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x75, 0x5f, 0x63, 0x6c, 0x65, 0x61, 0x72, 0x5f, 0x63, 0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x00, 0x00,
};
#endif

25
deps/vitaGL/source/shaders/clear_v.h vendored Normal file
View File

@ -0,0 +1,25 @@
#ifndef __clear_v__
#define __clear_v__
// Embedded binary for the clear vertex shader.
// size_clear_v is the byte length of clear_v and must match the 264 initializer
// bytes below. The data starts with the ASCII bytes "GXP\0" and ends with the
// parameter name string "position".
// NOTE(review): presumably a compiled GXP program registered with the shader
// patcher - do not edit by hand; confirm against the shader sources.
static unsigned int size_clear_v = 264;
static unsigned char clear_v[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x05, 0x01, 0x00, 0x00, 0x5b, 0x80, 0x2c, 0x29,
0x17, 0xc6, 0xc1, 0x92, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x00, 0x00,
0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x01, 0x00, 0x24, 0x9f,
0x88, 0x1f, 0xc5, 0x08, 0x00, 0x0f, 0x00, 0x03, 0x21, 0x05, 0x80, 0x38, 0x00, 0x0f, 0x04, 0x03,
0x59, 0x05, 0x80, 0x38, 0x00, 0x00, 0x20, 0xa0, 0x00, 0x50, 0x27, 0xfb, 0x10, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x6f, 0x73, 0x69,
0x74, 0x69, 0x6f, 0x6e, 0x00, 0x00, 0x00, 0x00,
};
#endif

View File

@ -0,0 +1,25 @@
#ifndef __disable_color_buffer_f__
#define __disable_color_buffer_f__
// Embedded binary for the "disable color buffer" fragment shader.
// size_disable_color_buffer_f is the byte length of disable_color_buffer_f and
// must match the 272 initializer bytes below. The data starts with the ASCII
// bytes "GXP\0" and ends with the parameter name string "depth_clear".
// NOTE(review): presumably a compiled GXP program registered with the shader
// patcher - do not edit by hand; confirm against the shader sources.
static unsigned int size_disable_color_buffer_f = 272;
static unsigned char disable_color_buffer_f[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0xf4, 0x51, 0x72, 0xac,
0x53, 0xb5, 0x21, 0x1b, 0x15, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00,
0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x07, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x44, 0xfa, 0x00, 0x00, 0x00, 0xe0,
0x08, 0x00, 0x81, 0x50, 0x02, 0x01, 0x00, 0xf0, 0x00, 0x00, 0x30, 0xfb, 0x00, 0x00, 0x00, 0x00,
0x40, 0x01, 0x04, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x44, 0xfa, 0x00, 0x00, 0x00, 0x00,
0x00, 0xe0, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x13, 0x00, 0x00, 0x00,
0x02, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5f, 0x63, 0x6c, 0x65, 0x61, 0x72, 0x00,
};
#endif

29
deps/vitaGL/source/shaders/rgb_v.h vendored Normal file
View File

@ -0,0 +1,29 @@
#ifndef __rgb_v__
#define __rgb_v__
// Embedded binary for the RGB color vertex shader.
// size_rgb_v is the byte length of rgb_v and must match the 332 initializer bytes
// below. The data starts with the ASCII bytes "GXP\0" and ends with the parameter
// name strings "aPosition", "aColor" and "wvp".
// NOTE(review): presumably a compiled GXP program registered with the shader
// patcher - do not edit by hand; confirm against the shader sources.
static unsigned int size_rgb_v = 332;
static unsigned char rgb_v[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x49, 0x01, 0x00, 0x00, 0xbc, 0xb8, 0x59, 0x66,
0x33, 0x69, 0xbf, 0xbb, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x08, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x00, 0x00,
0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0xaf,
0x9c, 0x0d, 0x80, 0x40, 0x00, 0xa2, 0x3d, 0xc0, 0x81, 0x38, 0x90, 0x18, 0x81, 0x00, 0x04, 0x9f,
0x84, 0x1f, 0xc5, 0x08, 0x00, 0x0f, 0x08, 0x03, 0x21, 0x05, 0x80, 0x38, 0x00, 0x0f, 0x0c, 0x03,
0x59, 0x05, 0x80, 0x38, 0x00, 0x00, 0x20, 0xa0, 0x00, 0x50, 0x27, 0xfb, 0x0e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x10, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x61, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x00, 0x61, 0x43,
0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x77, 0x76, 0x70, 0x00, 0x00, 0x00, 0x00,
};
#endif

21
deps/vitaGL/source/shaders/rgba_f.h vendored Normal file
View File

@ -0,0 +1,21 @@
#ifndef __rgba_f__
#define __rgba_f__
// Embedded binary for the RGBA color fragment shader.
// size_rgba_f is the byte length of rgba_f and must match the 200 initializer
// bytes below. The data starts with the ASCII bytes "GXP\0".
// NOTE(review): presumably a compiled GXP program registered with the shader
// patcher - do not edit by hand; confirm against the shader sources.
static unsigned int size_rgba_f = 200;
static unsigned char rgba_f[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0xb4, 0x45, 0x0f, 0x73,
0x8c, 0x01, 0x37, 0xf7, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x04, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0f, 0xa0, 0xc0, 0x2e,
0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x44, 0xfa,
};
#endif

28
deps/vitaGL/source/shaders/rgba_v.h vendored Normal file
View File

@ -0,0 +1,28 @@
#ifndef __rgba_v__
#define __rgba_v__
// Embedded binary for the RGBA color vertex shader.
// size_rgba_v is the byte length of rgba_v and must match the 308 initializer
// bytes below. The data starts with the ASCII bytes "GXP\0" and ends with the
// parameter name strings "aPosition", "aColor" and "wvp".
// NOTE(review): presumably a compiled GXP program registered with the shader
// patcher - do not edit by hand; confirm against the shader sources.
static unsigned int size_rgba_v = 308;
static unsigned char rgba_v[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x31, 0x01, 0x00, 0x00, 0x05, 0x21, 0xf7, 0xc8,
0xe0, 0xa4, 0xdd, 0x80, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x08, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x00, 0x00, 0x00,
0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x80, 0x00, 0x08, 0x83, 0x21, 0x1d, 0x80, 0x38, 0x02, 0x80, 0x81, 0xaf,
0x9c, 0x0d, 0x80, 0x40, 0x00, 0xa2, 0x3d, 0xc0, 0x81, 0x30, 0x90, 0x18, 0x00, 0x00, 0x20, 0xa0,
0x00, 0x50, 0x27, 0xfb, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x30, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x50, 0x6f, 0x73,
0x69, 0x74, 0x69, 0x6f, 0x6e, 0x00, 0x61, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x77, 0x76, 0x70,
0x00, 0x00, 0x00, 0x00,
};
#endif

View File

@ -0,0 +1,73 @@
// Embedded shader blob "texture2d_f", stored as a C byte array so it can be included directly.
// The payload starts with the ASCII bytes 'G','X','P',0 and ends with the NUL-terminated
// names "alphaCut", "alphaOp", "tintColor", "texEnv", "fog_mode", "fogColor", "texEnvColor", "tex".
// NOTE(review): presumably a compiled GXM fragment program (going by the "_f" suffix) -- confirm.
// Both objects are `static`, so every translation unit that includes this header gets its own copy.
#ifndef __texture2d_f__
#define __texture2d_f__
// Size of texture2d_f[] in bytes; equals the initializer length (64 rows of 16 bytes + 8 = 1032).
static unsigned int size_texture2d_f = 1032;
static unsigned char texture2d_f[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x08, 0x04, 0x00, 0x00, 0xae, 0x88, 0x5a, 0x84,
0x11, 0x87, 0x08, 0x77, 0x09, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x03, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x05, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x41, 0x00, 0x00, 0x00,
0xac, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0xcc, 0x02, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x02, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xb0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x04, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x0f, 0xc0, 0x00, 0x0e,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x10, 0x44, 0xa3,
0xa6, 0x41, 0xa4, 0x08, 0x88, 0x10, 0xc4, 0xa1, 0xaa, 0x00, 0xc0, 0x08, 0x00, 0x00, 0x00, 0x00,
0x40, 0x01, 0x04, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x44, 0xfa, 0x93, 0x83, 0x07, 0xf0, 0x81, 0x06, 0xa8, 0x48, 0x03, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0xa0, 0x08, 0x30, 0x81, 0x50, 0x1b, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf8, 0x94, 0x83, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x01, 0x00, 0x04, 0xb0,
0x84, 0x41, 0xa4, 0x09, 0x42, 0x00, 0x44, 0xb0, 0x88, 0x41, 0xc0, 0x09, 0x17, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x95, 0x83, 0x07, 0xf0, 0x81, 0x01, 0xa8, 0x48, 0x0f, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x96, 0x83, 0x07, 0xf0, 0x81, 0x01, 0xa8, 0x48, 0x09, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x04, 0x81, 0x99, 0xff,
0xbc, 0x0d, 0xc0, 0x40, 0x00, 0x01, 0x5d, 0x8f, 0x84, 0x8b, 0xa1, 0x18, 0x41, 0x5f, 0x44, 0x1f,
0x84, 0x1b, 0xa5, 0x08, 0x40, 0x6f, 0x04, 0x10, 0x84, 0x09, 0xa5, 0x08, 0x40, 0x6f, 0x4d, 0x10,
0x80, 0x00, 0x81, 0x08, 0x41, 0x80, 0x4d, 0x80, 0x00, 0x81, 0xe1, 0x18, 0x0b, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf8, 0x41, 0x03, 0x00, 0xf0, 0x84, 0x91, 0xa0, 0x00, 0xc2, 0x11, 0x40, 0xf0,
0x84, 0x00, 0x80, 0x00, 0x42, 0x80, 0x64, 0xb0, 0x00, 0x11, 0x84, 0x08, 0x07, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x04, 0x81, 0x81, 0xff,
0x9c, 0x0d, 0xc0, 0x40, 0x41, 0x00, 0x11, 0x8f, 0xc0, 0x8b, 0xb1, 0x18, 0x7c, 0x00, 0x10, 0x80,
0x84, 0x81, 0xa0, 0x00, 0x7c, 0x10, 0x50, 0x80, 0x84, 0x40, 0x80, 0x00, 0x80, 0x00, 0x04, 0xc2,
0x08, 0x05, 0x80, 0x38, 0x18, 0x83, 0x07, 0xf0, 0x81, 0x06, 0x88, 0x48, 0x00, 0x00, 0x0c, 0xa0,
0x85, 0x01, 0x8b, 0x4d, 0x15, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xfd, 0x14, 0x83, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0x30, 0x15, 0xc9, 0x88, 0x49, 0x12, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x15, 0x83, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0x30,
0x95, 0xca, 0x88, 0x49, 0x0f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x16, 0x83, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0x30, 0x95, 0xc2, 0x88, 0x49, 0x0c, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x17, 0x83, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0x30,
0x95, 0xc1, 0x88, 0x49, 0x09, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x13, 0x83, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0x30, 0x15, 0xc5, 0x88, 0x49, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x19, 0x83, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x00, 0x00, 0x0c, 0xa0,
0x85, 0x01, 0x8b, 0x48, 0x00, 0x81, 0x03, 0x50, 0x05, 0x89, 0x8f, 0x4d, 0x40, 0x00, 0x00, 0x01,
0x0a, 0x05, 0x80, 0x38, 0x00, 0x80, 0x03, 0xb0, 0x85, 0x86, 0x88, 0x4a, 0x12, 0x09, 0x00, 0xf0,
0x06, 0x04, 0x30, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x04, 0xf8, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x17, 0x84, 0x07, 0xf0, 0x81, 0x06, 0xa8, 0x48, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x0c, 0x85, 0x81, 0xff,
0x9c, 0x0d, 0xc0, 0x40, 0x02, 0x00, 0x11, 0x8f, 0xc0, 0x8b, 0xb1, 0x18, 0x3c, 0x20, 0x00, 0x00,
0x84, 0x91, 0x80, 0x00, 0x7c, 0x20, 0x40, 0x00, 0x84, 0x40, 0x80, 0x00, 0x02, 0x80, 0x19, 0x00,
0x7e, 0x0d, 0x80, 0x40, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xe0, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x13, 0x00, 0x00, 0x00,
0x12, 0x00, 0x08, 0x00, 0x80, 0x00, 0x00, 0x00, 0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, 0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x02, 0x04, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x43, 0x75, 0x74, 0x00, 0x61, 0x6c, 0x70,
0x68, 0x61, 0x4f, 0x70, 0x00, 0x74, 0x69, 0x6e, 0x74, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x74,
0x65, 0x78, 0x45, 0x6e, 0x76, 0x00, 0x66, 0x6f, 0x67, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x00, 0x66,
0x6f, 0x67, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x74, 0x65, 0x78, 0x45, 0x6e, 0x76, 0x43, 0x6f,
0x6c, 0x6f, 0x72, 0x00, 0x74, 0x65, 0x78, 0x00,
};
#endif

View File

@ -0,0 +1,73 @@
// Embedded shader blob "texture2d_rgba_f", stored as a C byte array so it can be included directly.
// The payload starts with the ASCII bytes 'G','X','P',0 and ends with the NUL-terminated
// names "alphaCut", "alphaOp", "texEnv", "fog_mode", "fogColor", "texEnvColor", "tex".
// NOTE(review): presumably a compiled GXM fragment program (going by the "_f" suffix) -- confirm.
// Both objects are `static`, so every translation unit that includes this header gets its own copy.
#ifndef __texture2d_rgba_f__
#define __texture2d_rgba_f__
// Size of texture2d_rgba_f[] in bytes; equals the initializer length (65 rows of 16 bytes = 1040).
static unsigned int size_texture2d_rgba_f = 1040;
static unsigned char texture2d_rgba_f[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x0e, 0x04, 0x00, 0x00, 0x3d, 0x43, 0x2b, 0xd5,
0x8b, 0x8f, 0xb3, 0x36, 0x09, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x09, 0x00, 0x14, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x48, 0x00, 0x00, 0x00,
0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xec, 0x02, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0x02, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xd0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x02, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0xc0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x04, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0xa9, 0xd0, 0x0c,
0x00, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x0f, 0xc0, 0x00, 0x0e,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x44, 0xfa, 0x0d, 0x81, 0x07, 0xf0,
0x81, 0x06, 0xa8, 0x48, 0x03, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0xa0,
0x0a, 0x30, 0x81, 0x50, 0x22, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf8, 0x0e, 0x81, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x00, 0x44, 0xa0, 0x84, 0x41, 0xa4, 0x09, 0xc1, 0x00, 0x04, 0xa0,
0x88, 0x41, 0xc0, 0x09, 0x1b, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x0f, 0x81, 0x07, 0xf0,
0x81, 0x01, 0xa8, 0x48, 0x13, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x10, 0x81, 0x07, 0xf0,
0x81, 0x01, 0xa8, 0x48, 0x09, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0x00,
0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x99, 0xaf, 0xbc, 0x0d, 0xc0, 0x40, 0x02, 0x01, 0x5d, 0x8f,
0x84, 0x8b, 0xa1, 0x18, 0x41, 0x5f, 0x44, 0x1f, 0x84, 0x1b, 0xa5, 0x08, 0x40, 0x6f, 0x04, 0x10,
0x86, 0x09, 0xa5, 0x08, 0x40, 0x6f, 0x4d, 0x10, 0x82, 0x00, 0x81, 0x08, 0x43, 0x80, 0x4d, 0x80,
0x02, 0x81, 0xe1, 0x18, 0x12, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00,
0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x99, 0xaf, 0xbc, 0x0d, 0xc0, 0x40, 0x06, 0x82, 0xa1, 0xaf,
0x9c, 0x0d, 0xc0, 0x40, 0x04, 0x01, 0x91, 0xdf, 0x80, 0x8b, 0xb1, 0x18, 0x3d, 0x21, 0x11, 0x00,
0xc2, 0x89, 0xb1, 0x18, 0xbd, 0xa0, 0x48, 0x00, 0xc2, 0x80, 0xb1, 0x18, 0x43, 0x80, 0x4d, 0x80,
0x02, 0x81, 0xe1, 0x18, 0x0a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf8, 0x00, 0x00, 0x00, 0x00,
0x40, 0x09, 0x00, 0xf8, 0x06, 0x82, 0x81, 0xaf, 0x9c, 0x0d, 0xc0, 0x40, 0x00, 0x01, 0x1d, 0x8f,
0xc4, 0x8b, 0xa1, 0x18, 0xc0, 0xc0, 0x13, 0xa0, 0x82, 0x81, 0xa0, 0x00, 0xc1, 0xc0, 0x5b, 0xa0,
0x82, 0x00, 0x80, 0x00, 0x04, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf8, 0x01, 0x4e, 0x01, 0x01,
0x02, 0x00, 0x10, 0xfa, 0x00, 0x00, 0x00, 0x20, 0x0a, 0x30, 0x81, 0x50, 0x01, 0x01, 0x01, 0x01,
0x00, 0x00, 0x10, 0xfa, 0x92, 0x80, 0x07, 0xf0, 0x81, 0x06, 0x88, 0x48, 0x00, 0x00, 0x0c, 0xa0,
0x85, 0x01, 0x8b, 0x4d, 0x15, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xfd, 0x8e, 0x80, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0xb0, 0x15, 0xc9, 0x88, 0x49, 0x12, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x8f, 0x80, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0xb0,
0x95, 0xca, 0x88, 0x49, 0x0f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x90, 0x80, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0xb0, 0x95, 0xc2, 0x88, 0x49, 0x0c, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x91, 0x80, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0xb0,
0x95, 0xc1, 0x88, 0x49, 0x09, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x8d, 0x80, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x80, 0x80, 0x03, 0xb0, 0x15, 0xc5, 0x88, 0x49, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x93, 0x80, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x00, 0x00, 0x0c, 0xa0,
0x85, 0x01, 0x8b, 0x48, 0x00, 0x81, 0x03, 0x50, 0x05, 0x89, 0x8f, 0x4d, 0x40, 0x00, 0x08, 0x81,
0x0a, 0x05, 0x80, 0x38, 0x00, 0x81, 0x03, 0xb0, 0x85, 0x86, 0x88, 0x4a, 0x0c, 0x06, 0x00, 0xf0,
0x06, 0x04, 0x30, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x04, 0xf8, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x91, 0x81, 0x07, 0xf0, 0x81, 0x06, 0xa8, 0x48, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x06, 0x82, 0x81, 0xff,
0x9c, 0x0d, 0xc0, 0x40, 0x04, 0x00, 0x11, 0x8f, 0xc0, 0x8b, 0xb1, 0x18, 0x3c, 0x40, 0x00, 0x80,
0x86, 0x91, 0x80, 0x00, 0x7c, 0x40, 0x40, 0x80, 0x86, 0x40, 0x80, 0x00, 0x02, 0x80, 0x19, 0xa0,
0x7e, 0x0d, 0x80, 0x40, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xe0, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x13, 0x00, 0x00, 0x00,
0x0c, 0x00, 0x08, 0x00, 0x70, 0x00, 0x00, 0x00, 0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x02, 0x04, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x43, 0x75, 0x74, 0x00, 0x61, 0x6c, 0x70,
0x68, 0x61, 0x4f, 0x70, 0x00, 0x74, 0x65, 0x78, 0x45, 0x6e, 0x76, 0x00, 0x66, 0x6f, 0x67, 0x5f,
0x6d, 0x6f, 0x64, 0x65, 0x00, 0x66, 0x6f, 0x67, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x74, 0x65,
0x78, 0x45, 0x6e, 0x76, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x00, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00,
};
#endif

View File

@ -0,0 +1,65 @@
// Embedded shader blob "texture2d_rgba_v", stored as a C byte array so it can be included directly.
// The payload starts with the ASCII bytes 'G','X','P',0 and ends with the NUL-terminated
// names "position", "texcoord", "color", "wvp", "fog_mode", "clip_plane0", "clip_plane0_eq",
// "modelview", "fog_near", "fog_far", "fog_density".
// NOTE(review): presumably a compiled GXM vertex program (going by the "_v" suffix) -- confirm.
// Both objects are `static`, so every translation unit that includes this header gets its own copy.
#ifndef __texture2d_rgba_v__
#define __texture2d_rgba_v__
// Size of texture2d_rgba_v[] in bytes; equals the initializer length (56 rows of 16 bytes + 4 = 900).
static unsigned int size_texture2d_rgba_v = 900;
static unsigned char texture2d_rgba_v[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x83, 0x03, 0x00, 0x00, 0x8f, 0x3b, 0x70, 0x51,
0x7e, 0x76, 0xab, 0xcf, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x44, 0x02, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x0c, 0x00, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x27, 0x00, 0x00, 0x00,
0xac, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0xf4, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xd8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x01, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0xc8, 0x01, 0x00, 0x00, 0x37, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x1a, 0x00, 0x0d, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x68, 0x41, 0x0a, 0x05, 0x82, 0x38,
0x14, 0x15, 0x04, 0xa5, 0xa6, 0x10, 0xa4, 0x08, 0x02, 0x0a, 0x20, 0x82, 0x02, 0x00, 0x80, 0x30,
0x11, 0x85, 0x68, 0xa6, 0x86, 0x10, 0xc0, 0x08, 0x10, 0x06, 0x04, 0xa5, 0xa6, 0x00, 0xa4, 0x08,
0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x04, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x01, 0x0e, 0x01, 0x34, 0x0a, 0x00, 0x10, 0xfa, 0x80, 0x00, 0x08, 0x83,
0x21, 0x25, 0x80, 0x38, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x10, 0xfa, 0x2b, 0x91, 0x07, 0xf0,
0x81, 0x06, 0xc8, 0x48, 0x40, 0x05, 0xf0, 0xc1, 0x00, 0x05, 0x80, 0x38, 0x41, 0x92, 0x80, 0x41,
0x81, 0x80, 0xd3, 0x19, 0x06, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0x00,
0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0xaf, 0x9c, 0x0d, 0x80, 0x40, 0x08, 0xa2, 0xbd, 0xc0,
0x82, 0x38, 0x90, 0x18, 0x26, 0x92, 0x99, 0xff, 0xbc, 0x0d, 0x80, 0x40, 0x02, 0xa2, 0x91, 0x81,
0x81, 0x80, 0xd1, 0x18, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0xaf,
0x9c, 0x0d, 0x80, 0x40, 0x00, 0xa2, 0x3d, 0xc0, 0x81, 0x30, 0x90, 0x18, 0xac, 0x90, 0x07, 0xf0,
0x81, 0x0a, 0x88, 0x48, 0x80, 0x06, 0x10, 0xc1, 0x01, 0x05, 0x80, 0x39, 0x14, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0x5f,
0x9c, 0x0d, 0x80, 0x40, 0x3c, 0xa2, 0x11, 0x00, 0x82, 0x80, 0x81, 0x18, 0x01, 0x00, 0x00, 0x80,
0x02, 0x02, 0x80, 0x30, 0x02, 0x00, 0x00, 0x80, 0x02, 0x00, 0x80, 0x30, 0xad, 0x90, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x59, 0x04, 0x14, 0xf0, 0xa6, 0x00, 0x80, 0x01, 0x0a, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0xab, 0x90, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x00, 0x04, 0x04, 0xe0, 0x86, 0x10, 0xa4, 0x08, 0x00, 0x00, 0x04, 0xa0,
0x86, 0x00, 0xa4, 0x08, 0x17, 0x00, 0x04, 0xb0, 0x86, 0x10, 0xa4, 0x08, 0x01, 0x00, 0x00, 0x80,
0x02, 0x06, 0x80, 0x30, 0x03, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf8, 0x00, 0x05, 0x04, 0xe0,
0x86, 0x10, 0xa4, 0x08, 0x01, 0x00, 0x00, 0x80, 0x02, 0x06, 0x80, 0x30, 0x17, 0x50, 0x04, 0xb0,
0x86, 0x00, 0xa4, 0x08, 0x00, 0x60, 0x04, 0x91, 0x85, 0x00, 0xa5, 0x08, 0x00, 0x00, 0x20, 0xa0,
0x00, 0x50, 0x27, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x05, 0x00, 0x00, 0x00,
0x26, 0x35, 0x05, 0xc0, 0x06, 0x00, 0x00, 0x00, 0x3b, 0xaa, 0xb8, 0x3f, 0x0e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x2a, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x07, 0x00, 0xb0, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa9, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa2, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00,
0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00,
0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x7a, 0x00, 0x00, 0x00,
0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00,
0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x70, 0x6f, 0x73, 0x69,
0x74, 0x69, 0x6f, 0x6e, 0x00, 0x74, 0x65, 0x78, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x00, 0x63, 0x6f,
0x6c, 0x6f, 0x72, 0x00, 0x77, 0x76, 0x70, 0x00, 0x66, 0x6f, 0x67, 0x5f, 0x6d, 0x6f, 0x64, 0x65,
0x00, 0x63, 0x6c, 0x69, 0x70, 0x5f, 0x70, 0x6c, 0x61, 0x6e, 0x65, 0x30, 0x00, 0x63, 0x6c, 0x69,
0x70, 0x5f, 0x70, 0x6c, 0x61, 0x6e, 0x65, 0x30, 0x5f, 0x65, 0x71, 0x00, 0x6d, 0x6f, 0x64, 0x65,
0x6c, 0x76, 0x69, 0x65, 0x77, 0x00, 0x66, 0x6f, 0x67, 0x5f, 0x6e, 0x65, 0x61, 0x72, 0x00, 0x66,
0x6f, 0x67, 0x5f, 0x66, 0x61, 0x72, 0x00, 0x66, 0x6f, 0x67, 0x5f, 0x64, 0x65, 0x6e, 0x73, 0x69,
0x74, 0x79, 0x00, 0x00,
};
#endif

View File

@ -0,0 +1,62 @@
// Embedded shader blob "texture2d_v", stored as a C byte array so it can be included directly.
// The payload starts with the ASCII bytes 'G','X','P',0 and ends with the NUL-terminated
// names "position", "texcoord", "wvp", "fog_mode", "clip_plane0", "clip_plane0_eq",
// "modelview", "fog_near", "fog_far", "fog_density".
// NOTE(review): presumably a compiled GXM vertex program (going by the "_v" suffix) -- confirm.
// Both objects are `static`, so every translation unit that includes this header gets its own copy.
#ifndef __texture2d_v__
#define __texture2d_v__
// Size of texture2d_v[] in bytes; equals the initializer length (54 rows of 16 bytes = 864).
static unsigned int size_texture2d_v = 864;
static unsigned char texture2d_v[] __attribute__((aligned(16))) = {
0x47, 0x58, 0x50, 0x00, 0x01, 0x04, 0x00, 0x00, 0x5d, 0x03, 0x00, 0x00, 0x7d, 0xa1, 0xee, 0xec,
0x39, 0xcd, 0x14, 0x0a, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x08, 0x00, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x25, 0x00, 0x00, 0x00,
0xac, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0xe4, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xc8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x01, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0xb8, 0x01, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x00, 0x09, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x68, 0x41, 0x0a, 0x05, 0x82, 0x38,
0x14, 0x15, 0x04, 0xa5, 0xa6, 0x10, 0xa4, 0x08, 0x02, 0x0a, 0x20, 0x82, 0x02, 0x00, 0x80, 0x30,
0x11, 0x85, 0x68, 0xa6, 0x86, 0x10, 0xc0, 0x08, 0x10, 0x06, 0x04, 0xa5, 0xa6, 0x00, 0xa4, 0x08,
0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x04, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x07, 0x44, 0xfa, 0x80, 0x00, 0x0c, 0x83, 0x21, 0x05, 0x80, 0x38, 0x2b, 0x91, 0x07, 0xf0,
0x81, 0x06, 0xc8, 0x48, 0x40, 0x05, 0xf0, 0xc1, 0x00, 0x05, 0x80, 0x38, 0x41, 0x92, 0x00, 0x41,
0x81, 0x80, 0xd3, 0x19, 0x06, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0x00,
0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0xaf, 0x9c, 0x0d, 0x80, 0x40, 0x08, 0xa2, 0x3d, 0xc0,
0x81, 0x38, 0x90, 0x18, 0x26, 0x92, 0x99, 0xff, 0xbc, 0x0d, 0x80, 0x40, 0x00, 0xa2, 0x11, 0x41,
0x81, 0x80, 0xd1, 0x18, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0xaf,
0x9c, 0x0d, 0x80, 0x40, 0x00, 0xa2, 0x3d, 0xc0, 0x81, 0x30, 0x90, 0x18, 0xac, 0x90, 0x07, 0xf0,
0x81, 0x0a, 0x88, 0x48, 0x80, 0x06, 0x08, 0xc1, 0x01, 0x05, 0x80, 0x39, 0x14, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x40, 0x09, 0x00, 0xf8, 0x02, 0x80, 0x81, 0x5f,
0x9c, 0x0d, 0x80, 0x40, 0x3c, 0xa2, 0x11, 0x00, 0x82, 0x80, 0x81, 0x18, 0x01, 0x00, 0x00, 0x80,
0x02, 0x02, 0x80, 0x30, 0x02, 0x00, 0x00, 0x80, 0x02, 0x00, 0x80, 0x30, 0xad, 0x90, 0x07, 0xf0,
0x81, 0x01, 0x88, 0x48, 0x59, 0x04, 0x14, 0xf0, 0xa6, 0x00, 0x80, 0x01, 0x0a, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0xab, 0x90, 0x07, 0xf0, 0x81, 0x01, 0x88, 0x48, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0xf9, 0x00, 0x04, 0x04, 0xe0, 0x86, 0x10, 0xa4, 0x08, 0x00, 0x00, 0x04, 0xa0,
0x86, 0x00, 0xa4, 0x08, 0x17, 0x00, 0x04, 0xb0, 0x86, 0x10, 0xa4, 0x08, 0x01, 0x00, 0x00, 0x80,
0x02, 0x06, 0x80, 0x30, 0x03, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xf8, 0x00, 0x05, 0x04, 0xe0,
0x86, 0x10, 0xa4, 0x08, 0x01, 0x00, 0x00, 0x80, 0x02, 0x06, 0x80, 0x30, 0x17, 0x50, 0x04, 0xb0,
0x86, 0x00, 0xa4, 0x08, 0x00, 0x60, 0x84, 0x90, 0x85, 0x00, 0xa5, 0x08, 0x00, 0x00, 0x20, 0xa0,
0x00, 0x50, 0x27, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x05, 0x00, 0x00, 0x00,
0x26, 0x35, 0x05, 0xc0, 0x06, 0x00, 0x00, 0x00, 0x3b, 0xaa, 0xb8, 0x3f, 0x0e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x2a, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x07, 0x00, 0xa0, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00,
0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00,
0x41, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x7a, 0x00, 0x00, 0x00,
0x01, 0xe4, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x6d, 0x00, 0x00, 0x00,
0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00,
0x01, 0xe1, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x70, 0x6f, 0x73, 0x69,
0x74, 0x69, 0x6f, 0x6e, 0x00, 0x74, 0x65, 0x78, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x00, 0x77, 0x76,
0x70, 0x00, 0x66, 0x6f, 0x67, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x00, 0x63, 0x6c, 0x69, 0x70, 0x5f,
0x70, 0x6c, 0x61, 0x6e, 0x65, 0x30, 0x00, 0x63, 0x6c, 0x69, 0x70, 0x5f, 0x70, 0x6c, 0x61, 0x6e,
0x65, 0x30, 0x5f, 0x65, 0x71, 0x00, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x76, 0x69, 0x65, 0x77, 0x00,
0x66, 0x6f, 0x67, 0x5f, 0x6e, 0x65, 0x61, 0x72, 0x00, 0x66, 0x6f, 0x67, 0x5f, 0x66, 0x61, 0x72,
0x00, 0x66, 0x6f, 0x67, 0x5f, 0x64, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x79, 0x00, 0x00, 0x00, 0x00,
};
#endif

165
deps/vitaGL/source/shared.h vendored Normal file
View File

@ -0,0 +1,165 @@
/*
* shared.h:
* All functions/definitions that shouldn't be exposed to
* end users but are used in multiple source files must be here
*/
#ifndef _SHARED_H_
#define _SHARED_H_
// Internal constants
// Compile-time limits and defaults; every macro below expands to a bare integer literal.
#define TEXTURES_NUM 4096 // Available textures per texture unit
#define MODELVIEW_STACK_DEPTH 32 // Depth of modelview matrix stack
#define GENERIC_STACK_DEPTH 2 // Depth of generic matrix stack
#define DISPLAY_WIDTH_DEF 960 // Default display width in pixels
#define DISPLAY_HEIGHT_DEF 544 // Default display height in pixels
#define DISPLAY_BUFFER_COUNT 2 // Display buffers to use
#define GXM_TEX_MAX_SIZE 4096 // Maximum width/height in pixels per texture
#define BUFFERS_ADDR 0xA000 // Starting address for buffers indexing
#define BUFFERS_NUM 128 // Maximum number of allocatable buffers
// Internal constants set in bootup phase
// Only declared here (extern); the definitions and the code assigning them are outside this header.
// NOTE(review): presumably these default to DISPLAY_WIDTH_DEF / DISPLAY_HEIGHT_DEF -- confirm in the init code.
extern int DISPLAY_WIDTH; // Display width in pixels
extern int DISPLAY_HEIGHT; // Display height in pixels
extern int DISPLAY_STRIDE; // Display stride in pixels
extern float DISPLAY_WIDTH_FLOAT; // Display width in pixels (float)
extern float DISPLAY_HEIGHT_FLOAT; // Display height in pixels (float)
#include <stdio.h>
#include <stdlib.h>
#include <vitasdk.h>
#include "vitaGL.h"
#include "utils/gpu_utils.h"
#include "utils/math_utils.h"
#include "utils/mem_utils.h"
#include "state.h"
#include "texture_callbacks.h"
// Texture environment modes.
// The numeric values are spelled out explicitly (0..4).
// NOTE(review): they look like they are handed to the shaders as-is (a "texEnv" name
// appears in the fragment shader blobs) -- confirm before renumbering.
enum texEnvMode {
	MODULATE = 0,
	DECAL    = 1,
	BLEND    = 2,
	ADD      = 3,
	REPLACE  = 4
};
typedef enum texEnvMode texEnvMode;
// Vertex made of a 3-component position followed by a 4-component RGBA color
struct rgba_vertex {
	vector3f position;
	vector4f color;
};
typedef struct rgba_vertex rgba_vertex;
// Vertex made of a 3-component position followed by a 3-component RGB color
struct rgb_vertex {
	vector3f position;
	vector3f color;
};
typedef struct rgb_vertex rgb_vertex;
// Vertex made of a 3-component position followed by a 2-component UV texture coordinate
struct texture2d_vertex {
	vector3f position;
	vector2f texcoord;
};
typedef struct texture2d_vertex texture2d_vertex;
// Primitive types implemented by the library on top of the native ones
// (NONE = no extra primitive selected, QUADS = quads)
enum SceGxmPrimitiveTypeExtra {
	SCE_GXM_PRIMITIVE_NONE  = 0,
	SCE_GXM_PRIMITIVE_QUADS = 1
};
typedef enum SceGxmPrimitiveTypeExtra SceGxmPrimitiveTypeExtra;
#include "shaders.h"
// Internal stuffs
// Objects shared between the library's source files; declared here, defined elsewhere.
// NOTE(review): presumably the fragment/vertex uniform buffer pointers and the
// multisample mode in use -- confirm against the defining .c file.
extern void *frag_uniforms;
extern void *vert_uniforms;
extern SceGxmMultisampleMode msaa_mode;
// Debugging tool
// LOG() is declared only when ENABLE_LOG is defined; without it this header provides
// neither a declaration nor a no-op fallback for LOG.
// NOTE(review): the (format, ...) signature suggests printf-style formatting -- confirm
// against the definition.
#ifdef ENABLE_LOG
void LOG(const char *format, ...);
#endif
// Fallback max(): only defined when the SDK headers have not already provided one.
// Function-like macro, so the selected argument is evaluated twice.
#ifndef max
#define max(lhs, rhs) ((rhs) < (lhs) ? (lhs) : (rhs))
#endif
// sceGxm viewport setup (NOTE: origin is on center screen)
extern float x_port;
extern float y_port;
extern float z_port;
extern float x_scale;
extern float y_scale;
extern float z_scale;
extern SceGxmContext *gxm_context; // sceGxm context instance
extern GLenum error; // Error returned by glGetError
extern SceGxmShaderPatcher *gxm_shader_patcher; // sceGxmShaderPatcher shader patcher instance
// NOTE(review): unlike every other object in this header, the three matrices below are
// declared WITHOUT `extern`, so each translation unit including this header emits a
// tentative definition. That only links when the toolchain merges common symbols
// (-fcommon); GCC 10 and newer default to -fno-common and report multiple definitions.
// Suggested fix: declare them `extern` here and define them in exactly one .c file.
matrix4x4 mvp_matrix; // ModelViewProjection Matrix
matrix4x4 projection_matrix; // Projection Matrix
matrix4x4 modelview_matrix; // ModelView Matrix
extern GLboolean mvp_modified; // Check if ModelViewProjection matrix needs to be recreated
extern GLuint cur_program; // Current in use custom program (0 = No custom program)
extern uint8_t viewport_mode; // Current setting for viewport mode
extern GLboolean vblank; // Current setting for VSync
extern GLenum orig_depth_test; // Original depth test state (used for depth test invalidation)
// Scissor test shaders
// Declarations only; the objects are defined in a .c file outside this header.
extern SceGxmFragmentProgram *scissor_test_fragment_program; // Scissor test fragment program
extern vector2f *scissor_test_vertices; // Scissor test region vertices
extern SceUID scissor_test_vertices_uid; // Scissor test vertices memblock id
extern uint16_t *depth_clear_indices; // Memblock starting address for clear screen indices
// Clear screen shaders
extern SceGxmVertexProgram *clear_vertex_program_patched; // Patched vertex program for clearing screen
extern vector2f *clear_vertices; // Memblock starting address for clear screen vertices
/* gxm.c */
// sceGxm setup/teardown entry points. Most come in init/term (or create/destroy,
// start/stop) pairs; none of them returns a status value.
void initGxm(void); // Initializes sceGxm
void initGxmContext(void); // Initializes sceGxm context
void termGxmContext(void); // Terminates sceGxm context
void createDisplayRenderTarget(void); // Creates render target for the display
void destroyDisplayRenderTarget(void); // Destroys render target for the display
void initDisplayColorSurfaces(void); // Creates color surfaces for the display
void termDisplayColorSurfaces(void); // Destroys color surfaces for the display
// NOTE(review): w/h are presumably the surface dimensions and the pointer arguments
// outputs filled in by the call -- confirm in gxm.c.
void initDepthStencilBuffer(uint32_t w, uint32_t h, SceGxmDepthStencilSurface *surface, void **depth_buffer, void **stencil_buffer, vglMemType *depth_type, vglMemType *stencil_type); // Creates depth and stencil surfaces
void initDepthStencilSurfaces(void); // Creates depth and stencil surfaces for the display
void termDepthStencilSurfaces(void); // Destroys depth and stencil surfaces for the display
void startShaderPatcher(void); // Creates a shader patcher instance
void stopShaderPatcher(void); // Destroys a shader patcher instance
void waitRenderingDone(void); // Waits for rendering to be finished
/* tests.c */
// Depth, stencil, alpha and scissor test state helpers.
void change_depth_write(SceGxmDepthWriteMode mode); // Changes current in use depth write mode
void change_depth_func(void); // Changes current in use depth test function
void invalidate_depth_test(void); // Invalidates depth test state
void validate_depth_test(void); // Resets original depth test state after invalidation
void change_stencil_settings(void); // Changes current in use stencil test parameters
// NOTE(review): the parameter name `new` in the next two prototypes is a C++ keyword, so
// this header cannot be included from C++ as written; harmless for C translation units.
// NOTE(review): the GLboolean result presumably reports whether the GLenum value was
// accepted -- confirm in tests.c.
GLboolean change_stencil_config(SceGxmStencilOp *cfg, GLenum new); // Changes current in use stencil test operation value
GLboolean change_stencil_func_config(SceGxmStencilFunc *cfg, GLenum new); // Changes current in use stencil test function value
void update_alpha_test_settings(void); // Changes current in use alpha test operation value
void update_scissor_test(void); // Changes current in use scissor test region
void resetScissorTestRegion(void); // Resets scissor test region to default values
/* blending.c */
void change_blend_factor(void); // Changes current blending settings for all used shaders
void disable_blend(void); // Disables blending for all used shaders
/* custom_shaders.c */
void resetCustomShaders(void); // Resets custom shaders
void changeCustomShadersBlend(SceGxmBlendInfo *blend_info); // Applies the given SceGxmBlendInfo value to all custom shaders
void reloadCustomShader(void); // Reloads in use custom shader inside sceGxm
// NOTE(review): `implicit_wvp` presumably selects whether the library supplies the
// world-view-projection matrix itself -- confirm in custom_shaders.c.
void _vglDrawObjects_CustomShadersIMPL(GLenum mode, GLsizei count, GLboolean implicit_wvp); // vglDrawObjects implementation for rendering with custom shaders
/* misc functions */
// Writes its result through `out`; x/y/width/height are given in screen coordinates.
void vector2f_convert_to_local_space(vector2f *out, int x, int y, int width, int height); // Converts screen coords to local space
#endif

53
deps/vitaGL/source/state.c vendored Normal file
View File

@ -0,0 +1,53 @@
/*
* state.c:
* Initial config of the openGL machine state
*/
#include "shared.h"
// Every object below has external linkage and holds the value the GL state machine
// starts with. Objects without an initializer are zero-initialized (static storage).
// Blending
GLboolean blend_state = GL_FALSE; // Current state for GL_BLEND
SceGxmBlendFactor blend_sfactor_rgb = SCE_GXM_BLEND_FACTOR_ONE; // Current in use RGB source blend factor
SceGxmBlendFactor blend_dfactor_rgb = SCE_GXM_BLEND_FACTOR_ZERO; // Current in use RGB dest blend factor
SceGxmBlendFactor blend_sfactor_a = SCE_GXM_BLEND_FACTOR_ONE; // Current in use A source blend factor
SceGxmBlendFactor blend_dfactor_a = SCE_GXM_BLEND_FACTOR_ZERO; // Current in use A dest blend factor
// Polygon Offset parameters (glPolygonOffset factor/units)
GLfloat pol_factor = 0.0f; // Current factor for glPolygonOffset
GLfloat pol_units = 0.0f; // Current units for glPolygonOffset
// Texture Units
int8_t client_texture_unit = 0; // Current in use client side texture unit
// Miscellaneous
glPhase phase = NONE; // Current drawing phase for legacy openGL
vector4f clear_rgba_val; // Current clear color for glClear (no initializer: starts zeroed)
// Fogging
GLboolean fogging = GL_FALSE; // Current fogging processor state
GLint fog_mode = GL_EXP; // Current fogging mode (openGL)
fogType internal_fog_mode = DISABLED; // Current fogging mode (sceGxm)
GLfloat fog_density = 1.0f; // Current fogging density
GLfloat fog_near = 0.0f; // Current fogging near distance
GLfloat fog_far = 1.0f; // Current fogging far distance
vector4f fog_color = { 0.0f, 0.0f, 0.0f, 0.0f }; // Current fogging color
// Clipping Planes
GLint clip_plane0 = GL_FALSE; // Current status of clip plane 0 (stored as GLint, not GLboolean)
vector4f clip_plane0_eq = { 0.0f, 0.0f, 0.0f, 0.0f }; // Current equation of clip plane 0
// Culling
GLboolean cull_face_state = GL_FALSE; // Current state for GL_CULL_FACE
GLenum gl_cull_mode = GL_BACK; // Current in use openGL cull mode
GLenum gl_front_face = GL_CCW; // Current in use openGL setting for front facing primitives
GLboolean no_polygons_mode = GL_FALSE; // GL_TRUE when cull mode is set to GL_FRONT_AND_BACK
// Polygon Offset enable flags
GLboolean pol_offset_fill = GL_FALSE; // Current state for GL_POLYGON_OFFSET_FILL
GLboolean pol_offset_line = GL_FALSE; // Current state for GL_POLYGON_OFFSET_LINE
GLboolean pol_offset_point = GL_FALSE; // Current state for GL_POLYGON_OFFSET_POINT
// Polygon Mode (sceGxm value and the matching openGL value, per face)
SceGxmPolygonMode polygon_mode_front = SCE_GXM_POLYGON_MODE_TRIANGLE_FILL; // Current in use polygon mode for front
SceGxmPolygonMode polygon_mode_back = SCE_GXM_POLYGON_MODE_TRIANGLE_FILL; // Current in use polygon mode for back
GLenum gl_polygon_mode_front = GL_FILL; // Current in use polygon mode for front
GLenum gl_polygon_mode_back = GL_FILL; // Current in use polygon mode for back
// Viewport
viewport gl_viewport; // Current viewport state (no initializer: starts zeroed)

191
deps/vitaGL/source/state.h vendored Normal file
View File

@ -0,0 +1,191 @@
/*
* state.h:
* Header file managing state of openGL machine
*/
#ifndef _STATE_H_
#define _STATE_H_
// Drawing phases constants for legacy openGL
// The global `phase` starts as NONE (state.c); glDepthMask rejects calls
// with GL_INVALID_OPERATION while the phase is MODEL_CREATION.
typedef enum glPhase {
NONE = 0,
MODEL_CREATION = 1
} glPhase;
// Vertex array attributes struct
// Embedded three times in texture_unit (vertex, color and texture arrays).
// NOTE(review): the fields presumably mirror the arguments of the
// gl*Pointer calls; the code that fills them is not in this header - confirm.
typedef struct vertexArray {
GLint size;
GLint num;
GLsizei stride;
const GLvoid *pointer;
} vertexArray;
// Scissor test region struct
// glScissor fills the global `region` with x/w/h as passed and with y
// converted to DISPLAY_HEIGHT - y - height; resetScissorTestRegion sets
// it to (0, 0, DISPLAY_WIDTH, DISPLAY_HEIGHT).
typedef struct scissor_region {
int x;
int y;
int w;
int h;
} scissor_region;
// Viewport struct
// Type of the global `gl_viewport`; update_scissor_test reads x, y, w and h
// to build the region clip used when the scissor test is disabled.
typedef struct viewport {
int x;
int y;
int w;
int h;
} viewport;
// Alpha operations for alpha testing
// update_alpha_test_settings translates the GL comparison stored in
// alpha_func into one of these values and stores it in the global
// `alpha_op`, which is declared as a plain int rather than alphaOp.
typedef enum alphaOp {
GREATER_EQUAL = 0,
GREATER = 1,
NOT_EQUAL = 2,
EQUAL = 3,
LESS_EQUAL = 4,
LESS = 5,
NEVER = 6,
ALWAYS = 7
} alphaOp;
// Fog modes
// Type of the global `internal_fog_mode`, which starts as DISABLED (state.c).
typedef enum fogType {
LINEAR = 0,
EXP = 1,
EXP2 = 2,
DISABLED = 3
} fogType;
// Texture unit struct
// One entry of the global texture_units[] array. Each unit owns its own
// pool of TEXTURES_NUM textures, and tex_id is the index into that pool of
// the texture currently bound through glBindTexture.
typedef struct texture_unit {
GLboolean enabled;
GLboolean vertex_array_state;
GLboolean color_array_state;
GLboolean texture_array_state;
matrix4x4 stack[GENERIC_STACK_DEPTH];
texture textures[TEXTURES_NUM]; // Texture pool; glGenTextures hands out indices into it
vertexArray vertex_array;
vertexArray color_array;
vertexArray texture_array;
GLenum color_object_type;
void *vertex_object;
void *color_object;
void *texture_object;
void *index_object;
int env_mode;
int tex_id; // Index of the bound texture inside textures[]
SceGxmTextureFilter min_filter; // Applied to the bound texture by glTexImage2D
SceGxmTextureFilter mag_filter; // Applied to the bound texture by glTexImage2D
SceGxmTextureAddrMode u_mode; // Applied to the bound texture by glTexImage2D
SceGxmTextureAddrMode v_mode; // Applied to the bound texture by glTexImage2D
} texture_unit;
// Framebuffer struct
// Type pointed to by the globals active_read_fb and active_write_fb.
// Groups a sceGxm render target with its color and depth/stencil surfaces,
// plus the address and vglMemType recorded for the depth and stencil buffers.
typedef struct framebuffer {
uint8_t active;
SceGxmRenderTarget *target;
SceGxmColorSurface colorbuffer;
SceGxmDepthStencilSurface depthbuffer;
void *depth_buffer_addr;
vglMemType depth_buffer_mem_type;
void *stencil_buffer_addr;
vglMemType stencil_buffer_mem_type;
} framebuffer;
// Blending (defined in state.c)
extern GLboolean blend_state; // Current state for GL_BLEND
extern SceGxmBlendFactor blend_sfactor_rgb; // Current in use RGB source blend factor
extern SceGxmBlendFactor blend_dfactor_rgb; // Current in use RGB dest blend factor
extern SceGxmBlendFactor blend_sfactor_a; // Current in use A source blend factor
extern SceGxmBlendFactor blend_dfactor_a; // Current in use A dest blend factor
// Depth Test (defined in tests.c)
extern GLboolean depth_test_state; // Current state for GL_DEPTH_TEST
extern SceGxmDepthFunc gxm_depth; // Current in-use depth test func
extern GLenum orig_depth_test; // Original depth test state (used for depth test invalidation)
extern GLdouble depth_value; // Current depth test clear value
extern GLboolean depth_mask_state; // Current state for glDepthMask
// Scissor Test (defined in tests.c)
extern scissor_region region; // Current scissor test region setup
extern GLboolean scissor_test_state; // Current state for GL_SCISSOR_TEST
// Stencil Test (defined in tests.c)
extern uint8_t stencil_mask_front; // Current in use mask for stencil test on front
extern uint8_t stencil_mask_back; // Current in use mask for stencil test on back
extern uint8_t stencil_mask_front_write; // Current in use mask for write stencil test on front
extern uint8_t stencil_mask_back_write; // Current in use mask for write stencil test on back
extern uint8_t stencil_ref_front; // Current in use reference for stencil test on front
extern uint8_t stencil_ref_back; // Current in use reference for stencil test on back
extern SceGxmStencilOp stencil_fail_front; // Current in use stencil operation when stencil test fails for front
extern SceGxmStencilOp depth_fail_front; // Current in use stencil operation when depth test fails for front
extern SceGxmStencilOp depth_pass_front; // Current in use stencil operation when depth test passes for front
extern SceGxmStencilOp stencil_fail_back; // Current in use stencil operation when stencil test fails for back
extern SceGxmStencilOp depth_fail_back; // Current in use stencil operation when depth test fails for back
extern SceGxmStencilOp depth_pass_back; // Current in use stencil operation when depth test passes for back
extern SceGxmStencilFunc stencil_func_front; // Current in use stencil function on front
extern SceGxmStencilFunc stencil_func_back; // Current in use stencil function on back
extern GLboolean stencil_test_state; // Current state for GL_STENCIL_TEST
extern GLint stencil_value; // Current stencil test clear value
// Alpha Test (defined in tests.c)
extern GLenum alpha_func; // Current in use alpha test mode
extern GLfloat alpha_ref; // Current in use alpha test reference value
extern int alpha_op; // Current in use alpha test operation (holds an alphaOp value)
extern GLboolean alpha_test_state; // Current state for GL_ALPHA_TEST
// Polygon Mode (defined in state.c)
extern GLfloat pol_factor; // Current factor for glPolygonOffset
extern GLfloat pol_units; // Current units for glPolygonOffset
// Texture Units (client_texture_unit is defined in state.c, the others in textures.c)
extern texture_unit texture_units[GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS]; // Available texture units
extern int8_t server_texture_unit; // Current in use server side texture unit
extern int8_t client_texture_unit; // Current in use client side texture unit
extern palette *color_table; // Current in-use color table
// Matrices
extern matrix4x4 *matrix; // Current in-use matrix mode
// Miscellaneous
extern glPhase phase; // Current drawing phase for legacy openGL
extern vector4f current_color; // Current in use color
extern vector4f clear_rgba_val; // Current clear color for glClear
extern viewport gl_viewport; // Current viewport state
// Culling (defined in state.c)
extern GLboolean no_polygons_mode; // GL_TRUE when cull mode is set to GL_FRONT_AND_BACK
extern GLboolean cull_face_state; // Current state for GL_CULL_FACE
extern GLenum gl_cull_mode; // Current in use openGL cull mode
extern GLenum gl_front_face; // Current in use openGL setting for front facing primitives
// Polygon Offset (defined in state.c)
extern GLboolean pol_offset_fill; // Current state for GL_POLYGON_OFFSET_FILL
extern GLboolean pol_offset_line; // Current state for GL_POLYGON_OFFSET_LINE
extern GLboolean pol_offset_point; // Current state for GL_POLYGON_OFFSET_POINT
extern SceGxmPolygonMode polygon_mode_front; // Current in use polygon mode for front
extern SceGxmPolygonMode polygon_mode_back; // Current in use polygon mode for back
extern GLenum gl_polygon_mode_front; // Current in use polygon mode for front
extern GLenum gl_polygon_mode_back; // Current in use polygon mode for back
// Texture Environment
extern vector4f texenv_color; // Current in use texture environment color
// Fogging (defined in state.c)
extern GLboolean fogging; // Current fogging processor state
extern GLint fog_mode; // Current fogging mode (openGL)
extern fogType internal_fog_mode; // Current fogging mode (sceGxm)
extern GLfloat fog_density; // Current fogging density
extern GLfloat fog_near; // Current fogging near distance
extern GLfloat fog_far; // Current fogging far distance
extern vector4f fog_color; // Current fogging color
// Clipping Planes (defined in state.c)
extern GLint clip_plane0; // Current status of clip plane 0
extern vector4f clip_plane0_eq; // Current equation of clip plane 0
// Framebuffers
extern framebuffer *active_read_fb; // Current readback framebuffer in use
extern framebuffer *active_write_fb; // Current write framebuffer in use
#endif

463
deps/vitaGL/source/tests.c vendored Normal file
View File

@ -0,0 +1,463 @@
/*
* tests.c:
* Implementation for all drawing tests functions
*/
#include "shared.h"
// Depth Test
GLboolean depth_test_state = GL_FALSE; // Current state for GL_DEPTH_TEST
SceGxmDepthFunc gxm_depth = SCE_GXM_DEPTH_FUNC_LESS; // Current in-use depth test func
GLenum orig_depth_test; // Original depth test state (used for depth test invalidation); holds a copy of depth_test_state
GLdouble depth_value = 1.0f; // Current depth test clear value
GLboolean depth_mask_state = GL_TRUE; // Current state for glDepthMask
// Scissor Test
scissor_region region; // Current scissor test region setup
GLboolean scissor_test_state = GL_FALSE; // Current state for GL_SCISSOR_TEST
SceGxmFragmentProgram *scissor_test_fragment_program; // Scissor test fragment program
vector2f *scissor_test_vertices = NULL; // Scissor test region vertices (not allocated in this part of the file)
SceUID scissor_test_vertices_uid; // Scissor test vertices memblock id
// Stencil Test
uint8_t stencil_mask_front = 0xFF; // Current in use mask for stencil test on front
uint8_t stencil_mask_back = 0xFF; // Current in use mask for stencil test on back
uint8_t stencil_mask_front_write = 0xFF; // Current in use mask for write stencil test on front
uint8_t stencil_mask_back_write = 0xFF; // Current in use mask for write stencil test on back
uint8_t stencil_ref_front = 0; // Current in use reference for stencil test on front
uint8_t stencil_ref_back = 0; // Current in use reference for stencil test on back
SceGxmStencilOp stencil_fail_front = SCE_GXM_STENCIL_OP_KEEP; // Current in use stencil operation when stencil test fails for front
SceGxmStencilOp depth_fail_front = SCE_GXM_STENCIL_OP_KEEP; // Current in use stencil operation when depth test fails for front
SceGxmStencilOp depth_pass_front = SCE_GXM_STENCIL_OP_KEEP; // Current in use stencil operation when depth test passes for front
SceGxmStencilOp stencil_fail_back = SCE_GXM_STENCIL_OP_KEEP; // Current in use stencil operation when stencil test fails for back
SceGxmStencilOp depth_fail_back = SCE_GXM_STENCIL_OP_KEEP; // Current in use stencil operation when depth test fails for back
SceGxmStencilOp depth_pass_back = SCE_GXM_STENCIL_OP_KEEP; // Current in use stencil operation when depth test passes for back
SceGxmStencilFunc stencil_func_front = SCE_GXM_STENCIL_FUNC_ALWAYS; // Current in use stencil function on front
SceGxmStencilFunc stencil_func_back = SCE_GXM_STENCIL_FUNC_ALWAYS; // Current in use stencil function on back
GLboolean stencil_test_state = GL_FALSE; // Current state for GL_STENCIL_TEST
GLint stencil_value = 0; // Current stencil test clear value
// Alpha Test
GLenum alpha_func = GL_ALWAYS; // Current in-use alpha test mode
GLfloat alpha_ref = 0.0f; // Current in use alpha test reference value
int alpha_op = ALWAYS; // Current in use alpha test operation (an alphaOp value, recomputed by update_alpha_test_settings)
GLboolean alpha_test_state = GL_FALSE; // Current state for GL_ALPHA_TEST
// Applies the given depth write mode to the sceGxm context.
// mode: write mode forwarded unchanged to both the front-face and the
// back-face depth write setting.
void change_depth_write(SceGxmDepthWriteMode mode) {
// Change depth write mode for both front and back primitives
sceGxmSetFrontDepthWriteEnable(gxm_context, mode);
sceGxmSetBackDepthWriteEnable(gxm_context, mode);
}
void change_depth_func() {
// Setting depth function for both front and back primitives
sceGxmSetFrontDepthFunc(gxm_context, depth_test_state ? gxm_depth : SCE_GXM_DEPTH_FUNC_ALWAYS);
sceGxmSetBackDepthFunc(gxm_context, depth_test_state ? gxm_depth : SCE_GXM_DEPTH_FUNC_ALWAYS);
// Calling an update for the depth write mode
change_depth_write((depth_mask_state && depth_test_state) ? SCE_GXM_DEPTH_WRITE_ENABLED : SCE_GXM_DEPTH_WRITE_DISABLED);
}
// Temporarily disables the depth test, remembering the previous state in
// orig_depth_test so that validate_depth_test can restore it.
// orig_depth_test is overwritten on every call, so a second call before
// validate_depth_test stores GL_FALSE as the "original" state.
void invalidate_depth_test() {
// Invalidating current depth test state
orig_depth_test = depth_test_state;
depth_test_state = GL_FALSE;
// Invoking a depth function update
change_depth_func();
}
// Restores the depth test state saved by invalidate_depth_test and
// re-applies the depth function to the sceGxm context.
void validate_depth_test() {
// Resetting original depth test state
depth_test_state = orig_depth_test;
// Invoking a depth function update
change_depth_func();
}
// Pushes the current stencil configuration to the sceGxm context.
// With GL_STENCIL_TEST disabled a neutral setup (ALWAYS / KEEP, zero masks)
// is programmed and the reference values are left untouched; otherwise the
// stored per-face functions, operations, masks and references are applied.
void change_stencil_settings() {
	if (!stencil_test_state) {
		// Stencil test disabled: neutral configuration on both faces
		sceGxmSetFrontStencilFunc(gxm_context,
			SCE_GXM_STENCIL_FUNC_ALWAYS,
			SCE_GXM_STENCIL_OP_KEEP,
			SCE_GXM_STENCIL_OP_KEEP,
			SCE_GXM_STENCIL_OP_KEEP,
			0, 0);
		sceGxmSetBackStencilFunc(gxm_context,
			SCE_GXM_STENCIL_FUNC_ALWAYS,
			SCE_GXM_STENCIL_OP_KEEP,
			SCE_GXM_STENCIL_OP_KEEP,
			SCE_GXM_STENCIL_OP_KEEP,
			0, 0);
		return;
	}

	// Stencil test enabled: apply the stored front face configuration...
	sceGxmSetFrontStencilFunc(gxm_context,
		stencil_func_front,
		stencil_fail_front,
		depth_fail_front,
		depth_pass_front,
		stencil_mask_front, stencil_mask_front_write);
	// ...and the stored back face configuration
	sceGxmSetBackStencilFunc(gxm_context,
		stencil_func_back,
		stencil_fail_back,
		depth_fail_back,
		depth_pass_back,
		stencil_mask_back, stencil_mask_back_write);

	// Reference values for both faces
	sceGxmSetFrontStencilRef(gxm_context, stencil_ref_front);
	sceGxmSetBackStencilRef(gxm_context, stencil_ref_back);
}
// Translates an openGL stencil operation into its sceGxm counterpart.
// cfg: destination, written only when the translation succeeds.
// new: openGL stencil operation (GL_KEEP, GL_ZERO, GL_REPLACE, ...).
// Returns GL_TRUE on success, GL_FALSE (leaving *cfg untouched) when the
// operation is not recognised.
GLboolean change_stencil_config(SceGxmStencilOp *cfg, GLenum new) {
	SceGxmStencilOp translated;

	switch (new) {
	case GL_KEEP:
		translated = SCE_GXM_STENCIL_OP_KEEP;
		break;
	case GL_ZERO:
		translated = SCE_GXM_STENCIL_OP_ZERO;
		break;
	case GL_REPLACE:
		translated = SCE_GXM_STENCIL_OP_REPLACE;
		break;
	case GL_INCR:
		translated = SCE_GXM_STENCIL_OP_INCR;
		break;
	case GL_INCR_WRAP:
		translated = SCE_GXM_STENCIL_OP_INCR_WRAP;
		break;
	case GL_DECR:
		translated = SCE_GXM_STENCIL_OP_DECR;
		break;
	case GL_DECR_WRAP:
		translated = SCE_GXM_STENCIL_OP_DECR_WRAP;
		break;
	case GL_INVERT:
		translated = SCE_GXM_STENCIL_OP_INVERT;
		break;
	default:
		// Unknown operation: report failure without touching the destination
		return GL_FALSE;
	}

	*cfg = translated;
	return GL_TRUE;
}
// Translates an openGL stencil comparison function into its sceGxm counterpart.
// cfg: destination, written only when the translation succeeds.
// new: openGL comparison function (GL_NEVER, GL_LESS, GL_LEQUAL, ...).
// Returns GL_TRUE on success, GL_FALSE (leaving *cfg untouched) when the
// function is not recognised.
GLboolean change_stencil_func_config(SceGxmStencilFunc *cfg, GLenum new) {
	SceGxmStencilFunc translated;

	switch (new) {
	case GL_NEVER:
		translated = SCE_GXM_STENCIL_FUNC_NEVER;
		break;
	case GL_LESS:
		translated = SCE_GXM_STENCIL_FUNC_LESS;
		break;
	case GL_LEQUAL:
		translated = SCE_GXM_STENCIL_FUNC_LESS_EQUAL;
		break;
	case GL_GREATER:
		translated = SCE_GXM_STENCIL_FUNC_GREATER;
		break;
	case GL_GEQUAL:
		translated = SCE_GXM_STENCIL_FUNC_GREATER_EQUAL;
		break;
	case GL_EQUAL:
		translated = SCE_GXM_STENCIL_FUNC_EQUAL;
		break;
	case GL_NOTEQUAL:
		translated = SCE_GXM_STENCIL_FUNC_NOT_EQUAL;
		break;
	case GL_ALWAYS:
		translated = SCE_GXM_STENCIL_FUNC_ALWAYS;
		break;
	default:
		// Unknown function: report failure without touching the destination
		return GL_FALSE;
	}

	*cfg = translated;
	return GL_TRUE;
}
// Recomputes alpha_op, the internal alpha test operation, from the current
// openGL state. The result is ALWAYS whenever GL_ALPHA_TEST is disabled or
// alpha_func holds a value that has no dedicated mapping.
void update_alpha_test_settings() {
	int op = ALWAYS;

	if (alpha_test_state) {
		// Translating openGL alpha test operation to internal one
		switch (alpha_func) {
		case GL_NEVER:
			op = NEVER;
			break;
		case GL_LESS:
			op = LESS;
			break;
		case GL_LEQUAL:
			op = LESS_EQUAL;
			break;
		case GL_EQUAL:
			op = EQUAL;
			break;
		case GL_GEQUAL:
			op = GREATER_EQUAL;
			break;
		case GL_GREATER:
			op = GREATER;
			break;
		case GL_NOTEQUAL:
			op = NOT_EQUAL;
			break;
		}
	}

	alpha_op = op;
}
// Rebuilds the scissor mask and region clip for the current scissor state.
// Two full-screen-style draws are issued with the scissor test fragment
// program: the first with stencil function NEVER over clear_vertices, the
// second with stencil function ALWAYS over either the scissor rectangle
// (test enabled) or clear_vertices again (test disabled). Finally the
// region clip is set to the scissor rectangle or to the viewport.
// The stencil function programmed here is left in the context on return.
void update_scissor_test() {
// Calculating scissor test region vertices
if (scissor_test_state) {
// NOTE(review): scissor_test_vertices is initialised to NULL in this file;
// presumably it is allocated during library init before this runs - confirm.
vector2f_convert_to_local_space(scissor_test_vertices, region.x, region.y, region.w, region.h);
}
// Setting current vertex program to clear screen one and fragment program to scissor test one
sceGxmSetVertexProgram(gxm_context, clear_vertex_program_patched);
sceGxmSetFragmentProgram(gxm_context, scissor_test_fragment_program);
// Cleaning stencil surface mask update bit on the whole screen
sceGxmSetFrontStencilFunc(gxm_context,
SCE_GXM_STENCIL_FUNC_NEVER,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
0, 0);
sceGxmSetBackStencilFunc(gxm_context,
SCE_GXM_STENCIL_FUNC_NEVER,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
0, 0);
sceGxmSetVertexStream(gxm_context, 0, clear_vertices);
sceGxmDraw(gxm_context, SCE_GXM_PRIMITIVE_TRIANGLE_FAN, SCE_GXM_INDEX_FORMAT_U16, depth_clear_indices, 4);
// Setting stencil surface mask update bit on the scissor test region
sceGxmSetFrontStencilFunc(gxm_context,
SCE_GXM_STENCIL_FUNC_ALWAYS,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
0, 0);
sceGxmSetBackStencilFunc(gxm_context,
SCE_GXM_STENCIL_FUNC_ALWAYS,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
SCE_GXM_STENCIL_OP_KEEP,
0, 0);
if (scissor_test_state)
sceGxmSetVertexStream(gxm_context, 0, scissor_test_vertices);
else
sceGxmSetVertexStream(gxm_context, 0, clear_vertices);
sceGxmDraw(gxm_context, SCE_GXM_PRIMITIVE_TRIANGLE_FAN, SCE_GXM_INDEX_FORMAT_U16, depth_clear_indices, 4);
// Restricting rasterization to the scissor rectangle, or to the viewport when the test is off
if (scissor_test_state)
sceGxmSetRegionClip(gxm_context, SCE_GXM_REGION_CLIP_OUTSIDE, region.x, region.y, region.x + region.w, region.y + region.h);
else
// NOTE(review): yMin is flipped (DISPLAY_HEIGHT - y - h) but yMax is not (y + h);
// the two are consistent for a full-height viewport at y = 0 - confirm the
// intended yMax for other viewports against how gl_viewport is filled.
sceGxmSetRegionClip(gxm_context, SCE_GXM_REGION_CLIP_OUTSIDE, gl_viewport.x, DISPLAY_HEIGHT - gl_viewport.y - gl_viewport.h, gl_viewport.x + gl_viewport.w, gl_viewport.y + gl_viewport.h);
}
// Resets the scissor rectangle so that it starts at the origin and spans
// DISPLAY_WIDTH x DISPLAY_HEIGHT.
void resetScissorTestRegion(void) {
	region.y = 0;
	region.x = 0;
	region.w = DISPLAY_WIDTH;
	region.h = DISPLAY_HEIGHT;
}
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Stores a new scissor rectangle and, when GL_SCISSOR_TEST is enabled,
// applies it immediately.
// x, y: origin of the rectangle in openGL window coordinates.
// width, height: size of the rectangle; a negative value raises
// GL_INVALID_VALUE (unless SKIP_ERROR_HANDLING is defined).
void glScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (width < 0 || height < 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif

	// The stored y is measured from the opposite edge of the display
	region.w = width;
	region.h = height;
	region.x = x;
	region.y = DISPLAY_HEIGHT - y - height;

	// Nothing to push to the GPU while the scissor test is disabled
	if (!scissor_test_state)
		return;
	update_scissor_test();
}
// Selects the comparison used by the depth test.
// func: one of GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER,
// GL_NOTEQUAL, GL_GEQUAL or GL_ALWAYS. Any other value raises
// GL_INVALID_ENUM and leaves the current depth function untouched.
void glDepthFunc(GLenum func) {
	// Properly translating openGL function to sceGxm one
	switch (func) {
	case GL_NEVER:
		gxm_depth = SCE_GXM_DEPTH_FUNC_NEVER;
		break;
	case GL_LESS:
		gxm_depth = SCE_GXM_DEPTH_FUNC_LESS;
		break;
	case GL_EQUAL:
		gxm_depth = SCE_GXM_DEPTH_FUNC_EQUAL;
		break;
	case GL_LEQUAL:
		gxm_depth = SCE_GXM_DEPTH_FUNC_LESS_EQUAL;
		break;
	case GL_GREATER:
		gxm_depth = SCE_GXM_DEPTH_FUNC_GREATER;
		break;
	case GL_NOTEQUAL:
		gxm_depth = SCE_GXM_DEPTH_FUNC_NOT_EQUAL;
		break;
	case GL_GEQUAL:
		gxm_depth = SCE_GXM_DEPTH_FUNC_GREATER_EQUAL;
		break;
	case GL_ALWAYS:
		gxm_depth = SCE_GXM_DEPTH_FUNC_ALWAYS;
		break;
	default:
		// Unknown comparison: report it instead of silently ignoring the call
		error = GL_INVALID_ENUM;
		return;
	}
	// Updating in use depth function
	change_depth_func();
}
// Sets the value the depth buffer is cleared to.
// depth: requested clear value; as required by the openGL specification it
// is clamped to the [0, 1] range before being stored in depth_value.
void glClearDepth(GLdouble depth) {
	if (depth < 0.0)
		depth = 0.0;
	else if (depth > 1.0)
		depth = 1.0;
	// Set current in use depth test depth value
	depth_value = depth;
}
// Enables or disables writing to the depth buffer.
// flag: new depth mask state. Depth writes are only enabled on the GPU when
// both the mask and GL_DEPTH_TEST are on.
// Raises GL_INVALID_OPERATION when called during the MODEL_CREATION phase
// (unless SKIP_ERROR_HANDLING is defined).
void glDepthMask(GLboolean flag) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (phase == MODEL_CREATION) {
		error = GL_INVALID_OPERATION;
		return;
	}
#endif

	depth_mask_state = flag;

	// Invoking a depth write mode update
	if (depth_mask_state && depth_test_state)
		change_depth_write(SCE_GXM_DEPTH_WRITE_ENABLED);
	else
		change_depth_write(SCE_GXM_DEPTH_WRITE_DISABLED);
}
// Sets the alpha test comparison and reference value.
// func: openGL comparison; stored as-is, without validation. Values that
// update_alpha_test_settings does not map are treated as ALWAYS.
// ref: reference value; stored as-is, without clamping.
void glAlphaFunc(GLenum func, GLfloat ref) {
// Updating in use alpha test parameters
alpha_func = func;
alpha_ref = ref;
update_alpha_test_settings();
}
// Sets the stencil operations for front and/or back facing primitives.
// face: GL_FRONT, GL_BACK or GL_FRONT_AND_BACK; anything else raises
// GL_INVALID_ENUM.
// sfail: operation applied when the stencil test fails.
// dpfail: operation applied when the stencil test passes but the depth test fails.
// dppass: operation applied when both tests pass.
// An operation that cannot be translated raises GL_INVALID_ENUM and leaves
// the corresponding setting unchanged. The stencil configuration is pushed
// to the GPU in every case.
void glStencilOpSeparate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) {
	// Properly updating stencil operation settings
	switch (face) {
	case GL_FRONT:
		if (!change_stencil_config(&stencil_fail_front, sfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_fail_front, dpfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_pass_front, dppass))
			error = GL_INVALID_ENUM;
		break;
	case GL_BACK:
		if (!change_stencil_config(&stencil_fail_back, sfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_fail_back, dpfail))
			error = GL_INVALID_ENUM;
		// The depth-pass operation of the BACK face must land in the back
		// face setting; it used to overwrite the front face one
		if (!change_stencil_config(&depth_pass_back, dppass))
			error = GL_INVALID_ENUM;
		break;
	case GL_FRONT_AND_BACK:
		if (!change_stencil_config(&stencil_fail_front, sfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&stencil_fail_back, sfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_fail_front, dpfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_fail_back, dpfail))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_pass_front, dppass))
			error = GL_INVALID_ENUM;
		if (!change_stencil_config(&depth_pass_back, dppass))
			error = GL_INVALID_ENUM;
		break;
	default:
		error = GL_INVALID_ENUM;
		break;
	}
	change_stencil_settings();
}
// Sets the stencil operations for both faces at once by forwarding to
// glStencilOpSeparate with GL_FRONT_AND_BACK.
void glStencilOp(GLenum sfail, GLenum dpfail, GLenum dppass) {
glStencilOpSeparate(GL_FRONT_AND_BACK, sfail, dpfail, dppass);
}
// Sets the stencil comparison function, reference value and read mask for
// front and/or back facing primitives.
// face: GL_FRONT, GL_BACK or GL_FRONT_AND_BACK; anything else raises
// GL_INVALID_ENUM.
// func: openGL comparison function; an unknown one raises GL_INVALID_ENUM
// and leaves the stored function unchanged.
// ref, mask: stored in 8 bit variables, so only their low 8 bits are kept.
// The stencil configuration is pushed to the GPU in every case.
void glStencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask) {
	const GLboolean touch_front = (face == GL_FRONT) || (face == GL_FRONT_AND_BACK);
	const GLboolean touch_back = (face == GL_BACK) || (face == GL_FRONT_AND_BACK);

	if (!touch_front && !touch_back)
		error = GL_INVALID_ENUM;

	if (touch_front) {
		if (!change_stencil_func_config(&stencil_func_front, func))
			error = GL_INVALID_ENUM;
		stencil_mask_front = mask;
		stencil_ref_front = ref;
	}

	if (touch_back) {
		if (!change_stencil_func_config(&stencil_func_back, func))
			error = GL_INVALID_ENUM;
		stencil_mask_back = mask;
		stencil_ref_back = ref;
	}

	change_stencil_settings();
}
// Sets the stencil function, reference and mask for both faces at once by
// forwarding to glStencilFuncSeparate with GL_FRONT_AND_BACK.
void glStencilFunc(GLenum func, GLint ref, GLuint mask) {
glStencilFuncSeparate(GL_FRONT_AND_BACK, func, ref, mask);
}
// Sets the stencil write mask for front and/or back facing primitives.
// face: GL_FRONT, GL_BACK or GL_FRONT_AND_BACK; anything else raises
// GL_INVALID_ENUM and returns without touching the GPU state.
// mask: write mask; stored in 8 bit variables, so only its low 8 bits are kept.
void glStencilMaskSeparate(GLenum face, GLuint mask) {
	if (face == GL_FRONT) {
		stencil_mask_front_write = mask;
	} else if (face == GL_BACK) {
		stencil_mask_back_write = mask;
	} else if (face == GL_FRONT_AND_BACK) {
		stencil_mask_back_write = mask;
		stencil_mask_front_write = stencil_mask_back_write;
	} else {
		error = GL_INVALID_ENUM;
		return;
	}

	// Pushing the updated masks to the sceGxm context
	change_stencil_settings();
}
// Sets the stencil write mask for both faces at once by forwarding to
// glStencilMaskSeparate with GL_FRONT_AND_BACK.
void glStencilMask(GLuint mask) {
glStencilMaskSeparate(GL_FRONT_AND_BACK, mask);
}
// Stores the value used when clearing the stencil buffer.
// s: clear value; stored in stencil_value as-is, no masking is applied here.
void glClearStencil(GLint s) {
stencil_value = s;
}

77
deps/vitaGL/source/texture_callbacks.c vendored Normal file
View File

@ -0,0 +1,77 @@
/*
* texture_callbacks.c:
* Implementation for texture data reading/writing callbacks
*/
#include <stdlib.h>
#include <vitasdk.h>
#include "texture_callbacks.h"
// Read callback for 32bpp unsigned RGBA format
// Copies the four bytes at data, in memory order, into the returned sample.
uint32_t readRGBA(void *data) {
	uint32_t pixel = 0;
	memcpy(&pixel, data, sizeof(pixel));
	return pixel;
}
// Read callback for 16bpp unsigned RGBA5551 format
// Unpacks a 16 bit value laid out as RRRRRGGG GGBBBBBA, expands each 5 bit
// channel to 8 bits and the alpha bit to 0x00 / 0xFF, and returns the
// sample with R in the lowest byte and A in the highest.
uint32_t readRGBA5551(void *data) {
	uint16_t packed;
	memcpy(&packed, data, sizeof(packed));

	const uint32_t red = (((packed >> 11) & 0x1F) * 0xFF) / 0x1F;
	const uint32_t green = (((packed >> 6) & 0x1F) * 0xFF) / 0x1F;
	const uint32_t blue = (((packed >> 1) & 0x1F) * 0xFF) / 0x1F;
	const uint32_t alpha = (packed & 0x1) ? 0xFF : 0x00;

	return (alpha << 24) | (blue << 16) | (green << 8) | red;
}
// Read callback for 24bpp unsigned RGB format
// Copies three bytes from data over the first three bytes of an all-ones
// sample, so the remaining byte stays 0xFF.
uint32_t readRGB(void *data) {
	const uint8_t *src = (const uint8_t *)data;
	uint32_t pixel = 0xFFFFFFFF;
	uint8_t *dst = (uint8_t *)&pixel;

	dst[0] = src[0];
	dst[1] = src[1];
	dst[2] = src[2];
	return pixel;
}
// Read callback for 16bpp unsigned RG format
// Copies two bytes from data over the first two bytes of an all-ones
// sample, so the remaining bytes stay 0xFF.
uint32_t readRG(void *data) {
	const uint8_t *src = (const uint8_t *)data;
	uint32_t pixel = 0xFFFFFFFF;
	uint8_t *dst = (uint8_t *)&pixel;

	dst[0] = src[0];
	dst[1] = src[1];
	return pixel;
}
// Read callback for 8bpp unsigned R format
// Copies one byte from data over the first byte of an all-ones sample, so
// the remaining bytes stay 0xFF.
uint32_t readR(void *data) {
	uint32_t pixel = 0xFFFFFFFF;
	*(uint8_t *)&pixel = *(const uint8_t *)data;
	return pixel;
}
// Write callback for 32bpp unsigned RGBA format
// Stores all four bytes of color, in memory order, at data.
void writeRGBA(void *data, uint32_t color) {
	memcpy(data, &color, sizeof(color));
}
// Write callback for 24bpp unsigned RGB format
// Stores the first three bytes of color, in memory order, at data.
void writeRGB(void *data, uint32_t color) {
	const uint8_t *src = (const uint8_t *)&color;
	uint8_t *dst = (uint8_t *)data;
	int k;

	for (k = 0; k < 3; k++)
		dst[k] = src[k];
}
// Write callback for 16bpp unsigned RG format
// Stores the first two bytes of color, in memory order, at data.
void writeRG(void *data, uint32_t color) {
	const uint8_t *src = (const uint8_t *)&color;
	uint8_t *dst = (uint8_t *)data;

	dst[0] = src[0];
	dst[1] = src[1];
}
// Write callback for 16bpp unsigned RA format
// Stores the first and the fourth byte of color (in memory order) as two
// consecutive bytes at data.
void writeRA(void *data, uint32_t color) {
	const uint8_t *sample = (const uint8_t *)&color;
	uint8_t *out = (uint8_t *)data;

	memcpy(&out[0], &sample[0], 1);
	memcpy(&out[1], &sample[3], 1);
}
// Write callback for 8bpp unsigned R format
// Stores the first byte of color (in memory order) at data.
void writeR(void *data, uint32_t color) {
	*(uint8_t *)data = *(const uint8_t *)&color;
}

23
deps/vitaGL/source/texture_callbacks.h vendored Normal file
View File

@ -0,0 +1,23 @@
/*
* texture_callbacks.h:
* Header file for texture data reading/writing callbacks exposed by texture_callbacks.c
*
* Read callbacks take a pointer to one source pixel and return it as a
* 32 bit sample; write callbacks take a destination pointer and such a
* sample and store the bytes their format needs.
* This header uses uint32_t but includes nothing itself, so the including
* file must provide the fixed-width integer types first.
*/
#ifndef _TEXTURE_CALLBACKS_H_
#define _TEXTURE_CALLBACKS_H_
// Read callbacks
uint32_t readR(void *data); // 1 byte source, remaining sample bytes set to 0xFF
uint32_t readRG(void *data); // 2 byte source, remaining sample bytes set to 0xFF
uint32_t readRGB(void *data); // 3 byte source, remaining sample byte set to 0xFF
uint32_t readRGBA(void *data); // 4 byte source
uint32_t readRGBA5551(void *data); // 16 bit packed source, channels expanded to 8 bits
// Write callbacks
void writeR(void *data, uint32_t color); // Stores the first byte of the sample
void writeRG(void *data, uint32_t color); // Stores the first two bytes of the sample
void writeRA(void *data, uint32_t color); // Stores the first and fourth byte of the sample
void writeRGB(void *data, uint32_t color); // Stores the first three bytes of the sample
void writeRGBA(void *data, uint32_t color); // Stores all four bytes of the sample
#endif

662
deps/vitaGL/source/textures.c vendored Normal file
View File

@ -0,0 +1,662 @@
/*
* textures.c:
* Implementation for textures related functions
*/
#include "shared.h"
// Global texture state owned by this file (declared extern in state.h)
texture_unit texture_units[GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS]; // Available texture units
palette *color_table = NULL; // Current in-use color table (NULL until glColorTable allocates one)
int8_t server_texture_unit = 0; // Current in use server side texture unit
/*
* ------------------------------
* - IMPLEMENTATION STARTS HERE -
* ------------------------------
*/
// Reserves up to n unused texture slots of the active server texture unit
// and returns their indices as texture names.
// n: number of names requested; a negative value raises GL_INVALID_VALUE
// (unless SKIP_ERROR_HANDLING is defined). With n == 0 nothing is reserved
// and res is not written.
// res: output array with room for n names. When fewer than n slots are
// free, only the first entries are written and the rest are left untouched.
void glGenTextures(GLsizei n, GLuint *res) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (n < 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	// Aliasing to make code more readable
	texture_unit *tex_unit = &texture_units[server_texture_unit];
	// Reserving textures and returning their ids while slots are available.
	// The count is checked before each reservation so that a request for
	// zero names neither writes to res nor consumes a slot.
	int i, j = 0;
	for (i = 0; i < TEXTURES_NUM && j < n; i++) {
		if (!(tex_unit->textures[i].used)) {
			res[j++] = i;
			tex_unit->textures[i].used = 1;
		}
	}
}
// Makes the given texture the current one of the active server texture unit.
// target: only GL_TEXTURE_2D is supported; anything else raises
// GL_INVALID_ENUM and leaves the binding unchanged.
// texture: texture name, stored as the unit's tex_id.
void glBindTexture(GLenum target, GLuint texture) {
	if (target != GL_TEXTURE_2D) {
		error = GL_INVALID_ENUM;
		return;
	}

	// Setting current in use texture id for the in use server texture unit
	texture_units[server_texture_unit].tex_id = texture;
}
// Releases the given textures of the active server texture unit and marks
// their slots as free again.
// n: number of names in gl_textures; a negative value raises
// GL_INVALID_VALUE (unless SKIP_ERROR_HANDLING is defined).
// gl_textures: texture names to delete. Names outside the texture pool are
// silently ignored, as openGL does for names that are not textures.
void glDeleteTextures(GLsizei n, const GLuint *gl_textures) {
#ifndef SKIP_ERROR_HANDLING
	// Error handling
	if (n < 0) {
		error = GL_INVALID_VALUE;
		return;
	}
#endif
	// Aliasing to make code more readable
	texture_unit *tex_unit = &texture_units[server_texture_unit];
	// Deallocating given textures and invalidating used texture ids
	int j;
	for (j = 0; j < n; j++) {
		GLuint i = gl_textures[j];
		// Names are plain indices into textures[]: skip anything that would
		// index outside of the pool
		if (i >= TEXTURES_NUM)
			continue;
		tex_unit->textures[i].used = 0;
		gpu_free_texture(&tex_unit->textures[i]);
	}
}
// Defines the image of the texture currently bound on the active server
// texture unit.
// target: only GL_TEXTURE_2D is supported, anything else raises GL_INVALID_ENUM.
// level: 0 allocates the texture through gpu_alloc_texture, any other value
// calls gpu_alloc_mipmaps(level, tex).
// internalFormat: requested texture format; the legacy values 1-4 are first
// mapped to GL_RED / GL_RG / GL_RGB / GL_RGBA. A format without a sceGxm
// counterpart raises GL_INVALID_ENUM and the call returns without touching
// the texture.
// width, height: texture size; values above GXM_TEX_MAX_SIZE raise GL_INVALID_VALUE.
// border: unused.
// format, type: layout of the source pixels; they select the read callback
// and the source bytes per pixel. An unsupported type for a known format
// raises GL_INVALID_ENUM but the call still proceeds with no read callback.
// data: source pixels, forwarded to gpu_alloc_texture.
void glTexImage2D(GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *data) {
	// Setting some aliases to make code more readable
	texture_unit *tex_unit = &texture_units[server_texture_unit];
	int texture2d_idx = tex_unit->tex_id;
	texture *tex = &tex_unit->textures[texture2d_idx];
	SceGxmTextureFormat tex_format;
	uint8_t data_bpp = 0;
	// Support for legacy GL1.0 internalFormat
	switch (internalFormat) {
	case 1:
		internalFormat = GL_RED;
		break;
	case 2:
		internalFormat = GL_RG;
		break;
	case 3:
		internalFormat = GL_RGB;
		break;
	case 4:
		internalFormat = GL_RGBA;
		break;
	}
	/*
	 * Callbacks are actually used to just perform down/up-sampling
	 * between U8 texture formats. Reads are expected to give as result
	 * a RGBA sample that will be written depending on texture format
	 * by the write callback
	 */
	void (*write_cb)(void *, uint32_t) = NULL;
	uint32_t (*read_cb)(void *) = NULL;
	// Detecting proper read callback and source bpp
	switch (format) {
	case GL_RED:
	case GL_ALPHA:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			read_cb = readR;
			data_bpp = 1;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	case GL_RG:
	case GL_LUMINANCE_ALPHA:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			read_cb = readRG;
			data_bpp = 2;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	case GL_RGB:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			data_bpp = 3;
			read_cb = readRGB;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	case GL_RGBA:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			data_bpp = 4;
			read_cb = readRGBA;
			break;
		case GL_UNSIGNED_SHORT_5_5_5_1:
			data_bpp = 2;
			read_cb = readRGBA5551;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	}
	switch (target) {
	case GL_TEXTURE_2D:
		// Detecting proper write callback and texture format
		switch (internalFormat) {
		case GL_RGB:
			write_cb = writeRGB;
			tex_format = SCE_GXM_TEXTURE_FORMAT_U8U8U8_BGR;
			break;
		case GL_RGBA:
			write_cb = writeRGBA;
			tex_format = SCE_GXM_TEXTURE_FORMAT_U8U8U8U8_ABGR;
			break;
		case GL_LUMINANCE:
			write_cb = writeR;
			tex_format = SCE_GXM_TEXTURE_FORMAT_L8;
			break;
		case GL_LUMINANCE_ALPHA:
			write_cb = writeRG;
			tex_format = SCE_GXM_TEXTURE_FORMAT_A8L8;
			break;
		case GL_INTENSITY:
			write_cb = writeR;
			tex_format = SCE_GXM_TEXTURE_FORMAT_U8_RRRR;
			break;
		case GL_ALPHA:
			write_cb = writeR;
			tex_format = SCE_GXM_TEXTURE_FORMAT_A8;
			break;
		case GL_COLOR_INDEX8_EXT:
			write_cb = writeR; // TODO: This is a hack
			tex_format = SCE_GXM_TEXTURE_FORMAT_P8_ABGR;
			break;
		default:
			// No sceGxm format selected: bail out instead of allocating a
			// texture with an uninitialized tex_format
			error = GL_INVALID_ENUM;
			return;
		}
		// Checking if texture is too big for sceGxm
		if (width > GXM_TEX_MAX_SIZE || height > GXM_TEX_MAX_SIZE) {
			error = GL_INVALID_VALUE;
			return;
		}
		// Allocating texture/mipmaps depending on user call
		tex->type = internalFormat;
		tex->write_cb = write_cb;
		if (level == 0)
			gpu_alloc_texture(width, height, tex_format, data, tex, data_bpp, read_cb, write_cb);
		else
			gpu_alloc_mipmaps(level, tex);
		// Setting texture parameters
		sceGxmTextureSetUAddrMode(&tex->gxm_tex, tex_unit->u_mode);
		sceGxmTextureSetVAddrMode(&tex->gxm_tex, tex_unit->v_mode);
		sceGxmTextureSetMinFilter(&tex->gxm_tex, tex_unit->min_filter);
		sceGxmTextureSetMagFilter(&tex->gxm_tex, tex_unit->mag_filter);
		// Setting palette if the format requests one; color_table stays NULL
		// until glColorTable has been called, so it must be checked too
		if (tex->valid && tex->palette_UID && color_table != NULL)
			sceGxmTextureSetPalette(&tex->gxm_tex, color_table->data);
		break;
	default:
		error = GL_INVALID_ENUM;
		break;
	}
}
// Overwrites a rectangle of the texture currently bound on the active
// server texture unit, converting each source pixel through a read and a
// write callback.
// target: only GL_TEXTURE_2D is supported, anything else raises GL_INVALID_ENUM.
// level: unused.
// xoffset, yoffset: position of the rectangle inside the texture, in texels.
// width, height: size of the rectangle, in texels.
// format, type: layout of the source pixels; an unsupported combination
// raises GL_INVALID_ENUM and nothing is written.
// pixels: tightly packed source pixels (width * height entries).
// Raises GL_INVALID_OPERATION when the bound texture has a type for which
// no write callback exists.
void glTexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels) {
	// Setting some aliases to make code more readable
	texture_unit *tex_unit = &texture_units[server_texture_unit];
	int texture2d_idx = tex_unit->tex_id;
	texture *target_texture = &tex_unit->textures[texture2d_idx];
	// Calculating implicit texture stride and start address of requested texture modification
	SceGxmTextureFormat tex_format = sceGxmTextureGetFormat(&target_texture->gxm_tex);
	uint8_t bpp = tex_format_to_bytespp(tex_format);
	uint32_t stride = ALIGN(sceGxmTextureGetWidth(&target_texture->gxm_tex), 8) * bpp;
	uint8_t *ptr = (uint8_t *)sceGxmTextureGetData(&target_texture->gxm_tex) + xoffset * bpp + yoffset * stride;
	uint8_t *ptr_line = ptr;
	uint8_t data_bpp = 0;
	int i, j;
	/*
	 * Callbacks are actually used to just perform down/up-sampling
	 * between U8 texture formats. Reads are expected to give as result
	 * a RGBA sample that will be written depending on texture format
	 * by the write callback
	 */
	void (*write_cb)(void *, uint32_t) = NULL;
	uint32_t (*read_cb)(void *) = NULL;
	// Detecting proper read callback and source bpp
	switch (format) {
	case GL_RED:
	case GL_ALPHA:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			read_cb = readR;
			data_bpp = 1;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	case GL_RG:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			read_cb = readRG;
			data_bpp = 2;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	case GL_RGB:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			data_bpp = 3;
			read_cb = readRGB;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	case GL_RGBA:
		switch (type) {
		case GL_UNSIGNED_BYTE:
			data_bpp = 4;
			read_cb = readRGBA;
			break;
		case GL_UNSIGNED_SHORT_5_5_5_1:
			data_bpp = 2;
			read_cb = readRGBA5551;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		break;
	}
	switch (target) {
	case GL_TEXTURE_2D:
		// Detecting proper write callback
		switch (target_texture->type) {
		case GL_RGB:
			write_cb = writeRGB;
			break;
		case GL_RGBA:
			write_cb = writeRGBA;
			break;
		case GL_LUMINANCE:
			write_cb = writeR;
			break;
		case GL_LUMINANCE_ALPHA:
			write_cb = writeRA;
			break;
		case GL_INTENSITY:
			write_cb = writeR;
			break;
		case GL_ALPHA:
			write_cb = writeR;
			break;
		}
		// The copy loop calls both callbacks unconditionally: refuse to run
		// it when the source format or the texture type left one unset
		if (read_cb == NULL) {
			error = GL_INVALID_ENUM;
			return;
		}
		if (write_cb == NULL) {
			error = GL_INVALID_OPERATION;
			return;
		}
		// Executing texture modification via callbacks
		uint8_t *data = (uint8_t *)pixels;
		for (i = 0; i < height; i++) {
			for (j = 0; j < width; j++) {
				uint32_t clr = read_cb((uint8_t *)data);
				write_cb(ptr, clr);
				data += data_bpp;
				ptr += bpp;
			}
			// Jumping to the same column on the next texture row
			ptr = ptr_line + stride;
			ptr_line = ptr;
		}
		break;
	default:
		error = GL_INVALID_ENUM;
		break;
	}
}
/*
 * glColorTable:
 * Replaces the current color table (palette) with a new one built from data.
 *
 * target         - must be GL_COLOR_TABLE
 * internalformat - unused by this implementation
 * width          - number of palette entries in data (0..256)
 * format         - must be GL_RGBA (4 bytes per entry)
 * type           - unused by this implementation
 * data           - source entries, or NULL for a zero-filled palette
 *
 * On invalid arguments the error state is set and the current color
 * table is left untouched.
 */
void glColorTable(GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *data) {
	// Validating arguments first, so a failing call does not destroy the current table
	uint8_t bpp = 0;
	switch (target) {
	case GL_COLOR_TABLE:
		switch (format) {
		case GL_RGBA:
			bpp = 4;
			break;
		default:
			error = GL_INVALID_ENUM;
			return;
		}
		break;
	default:
		error = GL_INVALID_ENUM;
		return;
	}

	// The palette buffer holds 256 entries; a negative width would also
	// turn into a huge unsigned count inside gpu_alloc_palette
	if ((width < 0) || (width > 256)) {
		error = GL_INVALID_VALUE;
		return;
	}

	// Checking if a color table is already enabled, if so, deallocating it
	if (color_table != NULL) {
		gpu_free_palette(color_table);
		color_table = NULL;
	}

	// Allocating and initializing color table
	color_table = gpu_alloc_palette(data, width, bpp);
}
/*
 * glTexParameteri:
 * Updates a sampling parameter (min/mag filter, S/T wrap mode) of the active
 * texture unit and pushes the resulting value to the bound sceGxm texture.
 * Unknown target, pname or param values set the error state to
 * GL_INVALID_ENUM; for an unknown param the previously stored value is
 * re-applied to the texture.
 */
void glTexParameteri(GLenum target, GLenum pname, GLint param) {
	// Active unit and the texture currently bound to it
	texture_unit *unit = &texture_units[server_texture_unit];
	int bound_idx = unit->tex_id;
	texture *bound = &unit->textures[bound_idx];

	// Only 2D textures are supported
	if (target != GL_TEXTURE_2D) {
		error = GL_INVALID_ENUM;
		return;
	}

	if (pname == GL_TEXTURE_MIN_FILTER) {
		switch (param) {
		case GL_NEAREST:
			unit->min_filter = SCE_GXM_TEXTURE_FILTER_POINT;
			break;
		case GL_LINEAR:
			unit->min_filter = SCE_GXM_TEXTURE_FILTER_LINEAR;
			break;
		case GL_NEAREST_MIPMAP_NEAREST:
		case GL_LINEAR_MIPMAP_NEAREST:
		case GL_NEAREST_MIPMAP_LINEAR:
		case GL_LINEAR_MIPMAP_LINEAR:
			// TODO: Implement this (current filter is kept)
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		sceGxmTextureSetMinFilter(&bound->gxm_tex, unit->min_filter);
	} else if (pname == GL_TEXTURE_MAG_FILTER) {
		switch (param) {
		case GL_NEAREST:
			unit->mag_filter = SCE_GXM_TEXTURE_FILTER_POINT;
			break;
		case GL_LINEAR:
			unit->mag_filter = SCE_GXM_TEXTURE_FILTER_LINEAR;
			break;
		case GL_NEAREST_MIPMAP_NEAREST:
		case GL_LINEAR_MIPMAP_NEAREST:
		case GL_NEAREST_MIPMAP_LINEAR:
		case GL_LINEAR_MIPMAP_LINEAR:
			// TODO: Implement this (current filter is kept)
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		sceGxmTextureSetMagFilter(&bound->gxm_tex, unit->mag_filter);
	} else if (pname == GL_TEXTURE_WRAP_S) {
		switch (param) {
		case GL_CLAMP_TO_EDGE:
			unit->u_mode = SCE_GXM_TEXTURE_ADDR_CLAMP;
			break;
		case GL_REPEAT:
			unit->u_mode = SCE_GXM_TEXTURE_ADDR_REPEAT;
			break;
		case GL_MIRRORED_REPEAT:
			unit->u_mode = SCE_GXM_TEXTURE_ADDR_MIRROR;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		sceGxmTextureSetUAddrMode(&bound->gxm_tex, unit->u_mode);
	} else if (pname == GL_TEXTURE_WRAP_T) {
		switch (param) {
		case GL_CLAMP_TO_EDGE:
			unit->v_mode = SCE_GXM_TEXTURE_ADDR_CLAMP;
			break;
		case GL_REPEAT:
			unit->v_mode = SCE_GXM_TEXTURE_ADDR_REPEAT;
			break;
		case GL_MIRRORED_REPEAT:
			unit->v_mode = SCE_GXM_TEXTURE_ADDR_MIRROR;
			break;
		default:
			error = GL_INVALID_ENUM;
			break;
		}
		sceGxmTextureSetVAddrMode(&bound->gxm_tex, unit->v_mode);
	} else {
		// Unsupported parameter name
		error = GL_INVALID_ENUM;
	}
}
/*
 * glTexParameterf:
 * Float variant of the texture parameter setter. The float param is compared
 * directly against the GL enum values; values that match none of them leave
 * the stored setting unchanged (no error is raised for them). The stored
 * value is then applied to the bound sceGxm texture.
 */
void glTexParameterf(GLenum target, GLenum pname, GLfloat param) {
	// Active unit and the texture currently bound to it
	texture_unit *unit = &texture_units[server_texture_unit];
	int bound_idx = unit->tex_id;
	texture *bound = &unit->textures[bound_idx];

	// Only 2D textures are supported
	if (target != GL_TEXTURE_2D) {
		error = GL_INVALID_ENUM;
		return;
	}

	switch (pname) {
	case GL_TEXTURE_MIN_FILTER:
		if (param == GL_NEAREST)
			unit->min_filter = SCE_GXM_TEXTURE_FILTER_POINT;
		else if (param == GL_LINEAR)
			unit->min_filter = SCE_GXM_TEXTURE_FILTER_LINEAR;
		sceGxmTextureSetMinFilter(&bound->gxm_tex, unit->min_filter);
		break;
	case GL_TEXTURE_MAG_FILTER:
		if (param == GL_NEAREST)
			unit->mag_filter = SCE_GXM_TEXTURE_FILTER_POINT;
		else if (param == GL_LINEAR)
			unit->mag_filter = SCE_GXM_TEXTURE_FILTER_LINEAR;
		sceGxmTextureSetMagFilter(&bound->gxm_tex, unit->mag_filter);
		break;
	case GL_TEXTURE_WRAP_S:
		if (param == GL_CLAMP_TO_EDGE)
			unit->u_mode = SCE_GXM_TEXTURE_ADDR_CLAMP;
		else if (param == GL_REPEAT)
			unit->u_mode = SCE_GXM_TEXTURE_ADDR_REPEAT;
		else if (param == GL_MIRRORED_REPEAT)
			unit->u_mode = SCE_GXM_TEXTURE_ADDR_MIRROR;
		sceGxmTextureSetUAddrMode(&bound->gxm_tex, unit->u_mode);
		break;
	case GL_TEXTURE_WRAP_T:
		if (param == GL_CLAMP_TO_EDGE)
			unit->v_mode = SCE_GXM_TEXTURE_ADDR_CLAMP;
		else if (param == GL_REPEAT)
			unit->v_mode = SCE_GXM_TEXTURE_ADDR_REPEAT;
		else if (param == GL_MIRRORED_REPEAT)
			unit->v_mode = SCE_GXM_TEXTURE_ADDR_MIRROR;
		sceGxmTextureSetVAddrMode(&bound->gxm_tex, unit->v_mode);
		break;
	default:
		// Unsupported parameter name
		error = GL_INVALID_ENUM;
		break;
	}
}
/*
 * glActiveTexture:
 * Selects the server-side texture unit that subsequent texture calls act on.
 * texture must lie in GL_TEXTURE0..GL_TEXTURE31; anything else sets
 * GL_INVALID_ENUM and leaves the active unit unchanged (unless error
 * handling is compiled out with SKIP_ERROR_HANDLING).
 */
void glActiveTexture(GLenum texture) {
#ifndef SKIP_ERROR_HANDLING
	// A value is invalid when it is below OR above the accepted range; the
	// previous '&&' test could never be true, so bad values slipped through
	// NOTE(review): confirm texture_units[] really has 32 entries, otherwise
	// tighten the upper bound to the actual unit count
	if ((texture < GL_TEXTURE0) || (texture > GL_TEXTURE31)) {
		error = GL_INVALID_ENUM;
		return;
	}
#endif
	// Changing current in use server texture unit
	server_texture_unit = texture - GL_TEXTURE0;
}
/*
 * glGenerateMipmap:
 * Builds the full mipmap chain for the texture bound to the active unit and
 * re-applies the unit's addressing/filter settings to it, enabling mip
 * filtering. Only GL_TEXTURE_2D is accepted as target.
 */
void glGenerateMipmap(GLenum target) {
	// Active unit and the texture currently bound to it
	texture_unit *unit = &texture_units[server_texture_unit];
	int bound_idx = unit->tex_id;
	texture *bound = &unit->textures[bound_idx];

#ifndef SKIP_ERROR_HANDLING
	// Nothing to do for a texture that holds no data
	if (!bound->valid)
		return;
#endif

	if (target != GL_TEXTURE_2D) {
		error = GL_INVALID_ENUM;
		return;
	}

	// A negative level requests the maximum possible number of mipmaps
	gpu_alloc_mipmaps(-1, bound);

	// Re-applying the unit's sampling state to the texture
	sceGxmTextureSetUAddrMode(&bound->gxm_tex, unit->u_mode);
	sceGxmTextureSetVAddrMode(&bound->gxm_tex, unit->v_mode);
	sceGxmTextureSetMinFilter(&bound->gxm_tex, unit->min_filter);
	sceGxmTextureSetMagFilter(&bound->gxm_tex, unit->mag_filter);
	sceGxmTextureSetMipFilter(&bound->gxm_tex, SCE_GXM_TEXTURE_MIP_FILTER_ENABLED);
}
/*
 * glTexEnvf:
 * Sets the texture environment mode of the active texture unit from a float
 * parameter. Only GL_TEXTURE_ENV / GL_TEXTURE_ENV_MODE is supported; other
 * target or pname values set GL_INVALID_ENUM. A param matching none of the
 * known modes is silently ignored.
 */
void glTexEnvf(GLenum target, GLenum pname, GLfloat param) {
	texture_unit *unit = &texture_units[server_texture_unit];

	// Rejecting everything but the environment mode setting
	if ((target != GL_TEXTURE_ENV) || (pname != GL_TEXTURE_ENV_MODE)) {
		error = GL_INVALID_ENUM;
		return;
	}

	// Mapping the GL mode onto the internal environment mode
	if (param == GL_MODULATE)
		unit->env_mode = MODULATE;
	else if (param == GL_DECAL)
		unit->env_mode = DECAL;
	else if (param == GL_REPLACE)
		unit->env_mode = REPLACE;
	else if (param == GL_BLEND)
		unit->env_mode = BLEND;
	else if (param == GL_ADD)
		unit->env_mode = ADD;
}
/*
 * glTexEnvi:
 * Integer variant of the texture environment setter. Only
 * GL_TEXTURE_ENV / GL_TEXTURE_ENV_MODE is supported; other target or pname
 * values set GL_INVALID_ENUM. A param matching none of the known modes is
 * silently ignored.
 */
void glTexEnvi(GLenum target, GLenum pname, GLint param) {
	texture_unit *unit = &texture_units[server_texture_unit];

	// Rejecting everything but the environment mode setting
	if ((target != GL_TEXTURE_ENV) || (pname != GL_TEXTURE_ENV_MODE)) {
		error = GL_INVALID_ENUM;
		return;
	}

	// Mapping the GL mode onto the internal environment mode
	switch (param) {
	case GL_MODULATE:
		unit->env_mode = MODULATE;
		break;
	case GL_DECAL:
		unit->env_mode = DECAL;
		break;
	case GL_REPLACE:
		unit->env_mode = REPLACE;
		break;
	case GL_BLEND:
		unit->env_mode = BLEND;
		break;
	case GL_ADD:
		unit->env_mode = ADD;
		break;
	}
}
/*
 * vglGetTexDataPointer:
 * Returns the data pointer stored in the texture bound to the active unit.
 * Only GL_TEXTURE_2D is accepted; any other target sets GL_INVALID_ENUM and
 * yields NULL.
 */
void *vglGetTexDataPointer(GLenum target) {
	if (target == GL_TEXTURE_2D) {
		// Resolving the texture bound to the active unit
		texture_unit *unit = &texture_units[server_texture_unit];
		int bound_idx = unit->tex_id;
		texture *bound = &unit->textures[bound_idx];
		return bound->data;
	}

	error = GL_INVALID_ENUM;
	return NULL;
}

330
deps/vitaGL/source/utils/gpu_utils.c vendored Normal file
View File

@ -0,0 +1,330 @@
/*
* gpu_utils.c:
* Utilities for GPU usage
*/
#include "../shared.h"
// VRAM usage setting: when non-zero, textures and palettes are requested
// from VGL_MEM_VRAM first, otherwise from VGL_MEM_RAM
uint8_t use_vram = 0;
// vitaGL memory pool setup (bump allocator state used by the gpu_pool_* functions)
// Base address of the pool, set by gpu_pool_init
static void *pool_addr = NULL;
// Offset in bytes of the next free byte inside the pool
static unsigned int pool_index = 0;
// Total size in bytes of the pool
static unsigned int pool_size = 0;
/*
 * gpu_alloc_mapped:
 * Allocates size bytes, trying several memory types in turn:
 *   1. the type requested through *type
 *   2. VGL_MEM_RAM if use_vram is set, VGL_MEM_VRAM otherwise
 *   3. VGL_MEM_SLOW
 *   4. the newlib heap via malloc (reported as VGL_MEM_EXTERNAL)
 * *type is updated to the memory type of the last attempt, which is the one
 * the returned block belongs to. Returns NULL if every attempt fails.
 */
void *gpu_alloc_mapped(size_t size, vglMemType *type) {
	// First choice: the memory type asked by the caller
	void *block = mempool_alloc(size, *type);
	if (block != NULL)
		return block;

	// Requested memory type finished, using other one
	// NOTE(review): the fallback is picked from use_vram, not from the
	// requested type, so it may retry the same type - confirm this is intended
	*type = use_vram ? VGL_MEM_RAM : VGL_MEM_VRAM;
	block = mempool_alloc(size, *type);
	if (block != NULL)
		return block;

	// Even the other one failed, using our last resort
	*type = VGL_MEM_SLOW;
	block = mempool_alloc(size, *type);
	if (block != NULL)
		return block;

	// Every mempool is exhausted, falling back to the regular heap
	*type = VGL_MEM_EXTERNAL;
	return malloc(size);
}
/*
 * gpu_vertex_usse_alloc_mapped:
 * Allocates size bytes from VGL_MEM_RAM and maps them into sceGxm as vertex
 * USSE memory; the resulting USSE offset is written to *usse_offset.
 * Returns the address of the allocated block.
 */
void *gpu_vertex_usse_alloc_mapped(size_t size, unsigned int *usse_offset) {
	void *block = mempool_alloc(size, VGL_MEM_RAM);

	// Making the block visible to sceGxm as vertex USSE memory
	sceGxmMapVertexUsseMemory(block, size, usse_offset);

	return block;
}
/*
 * gpu_vertex_usse_free_mapped:
 * Releases a block obtained from gpu_vertex_usse_alloc_mapped: the block is
 * first unmapped from sceGxm, then returned to the VGL_MEM_RAM mempool.
 */
void gpu_vertex_usse_free_mapped(void *addr) {
	// The mapping has to go before the memory itself is released
	sceGxmUnmapVertexUsseMemory(addr);

	mempool_free(addr, VGL_MEM_RAM);
}
/*
 * gpu_fragment_usse_alloc_mapped:
 * Allocates size bytes from VGL_MEM_RAM and maps them into sceGxm as fragment
 * USSE memory; the resulting USSE offset is written to *usse_offset.
 * Returns the address of the allocated block.
 */
void *gpu_fragment_usse_alloc_mapped(size_t size, unsigned int *usse_offset) {
	void *block = mempool_alloc(size, VGL_MEM_RAM);

	// Making the block visible to sceGxm as fragment USSE memory
	sceGxmMapFragmentUsseMemory(block, size, usse_offset);

	return block;
}
/*
 * gpu_fragment_usse_free_mapped:
 * Releases a block obtained from gpu_fragment_usse_alloc_mapped: the block is
 * first unmapped from sceGxm, then returned to the VGL_MEM_RAM mempool.
 */
void gpu_fragment_usse_free_mapped(void *addr) {
	// The mapping has to go before the memory itself is released
	sceGxmUnmapFragmentUsseMemory(addr);

	mempool_free(addr, VGL_MEM_RAM);
}
/*
 * gpu_pool_malloc:
 * Reserves size bytes from the vitaGL mempool (bump allocation).
 * Returns the address of the reserved space, or NULL when the pool is not
 * initialized or has not enough free space left.
 */
void *gpu_pool_malloc(unsigned int size) {
	// No pool, nothing to reserve
	if (pool_addr == NULL)
		return NULL;
	// Comparing against the remaining space instead of computing
	// pool_index + size, which could wrap around for huge requests
	if ((pool_index <= pool_size) && (size < (pool_size - pool_index))) {
		// Byte pointer arithmetic avoids squeezing the address through an unsigned int
		void *addr = (void *)((uint8_t *)pool_addr + pool_index);
		pool_index += size;
		return addr;
	}
	return NULL;
}
/*
 * gpu_pool_memalign:
 * Reserves size bytes from the vitaGL mempool, starting at a pool offset
 * rounded up to alignment (which must be a power of two, as required by
 * ALIGN). The alignment applies to the offset inside the pool, not to the
 * absolute address.
 * Returns the address of the reserved space, or NULL when the pool is not
 * initialized, the alignment is unusable or there is not enough free space.
 */
void *gpu_pool_memalign(unsigned int size, unsigned int alignment) {
	// No pool, nothing to reserve
	if (pool_addr == NULL)
		return NULL;
	// Aligning the current pool offset
	unsigned int new_index = ALIGN(pool_index, alignment);
	// An aligned offset below the current one means the rounding wrapped
	// around or alignment was 0: accepting it would hand out memory that is
	// already in use
	if (new_index < pool_index)
		return NULL;
	// Comparing against the remaining space instead of computing
	// new_index + size, which could wrap around for huge requests
	if ((new_index <= pool_size) && (size < (pool_size - new_index))) {
		// Byte pointer arithmetic avoids squeezing the address through an unsigned int
		void *addr = (void *)((uint8_t *)pool_addr + new_index);
		pool_index = new_index + size;
		return addr;
	}
	return NULL;
}
/*
 * gpu_pool_free_space:
 * Returns the amount of bytes between the current pool offset and the end
 * of the vitaGL mempool.
 */
unsigned int gpu_pool_free_space() {
	const unsigned int remaining = pool_size - pool_index;
	return remaining;
}
/*
 * gpu_pool_reset:
 * Makes the whole vitaGL mempool available again by rewinding the
 * allocation offset; the pool memory itself is neither freed nor cleared.
 */
void gpu_pool_reset() {
// Resetting vitaGL available mempool space
pool_index = 0;
}
void gpu_pool_init(uint32_t temp_pool_size) {
// Allocating vitaGL mempool
pool_size = temp_pool_size;
vglMemType type = VGL_MEM_RAM;
pool_addr = gpu_alloc_mapped(temp_pool_size, &type);
}
/*
 * tex_format_to_bytespp:
 * Returns the number of bytes per pixel for a sceGxm texture format.
 * Only the bits selected by the 0x9f000000 mask are inspected; formats that
 * are not explicitly listed as 1, 2 or 3 bytes wide are reported as 4 bytes.
 */
int tex_format_to_bytespp(SceGxmTextureFormat format) {
	int bytes;

	switch (format & 0x9f000000U) {
	// 8 bit formats
	case SCE_GXM_TEXTURE_BASE_FORMAT_U8:
	case SCE_GXM_TEXTURE_BASE_FORMAT_S8:
	case SCE_GXM_TEXTURE_BASE_FORMAT_P8:
		bytes = 1;
		break;
	// 16 bit formats
	case SCE_GXM_TEXTURE_BASE_FORMAT_U4U4U4U4:
	case SCE_GXM_TEXTURE_BASE_FORMAT_U8U3U3U2:
	case SCE_GXM_TEXTURE_BASE_FORMAT_U1U5U5U5:
	case SCE_GXM_TEXTURE_BASE_FORMAT_U5U6U5:
	case SCE_GXM_TEXTURE_BASE_FORMAT_S5S5U6:
	case SCE_GXM_TEXTURE_BASE_FORMAT_U8U8:
	case SCE_GXM_TEXTURE_BASE_FORMAT_S8S8:
		bytes = 2;
		break;
	// 24 bit formats
	case SCE_GXM_TEXTURE_BASE_FORMAT_U8U8U8:
	case SCE_GXM_TEXTURE_BASE_FORMAT_S8S8S8:
		bytes = 3;
		break;
	// 32 bit formats (U8U8U8U8, S8S8S8S8, F32, U32, S32) and anything unknown
	default:
		bytes = 4;
		break;
	}

	return bytes;
}
/*
 * gpu_alloc_palette:
 * Allocates a palette object together with a 256 entries (32 bit each)
 * palette buffer.
 *
 * data - source entries, or NULL for an all-zero palette
 * w    - number of entries available in data (clamped to 256)
 * bpe  - bytes per entry of data; only 4 is supported, any other value
 *        yields an all-zero palette
 *
 * Entries not covered by data are zero-filled.
 * Returns the new palette, or NULL if any allocation fails.
 */
palette *gpu_alloc_palette(const void *data, uint32_t w, uint32_t bpe) {
	const uint32_t max_entries = 256;
	const size_t palette_size = max_entries * sizeof(uint32_t);

	// Allocating a palette object
	palette *res = (palette *)malloc(sizeof(palette));
	if (res == NULL)
		return NULL;
	res->type = use_vram ? VGL_MEM_VRAM : VGL_MEM_RAM;

	// Allocating palette data buffer
	void *texture_palette = gpu_alloc_mapped(palette_size, &res->type);
	if (texture_palette == NULL) {
		free(res);
		return NULL;
	}

	// Zero-filling first, so no entry is ever left uninitialized
	memset(texture_palette, 0, palette_size);

	// Copying the provided entries, never writing past the palette buffer
	if ((data != NULL) && (bpe == 4)) {
		if (w > max_entries)
			w = max_entries;
		memcpy(texture_palette, data, w * sizeof(uint32_t));
	}
	res->data = texture_palette;

	// Returning palette
	return res;
}
/*
 * gpu_free_texture:
 * Releases the data buffer of a texture (if any) and marks the texture
 * object as invalid. The data pointer is cleared as well, so that a second
 * call, or a later read of tex->data, cannot touch the released block.
 */
void gpu_free_texture(texture *tex) {
	// Deallocating texture
	if (tex->data != NULL) {
		mempool_free(tex->data, tex->mtype);
		// Dropping the now dangling pointer
		tex->data = NULL;
	}
	// Invalidating texture object
	tex->valid = 0;
}
/*
 * gpu_alloc_texture:
 * (Re)creates the storage of a texture object and initializes it as a linear
 * sceGxm texture.
 *
 * w, h     - texture size in pixels
 * format   - sceGxm texture format of the destination
 * data     - source pixels (tightly packed, src_bpp bytes each), or NULL to
 *            get a zero-filled texture
 * tex      - texture object to (re)initialize; a valid one is freed first
 * src_bpp  - bytes per pixel of data
 * read_cb  - reads one source pixel and returns it as a 32 bit value
 * write_cb - writes a 32 bit value as one destination pixel
 *
 * Destination rows are ALIGN(w, 8) pixels wide. If the allocation fails the
 * function returns without initializing the texture.
 */
void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *), void (*write_cb)(void *, uint32_t)) {
	// Dropping whatever the texture object was holding before
	if (tex->valid)
		gpu_free_texture(tex);

	// Bytes per pixel of the destination format
	uint8_t bpp = tex_format_to_bytespp(format);

	// Reserving the destination buffer in the preferred memory type
	tex->mtype = use_vram ? VGL_MEM_VRAM : VGL_MEM_RAM;
	const int tex_size = ALIGN(w, 8) * h * bpp;
	void *texture_data = gpu_alloc_mapped(tex_size, &tex->mtype);
	if (texture_data == NULL)
		return;

	if (data == NULL) {
		// No source pixels, starting from a blank texture
		memset(texture_data, 0, tex_size);
	} else {
		// Converting the source pixel by pixel; the source is walked
		// continuously while each destination row starts at an aligned pitch
		uint8_t *in = (uint8_t *)data;
		int row, col;
		for (row = 0; row < h; row++) {
			uint8_t *out = ((uint8_t *)texture_data) + (ALIGN(w, 8) * bpp) * row;
			for (col = 0; col < w; col++) {
				write_cb(out, read_cb(in));
				in += src_bpp;
				out += bpp;
			}
		}
	}

	// Initializing texture and validating it
	sceGxmTextureInitLinear(&tex->gxm_tex, texture_data, format, w, h, 0);
	tex->palette_UID = ((format & 0x9f000000U) == SCE_GXM_TEXTURE_BASE_FORMAT_P8) ? 1 : 0;
	tex->valid = 1;
	tex->data = texture_data;
}
/*
 * gpu_alloc_mipmaps:
 * Rebuilds the storage of a texture so that it holds a mipmap chain and
 * generates the additional levels by downscaling through sceGxmTransfer.
 *
 * level - number of levels wanted (base level included); a negative value
 *         requests as many levels as the texture size allows
 * tex   - texture to process; only GL_RGBA and GL_RGB textures are supported
 *
 * The function does nothing when the texture already has at least level
 * levels, when its type is not supported or when no level can be generated.
 * If the new buffer cannot be allocated the texture is either left untouched
 * or, when its old buffer had already been released to make room, left
 * invalid.
 */
void gpu_alloc_mipmaps(int level, texture *tex) {
	// Capacity of the per level size table; a 32 bit dimension can never be
	// halved more often than this
	enum { MAX_MIP_LEVELS = 32 };

	// Getting current mipmap count in passed texture
	uint32_t count = sceGxmTextureGetMipmapCount(&tex->gxm_tex);

	// Getting textures info and calculating bpp
	uint32_t w, h, stride;
	uint32_t orig_w = sceGxmTextureGetWidth(&tex->gxm_tex);
	uint32_t orig_h = sceGxmTextureGetHeight(&tex->gxm_tex);
	SceGxmTextureFormat format = sceGxmTextureGetFormat(&tex->gxm_tex);
	uint32_t bpp = tex_format_to_bytespp(format);

	// Checking if we need at least one more new mipmap level
	// Note: level < 0 means we will use max possible mipmaps level
	if ((level >= 0) && ((uint32_t)level <= count))
		return;

	// Calculating needed sceGxmTransfer format for the downscale process.
	// Done before anything is released: types without a matching transfer
	// format cannot be downscaled, so the texture is left as it is
	SceGxmTransferFormat fmt;
	switch (tex->type) {
	case GL_RGBA:
		fmt = SCE_GXM_TRANSFER_FORMAT_U8U8U8U8_ABGR;
		break;
	case GL_RGB:
		fmt = SCE_GXM_TRANSFER_FORMAT_U8U8U8_BGR;
		break;
	default:
		return;
	}

	// Rounding texture size up to the next power of two
	for (w = 1; w < orig_w; w <<= 1) {
	}
	for (h = 1; h < orig_h; h <<= 1) {
	}

	// Calculating new texture data buffer size
	uint32_t jumps[MAX_MIP_LEVELS];
	uint32_t size = 0;
	int j;
	if (level > 0) {
		// Never indexing past the size table
		if (level > MAX_MIP_LEVELS)
			level = MAX_MIP_LEVELS;
		for (j = 0; j < level; j++) {
			jumps[j] = max(w, 8) * h * bpp;
			size += jumps[j];
			w /= 2;
			h /= 2;
		}
	} else {
		level = 0;
		while ((w > 1) && (h > 1) && (level < MAX_MIP_LEVELS)) {
			jumps[level] = max(w, 8) * h * bpp;
			size += jumps[level];
			w /= 2;
			h /= 2;
			level++;
		}
	}

	// Size of the existing base level data that has to be carried over
	stride = ALIGN(orig_w, 8);
	const uint32_t base_size = stride * orig_h * bpp;

	// Nothing can be generated (e.g. a one pixel wide texture): bailing out
	// instead of copying the base level into a buffer that cannot hold it
	if ((level == 0) || (size < base_size))
		return;

	// Moving texture data to heap and deallocating texture memblock
	GLboolean has_temp_buffer = GL_TRUE;
	void *temp = (void *)malloc(base_size);
	if (temp == NULL) { // If we finished newlib heap, we delay texture free
		has_temp_buffer = GL_FALSE;
		temp = sceGxmTextureGetData(&tex->gxm_tex);
	} else {
		memcpy(temp, sceGxmTextureGetData(&tex->gxm_tex), base_size);
		gpu_free_texture(tex);
	}

	// Allocating the new texture data buffer. The memory type is kept in a
	// local until the old block is gone: gpu_free_texture needs the old
	// tex->mtype to release it from the right place
	vglMemType new_type = use_vram ? VGL_MEM_VRAM : VGL_MEM_RAM;
	void *texture_data = gpu_alloc_mapped(size, &new_type);
	if (texture_data == NULL) {
		if (has_temp_buffer) {
			// Old block was already released: the texture stays invalid
			free(temp);
			tex->data = NULL;
		}
		return;
	}

	// Moving back old texture data from heap to texture memblock
	memcpy(texture_data, temp, base_size);
	if (has_temp_buffer)
		free(temp);
	else
		gpu_free_texture(tex);
	tex->mtype = new_type;
	tex->valid = 1;

	// Performing a chain downscale process to generate requested mipmaps
	uint8_t *curPtr = (uint8_t *)texture_data;
	uint32_t curWidth = orig_w;
	uint32_t curHeight = orig_h;
	if (curWidth % 2)
		curWidth--;
	if (curHeight % 2)
		curHeight--;
	for (j = 0; j < level - 1; j++) {
		uint32_t curSrcStride = ALIGN(curWidth, 8);
		uint32_t curDstStride = ALIGN(curWidth >> 1, 8);
		uint8_t *dstPtr = curPtr + jumps[j];
		sceGxmTransferDownscale(
			fmt, curPtr, 0, 0,
			curWidth, curHeight,
			curSrcStride * bpp,
			fmt, dstPtr, 0, 0,
			curDstStride * bpp,
			NULL, SCE_GXM_TRANSFER_FRAGMENT_SYNC, NULL);
		curPtr = dstPtr;
		curWidth /= 2;
		curHeight /= 2;
	}

	// Initializing texture in sceGxm
	sceGxmTextureInitLinear(&tex->gxm_tex, texture_data, format, orig_w, orig_h, level);
	tex->data = texture_data;
}
/*
 * gpu_free_palette:
 * Releases a palette: its data buffer goes back to the memory type recorded
 * in the palette, then the palette object itself is freed.
 * Passing NULL is allowed and does nothing.
 */
void gpu_free_palette(palette *pal) {
	if (pal != NULL) {
		mempool_free(pal->data, pal->type);
		free(pal);
	}
}

81
deps/vitaGL/source/utils/gpu_utils.h vendored Normal file
View File

@ -0,0 +1,81 @@
/*
* gpu_utils.h:
* Header file for the GPU utilities exposed by gpu_utils.c
*/
#ifndef _GPU_UTILS_H_
#define _GPU_UTILS_H_
#include "mem_utils.h"
// Align a value to the requested alignment (rounds x up to the next multiple of a).
// The bit mask only gives a correct result when a is a power of two.
// Both arguments may be evaluated more than once: do not pass expressions with side effects.
#define ALIGN(x, a) (((x) + ((a)-1)) & ~((a)-1))
// Texture object struct
typedef struct texture {
// Underlying sceGxm texture descriptor
SceGxmTexture gxm_tex;
// Texture data buffer (released through mempool_free by gpu_free_texture)
void *data;
// Memory type the data buffer was allocated from
vglMemType mtype;
// Set to 1 by gpu_alloc_texture for paletted (P8) formats, 0 otherwise
SceUID palette_UID;
// NOTE(review): not used by gpu_utils.c - presumably a depth buffer reference, confirm
SceUID depth_UID;
// NOTE(review): not used by gpu_utils.c - presumably marks the slot as taken, confirm
uint8_t used;
// Non-zero while the texture holds initialized data (see gpu_alloc_texture / gpu_free_texture)
uint8_t valid;
// GL format of the texture (e.g. GL_RGB, GL_RGBA), used to pick pixel writers and transfer formats
uint32_t type;
// NOTE(review): presumably the pixel write callback matching type - confirm against the texture code
void (*write_cb)(void *, uint32_t);
} texture;
// Palette object struct
typedef struct palette {
// Palette entries buffer (256 32-bit entries as allocated by gpu_alloc_palette)
void *data;
// Memory type the entries buffer was allocated from
vglMemType type;
} palette;
// Alloc a generic memblock into sceGxm mapped memory.
// *type is the preferred memory type on input and the type actually used on output;
// returns NULL when every memory type (heap included) is exhausted
void *gpu_alloc_mapped(size_t size, vglMemType *type);
// Alloc into sceGxm mapped memory a vertex USSE memblock; the USSE offset is stored in *usse_offset
void *gpu_vertex_usse_alloc_mapped(size_t size, unsigned int *usse_offset);
// Dealloc from sceGxm mapped memory a vertex USSE memblock
void gpu_vertex_usse_free_mapped(void *addr);
// Alloc into sceGxm mapped memory a fragment USSE memblock; the USSE offset is stored in *usse_offset
void *gpu_fragment_usse_alloc_mapped(size_t size, unsigned int *usse_offset);
// Dealloc from sceGxm mapped memory a fragment USSE memblock
void gpu_fragment_usse_free_mapped(void *addr);
// Reserve a memory space from vitaGL mempool; returns NULL when the pool has not enough space left
void *gpu_pool_malloc(unsigned int size);
// Reserve an aligned memory space from vitaGL mempool; returns NULL when the pool has not enough space left
void *gpu_pool_memalign(unsigned int size, unsigned int alignment);
// Returns available free space (in bytes) on vitaGL mempool
unsigned int gpu_pool_free_space();
// Resets vitaGL mempool (every previous reservation becomes reusable)
void gpu_pool_reset();
// Alloc vitaGL mempool with the given size in bytes
void gpu_pool_init(uint32_t temp_pool_size);
// Calculate bytes per pixel for a requested texture format
int tex_format_to_bytespp(SceGxmTextureFormat format);
// Alloc a texture of w x h pixels; data (may be NULL) is converted pixel by pixel
// through read_cb/write_cb, with src_bpp being the source bytes per pixel
void gpu_alloc_texture(uint32_t w, uint32_t h, SceGxmTextureFormat format, const void *data, texture *tex, uint8_t src_bpp, uint32_t (*read_cb)(void *), void (*write_cb)(void *, uint32_t));
// Dealloc a texture (data buffer is released and the texture is marked as invalid)
void gpu_free_texture(texture *tex);
// Alloc a palette from w entries of bpe bytes each (data may be NULL for an empty palette)
palette *gpu_alloc_palette(const void *data, uint32_t w, uint32_t bpe);
// Dealloc a palette (NULL is accepted)
void gpu_free_palette(palette *pal);
// Generate mipmaps for a given texture (a negative level requests the max possible level)
void gpu_alloc_mipmaps(int level, texture *tex);
#endif

Some files were not shown because too many files have changed in this diff Show More