From 7d36568eeb7a1040f50b8c923552ac1b3f7c3a7b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 01:13:39 +0100 Subject: [PATCH 001/105] add base of rlsw.h --- src/external/rlsw.h | 1909 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1909 insertions(+) create mode 100644 src/external/rlsw.h diff --git a/src/external/rlsw.h b/src/external/rlsw.h new file mode 100644 index 000000000..55205d057 --- /dev/null +++ b/src/external/rlsw.h @@ -0,0 +1,1909 @@ +/** + * MIT License + * + * Copyright (c) 2025 Le Juez Victor + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + #ifndef RLSW_H + #define RLSW_H + + #include + #include + + #ifndef SW_MALLOC + # define SW_MALLOC(sz) malloc(sz) + #endif + + #ifndef SW_FREE + # define SW_FREE(ptr) free(ptr) + #endif + + #ifndef SW_MAX_PROJECTION_STACK_SIZE + # define SW_MAX_PROJECTION_STACK_SIZE 2 + #endif + + #ifndef SW_MAX_MODELVIEW_STACK_SIZE + # define SW_MAX_MODELVIEW_STACK_SIZE 8 + #endif + + #ifndef SW_MAX_TEXTURE_STACK_SIZE + # define SW_MAX_TEXTURE_STACK_SIZE 4 + #endif + + #ifndef SW_MAX_TEXTURES + # define SW_MAX_TEXTURES 128 + #endif + + #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES + # define SW_MAX_CLIPPED_POLYGON_VERTICES 12 + #endif + + #ifndef SW_CLIP_EPSILON + # define SW_CLIP_EPSILON 1e-4f + #endif + + typedef enum { + SW_PROJECTION, + SW_MODELVIEW, + SW_TEXTURE + } SWmatrix; + + typedef enum { + SW_VERTEX_ARRAY, + SW_TEXTURE_COORD_ARRAY, + SW_NORMAL_ARRAY, + SW_COLOR_ARRAY + } SWarray; + + typedef enum { + SW_POINTS, + SW_LINES, + SW_TRIANGLES, + SW_QUADS, + } SWfill; + + typedef enum { + SW_CULL_FRONT, + SW_CULL_BACK, + } SWcull; + + typedef enum { + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) + SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) + 
SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) + SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp + } SWpixelformat; + + typedef enum { + SW_NEAREST, + SW_LINEAR, + SW_NEAREST_MIPMAP_NEAREST, + SW_NEAREST_MIPMAP_LINEAR, + SW_LINEAR_MIPMAP_NEAREST, + SW_LINEAR_MIPMAP_LINEAR + } SWfilter; + + typedef enum { + SW_REPEAT, + SW_CLAMP_TO_EDGE, + SW_MIRRORED_REPEAT + } SWwrap; + + typedef enum { + SW_TEXTURE_MIN_FILTER, + SW_TEXTURE_MAG_FILTER, + SW_TEXTURE_WRAP_S, + SW_TEXTURE_WRAP_T + } SWtexparam; + + typedef enum { + SW_NO_ERROR, + SW_INVALID_ENUM, + SW_INVALID_VALUE, + SW_STACK_OVERFLOW, + SW_STACK_UNDERFLOW, + SW_INVALID_OPERATION, + SW_ERROR_OUT_OF_MEMORY + } SWerrcode; + + /* === Public API === */ + + void swInit(int w, int h); + void swClose(void); + + void* swGetColorBuffer(int* w, int* h); + + void swMatrixMode(SWmatrix mode); + void swPushMatrix(void); + void swPopMatrix(void); + void swLoadIdentity(void); + void swTranslatef(float x, float y, float z); + void swRotatef(float angle, float x, float y, float z); + void swScalef(float x, float y, float z); + void swMultMatrixf(const float* mat); + void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); + void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); + + void swViewport(int x, int y, int width, int height); + + void swClearColor(float r, float g, float b, float a); + void swClear(void); + + void swBegin(SWfill mode); + void swEnd(void); + + void swVertex2i(int x, int y); + void swVertex2f(float x, float y); + void swVertex2fv(const float* v); 
+ void swVertex3i(int x, int y, int z); + void swVertex3f(float x, float y, float z); + void swVertex3fv(const float* v); + void swVertex4i(int x, int y, int z, int w); + void swVertex4f(float x, float y, float z, float w); + void swVertex4fv(const float* v); + + void swColor1ui(uint32_t color); + void swColor3ub(uint8_t r, uint8_t g, uint8_t b); + void swColor3ubv(const uint8_t* v); + void swColor3us(uint16_t r, uint16_t g, uint16_t b); + void swColor3usv(const uint16_t* v); + void swColor3ui(uint32_t r, uint32_t g, uint32_t b); + void swColor3uiv(const uint32_t* v); + void swColor3f(float r, float g, float b); + void swColor3fv(const float* v); + void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); + void swColor4ubv(const uint8_t* v); + void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); + void swColor4usv(const uint16_t* v); + void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); + void swColor4uiv(const uint32_t* v); + void swColor4f(float r, float g, float b, float a); + void swColor4fv(const float* v); + + void swTexCoord2f(float u, float v); + void swTexCoordfv(const float* v); + + void swNormal3f(float x, float y, float z); + void swNormal3fv(const float* v); + + void swBindArray(SWarray type, void *buffer); + void swDrawArrays(SWfill mode, int offset, int count); + + uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); + void swUnloadTexture(uint32_t id); + + void swTextureParameters(uint32_t id, int param, int value); + void swBindTexture(uint32_t id); + + #endif // RLSW_H + + + + #ifdef RLSW_IMPL + + #include + #include + + /* === Defines and Macros === */ + + #define SW_PI 3.14159265358979323846f + #define SW_DEG2RAD (SW_PI/180.0f) + #define SW_RAD2DEG (180.0f/SW_PI) + + /* === Internal Structs === */ + + typedef float sw_matrix_t[4*4]; + typedef uint16_t sw_half_t; + + typedef struct { + + float position[4]; // Position coordinates + float normal[3]; // Normal vector + float 
texcoord[2]; // Texture coordinates + float color[4]; // Color + + float homogeneous[4]; // Homogeneous coordinates + float screen[2]; // Screen coordinates + + } sw_vertex_t; + + typedef struct { + + const void* pixels; + int width; + int height; + int format; + + SWfilter minFilter; + SWfilter magFilter; + + SWwrap sWrap; + SWwrap tWrap; + + float tx; + float ty; + + } sw_texture_t; + + typedef struct { + uint8_t *color; // 32-bit RGBA color buffer + uint16_t *depth; // 16-bit fixed fract buffer + int width, height; + } sw_framebuffer_t; + + typedef struct { + + sw_framebuffer_t framebuffer; + uint8_t clearColor[4]; // Color used to clear the screen + uint16_t clearDepth; // Depth value used to clear the screen + + uint32_t currentTexture; + sw_matrix_t *currentMatrix; + + uint32_t blendFunction; + uint32_t depthFunction; + + int vpPos[2]; // Represents the top-left corner of the viewport + int vpDim[2]; // Represents the dimensions of the viewport (minus one) + int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) + int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) + + struct { + float* positions; + float* texcoords; + float* normals; + uint8_t* colors; + } array; + + sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering + int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' + + SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) + float pointSize; // Rasterized point size + float lineWidth; // Rasterized line width + + sw_matrix_t matProjection; // Projection matrix, user adjustable + sw_matrix_t matTexture; // Texture matrix, user adjustable + sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) + sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) + sw_matrix_t matMVP; // Model view projection matrix, calculated 
and used internally + + sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations + sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations + sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations + uint32_t stackProjectionCounter; // Counter for matrix stack operations + uint32_t stackModelviewCounter; // Counter for matrix stack operations + uint32_t stackTextureCounter; // Counter for matrix stack operations + + SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) + bool modelMatrixUsed; // Flag indicating if the model matrix is used + + SWcull cullFace; // Faces to cull + SWerrcode errCode; // Last error code + + sw_texture_t* loadedTextures; + int loadedTextureCount; + + uint32_t* freeTextureIds; + int freeTextureIdCount; + + } sw_data_t; + + + /* === Global Data === */ + + static sw_data_t RLSW = { 0 }; + + + /* === Helper Functions === */ + + static inline void sw_matrix_id(sw_matrix_t dst) + { + dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; + dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0; + dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; + dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; + } + + static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) + { + sw_matrix_t result; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + float sum = 0.0; + for (int k = 0; k < 4; k++) { + sum += left[i * 4 + k] * right[k * 4 + j]; + } + result[i * 4 + j] = sum; + } + } + for (int i = 0; i < 16; i++) { + dst[i] = result[i]; + } + } + + static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) + { + float tmp[4] = { + mat[0] * v[0] + mat[4] * v[1] + mat[8] * v[2] + mat[12] * v[3], + mat[1] * v[0] + mat[5] * v[1] + mat[9] * v[2] + mat[13] * v[3], + mat[2] * v[0] + mat[6] * v[1] + mat[10] * v[2] 
+ mat[14] * v[3], + mat[3] * v[0] + mat[7] * v[1] + mat[11] * v[2] + mat[15] * v[3] + }; + + for (int i = 0; i < 4; i++) { + dst[i] = tmp[i]; + } + } + + static inline float sw_lerp(float a, float b, float t) + { + return a + t * (b - a); + } + + static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) + { + sw_vertex_t result; + for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { + ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); + } + return result; + } + + static inline uint32_t sw_cvt_hf_ui(uint16_t h) + { + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? 
// Reads the 16-bit R5G6B5 texel at index 'offset' (counted in texels) from 'pixels'
// and writes it to 'color' as normalized RGBA floats; alpha is always 1.0.
static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset)
{
    const uint16_t *texels = (const uint16_t*)pixels;
    const uint16_t packed = texels[offset];

    // Bit layout, most significant first: RRRRR GGGGGG BBBBB
    const unsigned red5 = (packed >> 11) & 0x1Fu;
    const unsigned green6 = (packed >> 5) & 0x3Fu;
    const unsigned blue5 = packed & 0x1Fu;

    color[0] = (float)red5 / 31;
    color[1] = (float)green6 / 63;
    color[2] = (float)blue5 / 31;
    color[3] = 1.0f;
}
color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = 1.0f; + } + + static inline void sw_get_pixel_rgb_323232(float* color, const void* pixels, uint32_t offset) + { + const float *pixel = (float*)pixels + 3 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_rgba_5551(float* color, const void* pixels, uint32_t offset) + { + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7C0) >> 6) / 31; + color[2] = (float)((pixel & 0x3E) >> 1) / 31; + color[3] = (float)(pixel & 0x1); + } + + static inline void sw_get_pixel_rgba_4444(float* color, const void* pixels, uint32_t offset) + { + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF000) >> 12) / 15; + color[1] = (float)((pixel & 0xF00) >> 8) / 15; + color[2] = (float)((pixel & 0xF0) >> 4) / 15; + color[3] = (float)(pixel & 0xF) / 15; + } + + static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint32_t offset) + { + const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = (float)pixel[3] / 255; + } + + static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) + { + const sw_half_t *pixel = (sw_half_t*)pixels + 4 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = sw_cvt_hf(pixel[3]); + } + + static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, uint32_t offset) + { + const float *pixel = (float*)pixels + 4 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = pixel[3]; + } + + static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) + { 
+ switch (format) { + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_get_pixel_grayscale(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_get_pixel_grayscale_alpha(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_get_pixel_rgb_565(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_get_pixel_rgb_888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_get_pixel_rgba_5551(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_get_pixel_rgba_4444(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_get_pixel_rgba_8888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_get_pixel_red_32(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_get_pixel_rgb_323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_get_pixel_rgba_32323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_get_pixel_red_16(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_get_pixel_rgb_161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_get_pixel_rgba_16161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + break; + + } + } + + static inline void sw_map_repeat(int* out, float in, int max) + { + // Upscale to nearest 
// Maps the texture coordinate 'in' to a texel index in [0, max-1] using
// clamp-to-edge wrapping: 'in' is first limited to [0, 1], then scaled by
// 'max - 1' and rounded with the '(int)(x + 0.5f)' idiom. Result goes to '*out'.
static inline void sw_map_clamp_to_edge(int* out, float in, int max)
{
    float coord = in;

    if (coord > 1.0f) {
        coord = 1.0f;
    } else if (coord < 0.0f) {
        coord = 0.0f;
    }

    const float lastTexel = (float)(max - 1);
    *out = (int)(coord * lastTexel + 0.5f);
}
tex->width + x1, tex->format); + + float c0[4], c1[4]; + for (int i = 0; i < 4; i++) { + float a = sw_lerp(c00[i], c10[i], fx); + float b = sw_lerp(c01[i], c11[i], fx); + color[i] = sw_lerp(a, b, fy); + } + } + + static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, + float xDu, float yDu, float xDv, float yDv) + { + // TODO: It seems there are some incorrect detections depending on the context + // This is probably due to the fact that the fractions are obtained + // at the wrong moment during rasterization. It would be worth reviewing + // this, although the scanline method complicates things. + + // Calculate the derivatives for each axis + float du = sqrtf(xDu * xDu + yDu * yDu); + float dv = sqrtf(xDv * xDv + yDv * yDv); + float L = (du > dv) ? du : dv; + + // Select the filter based on the size of the footprint + if (L > 1.0f) { + // Minification + if (tex->minFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->minFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } else { + // Magnification + if (tex->magFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->magFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } + } + + static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + { + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + + int inputCounter = *vertexCounter; + *vertexCounter = 0; + + const sw_vertex_t *prevVt = &input[inputCounter-1]; + char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? 
-1 : 1; + if (prevDot*currDot < 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], + (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + return *vertexCounter > 0; + } + + static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + { + for (int iAxis = 0; iAxis < 3; iAxis++) + { + if (*vertexCounter == 0) return false; + + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int inputCounter; + + const sw_vertex_t *prevVt; + char prevDot; + + // Clip against first plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; + if (prevDot * currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + if (*vertexCounter == 0) return false; + + // Clip against opposite plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 
1 : -1; + if (prevDot*currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + } + + return *vertexCounter > 0; + } + + void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + { + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; + sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); + } + + if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + + // Calculation of the reciprocal of W for normalization + // as well as perspective correct attributes + v->homogeneous[3] = 1.0f / v->homogeneous[3]; + + // Division of XYZ coordinates by weight + v->homogeneous[0] *= v->homogeneous[3]; + v->homogeneous[1] *= v->homogeneous[3]; + v->homogeneous[2] *= v->homogeneous[3]; + + // Division of texture coordinates (perspective correct) + v->texcoord[0] *= v->homogeneous[3]; + v->texcoord[1] *= v->homogeneous[3]; + + // Transform to screen space + v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; + v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + } + } + } + + void sw_raster_scanline(const sw_texture_t* tex, const sw_vertex_t* start, const sw_vertex_t* end, float yDu, float yDv) + { + // Calculate the horizontal width and avoid division by zero + float dx = end->screen[0] - start->screen[0]; + if (fabsf(dx) < 1e-4f) return; + + // Convert and center the screen coordinates + int xStart = (int)(start->screen[0] + 0.5f); + int xEnd 
= (int)(end->screen[0] + 0.5f); + int y = (int)(start->screen[1] + 0.5f); + + // Calculate the initial interpolation parameter and its increment + float dt = 1.0f / dx; + float t = (xStart - start->screen[0]) * dt; + + // Calculate the horizontal gradients for UV coordinates + float xDu = (end->texcoord[0] - start->texcoord[0]) * dt; + float xDv = (end->texcoord[1] - start->texcoord[1]) * dt; + + // Pre-calculate the color differences for interpolation + float dcol[4]; + for (int i = 0; i < 4; i++) { + dcol[i] = end->color[i] - start->color[i]; + } + + // Pre-calculate the differences in Z and W (for depth testing and perspective correction) + float dz = end->homogeneous[2] - start->homogeneous[2]; + float dw = end->homogeneous[3] - start->homogeneous[3]; + + // Initialize the interpolated texture coordinates + float u = start->texcoord[0] + t * xDu; + float v = start->texcoord[1] + t * xDv; + + // Pre-calculate the starting pointer for the color framebuffer row + uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); + uint8_t* dst = row_ptr + xStart * 4; + + // Pre-calculate the pointer for the depth buffer row + uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; + uint16_t* dptr = depth_row; + + // Scanline rasterization loop + for (int x = xStart; x < xEnd; x++) { + // Interpolate Z and W for depth testing and perspective correction + float w = 1.0f / (start->homogeneous[3] + t * dw); + float z = start->homogeneous[2] + t * dz; + + // Depth testing with direct access to the depth buffer + // TODO: Implement different depth funcs? 
+ float depth = (float)(*dptr) / UINT16_MAX; + if (z > depth) goto discard; + + // Update the depth buffer + *dptr = (uint16_t)(z * UINT16_MAX); + + // Sample the texture + float texColor[4]; + sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); + + // Interpolate the color and modulate by the texture color + for (int i = 0; i < 4; i++) { + float lerp = start->color[i] + t * dcol[i]; + float finalColor = texColor[i] * lerp; + // Inline clamp to keep the value between 0 and 1 + // NOTE: The need for clamp, the colors could be a sign of problem during interpolation (?) + finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); + dst[i] = (uint8_t)(finalColor * 255.0f); + } + + // Increment the interpolation parameter, UVs, and pointers + discard: + t += dt; + u += xDu; + v += xDv; + dst += 4; + dptr++; + } + } + + void sw_raster_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, const sw_texture_t* tex) + { + // Swap vertices by increasing y + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } + if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } + + // Extracting coordinates from the sorted vertices + float x0 = v0->screen[0], y0 = v0->screen[1]; + float x1 = v1->screen[0], y1 = v1->screen[1]; + float x2 = v2->screen[0], y2 = v2->screen[1]; + + // Reject degenerate triangles + float height = y2 - y0; + if (height < 1e-4f) return; + + // Global calculation of vertical texture gradients for the triangle + float yDu = (v2->texcoord[0] - v0->texcoord[0]) / height; + float yDv = (v2->texcoord[1] - v0->texcoord[1]) / height; + + // Precompute the inverse of the triangle height and + // edge lengths with checks to avoid division by zero. + float inv_height = 1.0f / height; + float inv_y1y0 = (y1 - y0 > 1e-4f) ? 
1.0f / (y1 - y0) : 0.0f; + float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; + + // Pre-calculation of slopes (dx/dy) + float dx02 = (x2 - x0) * inv_height; + float dx01 = (x1 - x0) * inv_y1y0; + float dx12 = (x2 - x1) * inv_y2y1; + + // Y bounds (vertical clipping) + int yTop = (int)(y0 + 0.5f); + int yMiddle = (int)(y1 + 0.5f); + int yBottom = (int)(y2 + 0.5f); + + // Initializing scanline variables + float xLeft = x0, xRight = x0; + sw_vertex_t start, end; + + // Scanline for the upper part of the triangle + for (int y = yTop; y < yMiddle; y++) { + float dy = (float)y - y0; + float t1 = dy * inv_height; + float t2 = dy * inv_y1y0; + + // Optimized interpolation + start = sw_lerp_vertex(v0, v2, t1); + end = sw_lerp_vertex(v0, v1, t2); + start.screen[0] = xLeft; + start.screen[1] = (float)y; + end.screen[0] = xRight; + end.screen[1] = (float)y; + + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } + sw_raster_scanline(tex, &start, &end, yDu, yDv); + + // Incremental update + xLeft += dx02; + xRight += dx01; + } + + // Scanline for the lower part of the triangle + xRight = x1; // Restart the right side from the second vertex + for (int y = yMiddle; y < yBottom; y++) { + float dy = (float)y - y0; + float t1 = dy * inv_height; + float t2 = (float)(y - y1) * inv_y2y1; + + // Optimized interpolation + start = sw_lerp_vertex(v0, v2, t1); + end = sw_lerp_vertex(v1, v2, t2); + start.screen[0] = xLeft; + start.screen[1] = (float)y; + end.screen[0] = xRight; + end.screen[1] = (float)y; + + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } + sw_raster_scanline(tex, &start, &end, yDu, yDv); + + // Incremental update + xLeft += dx02; + xRight += dx12; + } + } + + void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) + { + int vertexCounter = 3; + + sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; + polygon[0] = *v0; + polygon[1] = *v1; + polygon[2] = *v2; + + 
sw_project_and_clip_triangle(polygon, &vertexCounter); + + if (vertexCounter < 3) { + return; + } + + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + + static inline bool sw_is_texture_id_valid(uint32_t id) + { + bool valid = true; + + if (id == 0) valid = false; + else if (id >= SW_MAX_TEXTURES) valid = false; + else if (RLSW.loadedTextures[id].pixels == 0) valid = false; + + return true; + } + + static inline bool sw_is_texture_filter_valid(int filter) + { + return (filter == SW_NEAREST || filter == SW_LINEAR); + } + + static inline bool sw_is_texture_wrap_valid(int wrap) + { + return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); + } + + /* === Public Implementation === */ + + void swInit(int w, int h) + { + swViewport(0, 0, w, h); + + RLSW.framebuffer.color = SW_MALLOC(4 * w * h); + RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + + RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + + RLSW.clearColor[0] = 0; + RLSW.clearColor[1] = 0; + RLSW.clearColor[2] = 0; + RLSW.clearColor[3] = 255; + RLSW.clearDepth = UINT16_MAX; + + RLSW.currentMatrixMode = SW_MODELVIEW; + RLSW.currentMatrix = &RLSW.matView; + + sw_matrix_id(RLSW.matProjection); + sw_matrix_id(RLSW.matTexture); + sw_matrix_id(RLSW.matModel); + sw_matrix_id(RLSW.matView); + + RLSW.vertexBuffer[0].color[0] = 1.0f; + RLSW.vertexBuffer[0].color[1] = 1.0f; + RLSW.vertexBuffer[0].color[2] = 1.0f; + RLSW.vertexBuffer[0].color[3] = 1.0f; + + RLSW.vertexBuffer[0].texcoord[0] = 0.0f; + RLSW.vertexBuffer[0].texcoord[1] = 0.0f; + + RLSW.vertexBuffer[0].normal[0] = 0.0f; + RLSW.vertexBuffer[0].normal[1] = 0.0f; + RLSW.vertexBuffer[0].normal[2] = 1.0f; + + static const float defTex[3*2*2] = + { + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 
1.0f, + 1.0f, 1.0f, 1.0f, + }; + + RLSW.loadedTextures[0].pixels = defTex; + RLSW.loadedTextures[0].width = 2; + RLSW.loadedTextures[0].height = 2; + RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; + RLSW.loadedTextures[0].minFilter = SW_NEAREST; + RLSW.loadedTextures[0].magFilter = SW_NEAREST; + RLSW.loadedTextures[0].sWrap = SW_REPEAT; + RLSW.loadedTextures[0].tWrap = SW_REPEAT; + RLSW.loadedTextures[0].tx = 0.5f; + RLSW.loadedTextures[0].ty = 0.5f; + + RLSW.loadedTextureCount = 1; + } + + void swClose(void) + { + SW_FREE(RLSW.framebuffer.color); + SW_FREE(RLSW.framebuffer.depth); + + SW_FREE(RLSW.loadedTextures); + SW_FREE(RLSW.freeTextureIds); + } + + void* swGetColorBuffer(int* w, int* h) + { + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; + } + + void swMatrixMode(SWmatrix mode) + { + switch (mode) { + case SW_PROJECTION: + RLSW.currentMatrix = &RLSW.matProjection; + break; + case SW_MODELVIEW: + RLSW.currentMatrix = RLSW.modelMatrixUsed + ? 
&RLSW.matModel : &RLSW.matView; + break; + case SW_TEXTURE: + RLSW.currentMatrix = &RLSW.matTexture; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + } + + RLSW.currentMatrixMode = mode; + } + + void swPushMatrix(void) + { + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; + } + RLSW.stackProjectionCounter++; + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + if (RLSW.modelMatrixUsed) { + for (int i = 0; i < 16; i++) { + RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; + } + RLSW.stackModelviewCounter++; + } else { + RLSW.currentMatrix = &RLSW.matModel; + RLSW.modelMatrixUsed = true; + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; + } + RLSW.stackTextureCounter++; + break; + + } + } + + void swPopMatrix(void) + { + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackProjectionCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; + } + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter == 0) { + if (!RLSW.modelMatrixUsed) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + sw_matrix_id(RLSW.matModel); + RLSW.currentMatrix = &RLSW.matView; + RLSW.modelMatrixUsed = false; + } else { + RLSW.stackModelviewCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matModel[i] = 
RLSW.stackModelview[RLSW.stackModelviewCounter][i]; + } + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackTextureCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; + } + break; + + } + } + + void swLoadIdentity(void) + { + sw_matrix_id(*RLSW.currentMatrix); + } + + void swTranslatef(float x, float y, float z) + { + sw_matrix_t mat; + sw_matrix_id(mat); + + mat[12] = x; + mat[13] = y; + mat[14] = z; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + } + + void swRotatef(float angle, float x, float y, float z) + { + angle *= SW_DEG2RAD; + + sw_matrix_t mat; + sw_matrix_id(mat); + + float lengthSq = x*x + y*y + z*z; + + if (lengthSq != 1.0f && lengthSq != 0.0f) { + float invLenght = 1.0f / lengthSq; + x *= invLenght; + y *= invLenght; + z *= invLenght; + } + + float sinres = sinf(angle); + float cosres = cosf(angle); + float t = 1.0f - cosres; + + mat[0] = x*x*t + cosres; + mat[1] = y*x*t + z*sinres; + mat[2] = z*x*t - y*sinres; + + mat[4] = x*y*t - z*sinres; + mat[5] = y*y*t + cosres; + mat[6] = z*y*t + x*sinres; + + mat[8] = x*z*t + y*sinres; + mat[9] = y*z*t - x*sinres; + mat[10] = z*z*t + cosres; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + } + + void swScalef(float x, float y, float z) + { + sw_matrix_t mat; + + mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; + mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; + mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; + mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + } + + void swMultMatrixf(const float* mat) + { + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + } + + void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) + { + sw_matrix_t mat = { 0 }; + + float rl = right - left; + float tb = top - bottom; + float 
fn = zfar - znear; + + mat[0] = (znear * 2.0f) / rl; + mat[5] = (znear * 2.0f) / tb; + + mat[8] = (right + left) / rl; + mat[9] = (top + bottom) / tb; + mat[10] = -(zfar + znear) / fn; + mat[11] = -1.0f; + + mat[14] = -(zfar * znear * 2.0f) / fn; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + } + + void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) + { + sw_matrix_t mat = { 0 }; + + float rl = (right - left); + float tb = (top - bottom); + float fn = (zfar - znear); + + mat[0] = 2.0f / rl; + mat[5] = 2.0f / tb; + + mat[10] = -2.0f / fn; + mat[11] = 0.0f; + mat[12] = -(left + right) / rl; + mat[13] = -(top + bottom) / tb; + + mat[14] = -(zfar + znear) / fn; + mat[15] = 1.0f; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + } + + void swViewport(int x, int y, int width, int height) + { + if (x <= -width || y <= -height) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.vpPos[0] = x; + RLSW.vpPos[1] = y; + + RLSW.vpDim[0] = width - 1; + RLSW.vpDim[1] = height - 1; + + RLSW.vpMin[0] = (x < 0) ? 0 : x; + RLSW.vpMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.vpMax[1] = (vpMaxY < fbH) ? 
vpMaxY : fbH; + } + + void swClearColor(float r, float g, float b, float a) + { + RLSW.clearColor[0] = r * 255; + RLSW.clearColor[1] = g * 255; + RLSW.clearColor[2] = b * 255; + RLSW.clearColor[3] = a * 255; + } + + void swClear(void) + { + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + + for (int i = 0; i < size; i++) { + ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); + RLSW.framebuffer.depth[i] = RLSW.clearDepth; + } + } + + void swBegin(SWfill mode) + { + if (mode < SW_POINTS || mode > SW_QUADS) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.vertexCounter = 0; + RLSW.fillMode = mode; + } + + void swEnd(void) + { + RLSW.vertexCounter = 0; + } + + void swVertex2i(int x, int y) + { + float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; + swVertex4fv(v); + } + + void swVertex2f(float x, float y) + { + float v[4] = { x, y, 0.0f, 1.0f }; + swVertex4fv(v); + } + + void swVertex2fv(const float* v) + { + float v4[4] = { v[0], v[1], 0.0f, 1.0f }; + swVertex4fv(v4); + } + + void swVertex3i(int x, int y, int z) + { + float v[4] = { (float)x, (float)y, (float)z, 1.0f }; + swVertex4fv(v); + } + + void swVertex3f(float x, float y, float z) + { + float v[4] = { x, y, z, 1.0f }; + swVertex4fv(v); + } + + void swVertex3fv(const float* v) + { + float v4[4] = { v[0], v[1], v[2], 1.0f }; + swVertex4fv(v4); + } + + void swVertex4i(int x, int y, int z, int w) + { + float v[4] = { (float)x, (float)y, (float)z, (float)w }; + swVertex4fv(v); + } + + void swVertex4f(float x, float y, float z, float w) + { + float v[4] = { x, y, z, w }; + swVertex4fv(v); + } + + void swVertex4fv(const float* v) + { + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; + } + RLSW.vertexCounter++; + + int neededVertices = 0; + switch (RLSW.fillMode) { + case SW_POINTS: + neededVertices = 1; + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + neededVertices = 3; + break; + case SW_QUADS: + 
// Pointer variant of swColor3ub: reads three 8-bit components (r, g, b) from
// `v` and sets them as the current vertex color with alpha 1.0.
void swColor3ubv(const uint8_t* v)
{
    // Same conversion as the scalar entry point, so reuse it
    swColor3ub(v[0], v[1], v[2]);
}
// Pointer variant of swColor4ub: reads four 8-bit components (r, g, b, a)
// from `v` and sets them as the current vertex color.
void swColor4ubv(const uint8_t* v)
{
    // Same conversion as the scalar entry point, so reuse it
    swColor4ub(v[0], v[1], v[2], v[3]);
}
// Pointer variant of swTexCoord2f: reads (u, v) from `v[0]`, `v[1]`,
// transforms them by the texture matrix and stores the result as the
// texture coordinate of the vertex being assembled.
void swTexCoordfv(const float* v)
{
    // The scalar entry point performs the identical transform and store
    swTexCoord2f(v[0], v[1]);
}
break; + case SW_COLOR_ARRAY: + RLSW.array.colors = buffer; + break; + default: + break; + } + } + + void swDrawArrays(SWfill mode, int offset, int count) + { + if (RLSW.array.positions == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + swBegin(mode); + + for (int i = offset; i < count; i++) { + if (RLSW.array.texcoords) { + swTexCoordfv(RLSW.array.texcoords + 2 * i); + } + if (RLSW.array.normals) { + swNormal3fv(RLSW.array.normals + 3 * i); + } + if (RLSW.array.colors) { + swColor4ubv(RLSW.array.colors + 4 * i); + } + swVertex3fv(RLSW.array.positions + 3 * i); + } + + swEnd(); + } + + uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) + { + if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_ERROR_OUT_OF_MEMORY; + return 0; + } + + sw_texture_t texture = { 0 }; + texture.pixels = data; + texture.width = width; + texture.height = height; + texture.format = format; + texture.minFilter = SW_NEAREST; + texture.magFilter = SW_NEAREST; + texture.sWrap = SW_REPEAT; + texture.tWrap = SW_REPEAT; + texture.tx = 1.0f / width; + texture.ty = 1.0f / height; + (void)mipmapCount; + + uint32_t id = 0; + if (RLSW.freeTextureIdCount > 0) { + id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; + } + else { + id = RLSW.loadedTextureCount++; + } + + RLSW.loadedTextures[id] = texture; + + return id; + } + + void swUnloadTexture(uint32_t id) + { + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + RLSW.loadedTextures[id].pixels = 0; + RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; + } + + void swTextureParameters(uint32_t id, int param, int value) + { + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + sw_texture_t* texture = &RLSW.loadedTextures[id]; + + switch (param) { + + case SW_TEXTURE_MIN_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->minFilter = value; + break; + 
+ case SW_TEXTURE_MAG_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->magFilter = value; + break; + + case SW_TEXTURE_WRAP_S: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->sWrap = value; + break; + + case SW_TEXTURE_WRAP_T: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->tWrap = value; + break; + + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + + } + } + + void swBindTexture(uint32_t id) + { + if (id >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.currentTexture = id; + } + + #endif // RLSW_IMPL + \ No newline at end of file From 8aed39ff49e9f832cbc04cf15525a4ef0f78a61e Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 16:35:34 +0100 Subject: [PATCH 002/105] implement state support Also replace the triangle rasterization functions with macros that generate specific functions for each state of the rendering system. Also, add the OpenGL definitions in order to add a binding for rlgl. --- src/external/rlsw.h | 3943 ++++++++++++++++++++++--------------------- 1 file changed, 2058 insertions(+), 1885 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 55205d057..6af7fcf84 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -22,1888 +22,2061 @@ * SOFTWARE. 
*/ - #ifndef RLSW_H - #define RLSW_H - - #include - #include - - #ifndef SW_MALLOC - # define SW_MALLOC(sz) malloc(sz) - #endif - - #ifndef SW_FREE - # define SW_FREE(ptr) free(ptr) - #endif - - #ifndef SW_MAX_PROJECTION_STACK_SIZE - # define SW_MAX_PROJECTION_STACK_SIZE 2 - #endif - - #ifndef SW_MAX_MODELVIEW_STACK_SIZE - # define SW_MAX_MODELVIEW_STACK_SIZE 8 - #endif - - #ifndef SW_MAX_TEXTURE_STACK_SIZE - # define SW_MAX_TEXTURE_STACK_SIZE 4 - #endif - - #ifndef SW_MAX_TEXTURES - # define SW_MAX_TEXTURES 128 - #endif - - #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES - # define SW_MAX_CLIPPED_POLYGON_VERTICES 12 - #endif - - #ifndef SW_CLIP_EPSILON - # define SW_CLIP_EPSILON 1e-4f - #endif - - typedef enum { - SW_PROJECTION, - SW_MODELVIEW, - SW_TEXTURE - } SWmatrix; - - typedef enum { - SW_VERTEX_ARRAY, - SW_TEXTURE_COORD_ARRAY, - SW_NORMAL_ARRAY, - SW_COLOR_ARRAY - } SWarray; - - typedef enum { - SW_POINTS, - SW_LINES, - SW_TRIANGLES, - SW_QUADS, - } SWfill; - - typedef enum { - SW_CULL_FRONT, - SW_CULL_BACK, - } SWcull; - - typedef enum { - SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) - SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) - SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) - 
SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) - SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp - } SWpixelformat; - - typedef enum { - SW_NEAREST, - SW_LINEAR, - SW_NEAREST_MIPMAP_NEAREST, - SW_NEAREST_MIPMAP_LINEAR, - SW_LINEAR_MIPMAP_NEAREST, - SW_LINEAR_MIPMAP_LINEAR - } SWfilter; - - typedef enum { - SW_REPEAT, - SW_CLAMP_TO_EDGE, - SW_MIRRORED_REPEAT - } SWwrap; - - typedef enum { - SW_TEXTURE_MIN_FILTER, - SW_TEXTURE_MAG_FILTER, - SW_TEXTURE_WRAP_S, - SW_TEXTURE_WRAP_T - } SWtexparam; - - typedef enum { - SW_NO_ERROR, - SW_INVALID_ENUM, - SW_INVALID_VALUE, - SW_STACK_OVERFLOW, - SW_STACK_UNDERFLOW, - SW_INVALID_OPERATION, - SW_ERROR_OUT_OF_MEMORY - } SWerrcode; - - /* === Public API === */ - - void swInit(int w, int h); - void swClose(void); - - void* swGetColorBuffer(int* w, int* h); - - void swMatrixMode(SWmatrix mode); - void swPushMatrix(void); - void swPopMatrix(void); - void swLoadIdentity(void); - void swTranslatef(float x, float y, float z); - void swRotatef(float angle, float x, float y, float z); - void swScalef(float x, float y, float z); - void swMultMatrixf(const float* mat); - void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); - void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); - - void swViewport(int x, int y, int width, int height); - - void swClearColor(float r, float g, float b, float a); - void swClear(void); - - void swBegin(SWfill mode); - void swEnd(void); - - void swVertex2i(int x, int y); - void swVertex2f(float x, float y); - void swVertex2fv(const float* v); 
- void swVertex3i(int x, int y, int z); - void swVertex3f(float x, float y, float z); - void swVertex3fv(const float* v); - void swVertex4i(int x, int y, int z, int w); - void swVertex4f(float x, float y, float z, float w); - void swVertex4fv(const float* v); - - void swColor1ui(uint32_t color); - void swColor3ub(uint8_t r, uint8_t g, uint8_t b); - void swColor3ubv(const uint8_t* v); - void swColor3us(uint16_t r, uint16_t g, uint16_t b); - void swColor3usv(const uint16_t* v); - void swColor3ui(uint32_t r, uint32_t g, uint32_t b); - void swColor3uiv(const uint32_t* v); - void swColor3f(float r, float g, float b); - void swColor3fv(const float* v); - void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); - void swColor4ubv(const uint8_t* v); - void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); - void swColor4usv(const uint16_t* v); - void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); - void swColor4uiv(const uint32_t* v); - void swColor4f(float r, float g, float b, float a); - void swColor4fv(const float* v); - - void swTexCoord2f(float u, float v); - void swTexCoordfv(const float* v); - - void swNormal3f(float x, float y, float z); - void swNormal3fv(const float* v); - - void swBindArray(SWarray type, void *buffer); - void swDrawArrays(SWfill mode, int offset, int count); - - uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); - void swUnloadTexture(uint32_t id); - - void swTextureParameters(uint32_t id, int param, int value); - void swBindTexture(uint32_t id); - - #endif // RLSW_H - - - - #ifdef RLSW_IMPL - - #include - #include - - /* === Defines and Macros === */ - - #define SW_PI 3.14159265358979323846f - #define SW_DEG2RAD (SW_PI/180.0f) - #define SW_RAD2DEG (180.0f/SW_PI) - - /* === Internal Structs === */ - - typedef float sw_matrix_t[4*4]; - typedef uint16_t sw_half_t; - - typedef struct { - - float position[4]; // Position coordinates - float normal[3]; // Normal vector - float 
texcoord[2]; // Texture coordinates - float color[4]; // Color - - float homogeneous[4]; // Homogeneous coordinates - float screen[2]; // Screen coordinates - - } sw_vertex_t; - - typedef struct { - - const void* pixels; - int width; - int height; - int format; - - SWfilter minFilter; - SWfilter magFilter; - - SWwrap sWrap; - SWwrap tWrap; - - float tx; - float ty; - - } sw_texture_t; - - typedef struct { - uint8_t *color; // 32-bit RGBA color buffer - uint16_t *depth; // 16-bit fixed fract buffer - int width, height; - } sw_framebuffer_t; - - typedef struct { - - sw_framebuffer_t framebuffer; - uint8_t clearColor[4]; // Color used to clear the screen - uint16_t clearDepth; // Depth value used to clear the screen - - uint32_t currentTexture; - sw_matrix_t *currentMatrix; - - uint32_t blendFunction; - uint32_t depthFunction; - - int vpPos[2]; // Represents the top-left corner of the viewport - int vpDim[2]; // Represents the dimensions of the viewport (minus one) - int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) - int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) - - struct { - float* positions; - float* texcoords; - float* normals; - uint8_t* colors; - } array; - - sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering - int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' - - SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) - float pointSize; // Rasterized point size - float lineWidth; // Rasterized line width - - sw_matrix_t matProjection; // Projection matrix, user adjustable - sw_matrix_t matTexture; // Texture matrix, user adjustable - sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) - sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) - sw_matrix_t matMVP; // Model view projection matrix, calculated 
and used internally - - sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations - sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations - sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations - uint32_t stackProjectionCounter; // Counter for matrix stack operations - uint32_t stackModelviewCounter; // Counter for matrix stack operations - uint32_t stackTextureCounter; // Counter for matrix stack operations - - SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) - bool modelMatrixUsed; // Flag indicating if the model matrix is used - - SWcull cullFace; // Faces to cull - SWerrcode errCode; // Last error code - - sw_texture_t* loadedTextures; - int loadedTextureCount; - - uint32_t* freeTextureIds; - int freeTextureIdCount; - - } sw_data_t; - - - /* === Global Data === */ - - static sw_data_t RLSW = { 0 }; - - - /* === Helper Functions === */ - - static inline void sw_matrix_id(sw_matrix_t dst) - { - dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; - dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0; - dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; - dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; - } - - static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) - { - sw_matrix_t result; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - float sum = 0.0; - for (int k = 0; k < 4; k++) { - sum += left[i * 4 + k] * right[k * 4 + j]; - } - result[i * 4 + j] = sum; - } - } - for (int i = 0; i < 16; i++) { - dst[i] = result[i]; - } - } - - static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) - { - float tmp[4] = { - mat[0] * v[0] + mat[4] * v[1] + mat[8] * v[2] + mat[12] * v[3], - mat[1] * v[0] + mat[5] * v[1] + mat[9] * v[2] + mat[13] * v[3], - mat[2] * v[0] + mat[6] * v[1] + mat[10] * v[2] 
+ mat[14] * v[3], - mat[3] * v[0] + mat[7] * v[1] + mat[11] * v[2] + mat[15] * v[3] - }; - - for (int i = 0; i < 4; i++) { - dst[i] = tmp[i]; - } - } - - static inline float sw_lerp(float a, float b, float t) - { - return a + t * (b - a); - } - - static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) - { - sw_vertex_t result; - for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { - ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); - } - return result; - } - - static inline uint32_t sw_cvt_hf_ui(uint16_t h) - { - uint32_t s = (uint32_t)(h & 0x8000) << 16; - int32_t em = h & 0x7fff; - - // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) - int32_t r = (em + (112 << 10)) << 13; - - // denormal: flush to zero - r = (em < (1 << 10)) ? 0 : r; - - // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases - // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 - r += (em >= (31 << 10)) ? 
(112 << 23) : 0; - - return s | r; - } - - static inline float sw_cvt_hf(sw_half_t y) - { - union { float f; uint32_t i; } v = { - .i = sw_cvt_hf_ui(y) - }; - return v.f; - } - - static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) - { - float gray = (float)((uint8_t*)pixels)[offset] / 255; - - color[0] = gray; - color[1] = gray; - color[2] = gray; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_red_16(float* color, const void* pixels, uint32_t offset) - { - float value = sw_cvt_hf(((sw_half_t*)pixels)[offset]); - - color[0] = value; - color[1] = value; - color[2] = value; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_t offset) - { - float value = ((float*)pixels)[offset]; - - color[0] = value; - color[1] = value; - color[2] = value; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) - { - float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; - float alpha = (float)((uint8_t*)pixels)[2 * offset + 1] / 255; - - color[0] = gray; - color[1] = gray; - color[2] = gray; - color[3] = alpha; - } - - static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) - { - uint16_t pixel = ((uint16_t*)pixels)[offset]; - - color[0] = (float)((pixel & 0xF800) >> 11) / 31; - color[1] = (float)((pixel & 0x7E0) >> 5) / 63; - color[2] = (float)(pixel & 0x1F) / 31; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) - { - const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; - - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgb_161616(float* color, const void* pixels, uint32_t offset) - { - const sw_half_t *pixel = (sw_half_t*)pixels + 3 * offset; - - color[0] = sw_cvt_hf(pixel[0]); - 
color[1] = sw_cvt_hf(pixel[1]); - color[2] = sw_cvt_hf(pixel[2]); - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgb_323232(float* color, const void* pixels, uint32_t offset) - { - const float *pixel = (float*)pixels + 3 * offset; - - color[0] = pixel[0]; - color[1] = pixel[1]; - color[2] = pixel[2]; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgba_5551(float* color, const void* pixels, uint32_t offset) - { - uint16_t pixel = ((uint16_t*)pixels)[offset]; - - color[0] = (float)((pixel & 0xF800) >> 11) / 31; - color[1] = (float)((pixel & 0x7C0) >> 6) / 31; - color[2] = (float)((pixel & 0x3E) >> 1) / 31; - color[3] = (float)(pixel & 0x1); - } - - static inline void sw_get_pixel_rgba_4444(float* color, const void* pixels, uint32_t offset) - { - uint16_t pixel = ((uint16_t*)pixels)[offset]; - - color[0] = (float)((pixel & 0xF000) >> 12) / 15; - color[1] = (float)((pixel & 0xF00) >> 8) / 15; - color[2] = (float)((pixel & 0xF0) >> 4) / 15; - color[3] = (float)(pixel & 0xF) / 15; - } - - static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint32_t offset) - { - const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; - - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; - color[3] = (float)pixel[3] / 255; - } - - static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) - { - const sw_half_t *pixel = (sw_half_t*)pixels + 4 * offset; - - color[0] = sw_cvt_hf(pixel[0]); - color[1] = sw_cvt_hf(pixel[1]); - color[2] = sw_cvt_hf(pixel[2]); - color[3] = sw_cvt_hf(pixel[3]); - } - - static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, uint32_t offset) - { - const float *pixel = (float*)pixels + 4 * offset; - - color[0] = pixel[0]; - color[1] = pixel[1]; - color[2] = pixel[2]; - color[3] = pixel[3]; - } - - static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) - { 
- switch (format) { - - case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: - sw_get_pixel_grayscale(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: - sw_get_pixel_grayscale_alpha(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: - sw_get_pixel_rgb_565(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: - sw_get_pixel_rgb_888(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: - sw_get_pixel_rgba_5551(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: - sw_get_pixel_rgba_4444(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: - sw_get_pixel_rgba_8888(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32: - sw_get_pixel_red_32(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: - sw_get_pixel_rgb_323232(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: - sw_get_pixel_rgba_32323232(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16: - sw_get_pixel_red_16(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: - sw_get_pixel_rgb_161616(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: - sw_get_pixel_rgba_16161616(color, pixels, offset); - break; - - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: - break; - - } - } - - static inline void sw_map_repeat(int* out, float in, int max) - { - // Upscale to nearest 
texture coordinates - // NOTE: We use '(int)(x+0.5)' although this is incorrect - // regarding the direction of rounding in case of negative values - // and also less accurate than roundf, but it remains so much more - // efficient that it is preferable for now to opt for this option. - - *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); - } - - static inline void sw_map_clamp_to_edge(int* out, float in, int max) - { - in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); - *out = (int)(in * (max - 1) + 0.5f); - } - - static inline void sw_map_mirrored_repeat(int* out, float in, int max) - { - in = fmodf(fabsf(in), 2); - if (in > 1.0f) in = 1.0f - (in - 1.0f); - *out = (int)(in * (max - 1) + 0.5f); - } - - static inline void sw_map(int* out, float in, int max, SWwrap mode) - { - switch (mode) { - case SW_REPEAT: - sw_map_repeat(out, in, max); - break; - case SW_CLAMP_TO_EDGE: - sw_map_clamp_to_edge(out, in, max); - break; - case SW_MIRRORED_REPEAT: - sw_map_mirrored_repeat(out, in, max); - break; - } - } - - static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) - { - int x, y; - sw_map(&x, u, tex->width, tex->sWrap); - sw_map(&y, v, tex->height, tex->tWrap); - sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); - } - - static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) - { - int x0, y0, x1, y1; - sw_map(&x0, u, tex->width, tex->sWrap); - sw_map(&y0, v, tex->height, tex->tWrap); - sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); - sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); - - float fx = u * (tex->width - 1) - x0; - float fy = v * (tex->height - 1) - y0; - - float c00[4], c10[4], c01[4], c11[4]; - sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); - sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); - sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); - sw_get_pixel(c11, tex->pixels, y1 * 
tex->width + x1, tex->format); - - float c0[4], c1[4]; - for (int i = 0; i < 4; i++) { - float a = sw_lerp(c00[i], c10[i], fx); - float b = sw_lerp(c01[i], c11[i], fx); - color[i] = sw_lerp(a, b, fy); - } - } - - static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, - float xDu, float yDu, float xDv, float yDv) - { - // TODO: It seems there are some incorrect detections depending on the context - // This is probably due to the fact that the fractions are obtained - // at the wrong moment during rasterization. It would be worth reviewing - // this, although the scanline method complicates things. - - // Calculate the derivatives for each axis - float du = sqrtf(xDu * xDu + yDu * yDu); - float dv = sqrtf(xDv * xDv + yDv * yDv); - float L = (du > dv) ? du : dv; - - // Select the filter based on the size of the footprint - if (L > 1.0f) { - // Minification - if (tex->minFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); - } else if (tex->minFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); - } - } else { - // Magnification - if (tex->magFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); - } else if (tex->magFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); - } - } - } - - static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) - { - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - - int inputCounter = *vertexCounter; - *vertexCounter = 0; - - const sw_vertex_t *prevVt = &input[inputCounter-1]; - char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? 
-1 : 1; - if (prevDot*currDot < 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], - (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - return *vertexCounter > 0; - } - - static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) - { - for (int iAxis = 0; iAxis < 3; iAxis++) - { - if (*vertexCounter == 0) return false; - - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - int inputCounter; - - const sw_vertex_t *prevVt; - char prevDot; - - // Clip against first plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; - if (prevDot * currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - if (*vertexCounter == 0) return false; - - // Clip against opposite plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 
1 : -1; - if (prevDot*currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - } - - return *vertexCounter > 0; - } - - void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) - { - for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; - for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; - sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); - } - - if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { - for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; - - // Calculation of the reciprocal of W for normalization - // as well as perspective correct attributes - v->homogeneous[3] = 1.0f / v->homogeneous[3]; - - // Division of XYZ coordinates by weight - v->homogeneous[0] *= v->homogeneous[3]; - v->homogeneous[1] *= v->homogeneous[3]; - v->homogeneous[2] *= v->homogeneous[3]; - - // Division of texture coordinates (perspective correct) - v->texcoord[0] *= v->homogeneous[3]; - v->texcoord[1] *= v->homogeneous[3]; - - // Transform to screen space - v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; - v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; - } - } - } - - void sw_raster_scanline(const sw_texture_t* tex, const sw_vertex_t* start, const sw_vertex_t* end, float yDu, float yDv) - { - // Calculate the horizontal width and avoid division by zero - float dx = end->screen[0] - start->screen[0]; - if (fabsf(dx) < 1e-4f) return; - - // Convert and center the screen coordinates - int xStart = (int)(start->screen[0] + 0.5f); - int xEnd 
= (int)(end->screen[0] + 0.5f); - int y = (int)(start->screen[1] + 0.5f); - - // Calculate the initial interpolation parameter and its increment - float dt = 1.0f / dx; - float t = (xStart - start->screen[0]) * dt; - - // Calculate the horizontal gradients for UV coordinates - float xDu = (end->texcoord[0] - start->texcoord[0]) * dt; - float xDv = (end->texcoord[1] - start->texcoord[1]) * dt; - - // Pre-calculate the color differences for interpolation - float dcol[4]; - for (int i = 0; i < 4; i++) { - dcol[i] = end->color[i] - start->color[i]; - } - - // Pre-calculate the differences in Z and W (for depth testing and perspective correction) - float dz = end->homogeneous[2] - start->homogeneous[2]; - float dw = end->homogeneous[3] - start->homogeneous[3]; - - // Initialize the interpolated texture coordinates - float u = start->texcoord[0] + t * xDu; - float v = start->texcoord[1] + t * xDv; - - // Pre-calculate the starting pointer for the color framebuffer row - uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); - uint8_t* dst = row_ptr + xStart * 4; - - // Pre-calculate the pointer for the depth buffer row - uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; - uint16_t* dptr = depth_row; - - // Scanline rasterization loop - for (int x = xStart; x < xEnd; x++) { - // Interpolate Z and W for depth testing and perspective correction - float w = 1.0f / (start->homogeneous[3] + t * dw); - float z = start->homogeneous[2] + t * dz; - - // Depth testing with direct access to the depth buffer - // TODO: Implement different depth funcs? 
- float depth = (float)(*dptr) / UINT16_MAX; - if (z > depth) goto discard; - - // Update the depth buffer - *dptr = (uint16_t)(z * UINT16_MAX); - - // Sample the texture - float texColor[4]; - sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); - - // Interpolate the color and modulate by the texture color - for (int i = 0; i < 4; i++) { - float lerp = start->color[i] + t * dcol[i]; - float finalColor = texColor[i] * lerp; - // Inline clamp to keep the value between 0 and 1 - // NOTE: The need for clamp, the colors could be a sign of problem during interpolation (?) - finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); - dst[i] = (uint8_t)(finalColor * 255.0f); - } - - // Increment the interpolation parameter, UVs, and pointers - discard: - t += dt; - u += xDu; - v += xDv; - dst += 4; - dptr++; - } - } - - void sw_raster_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, const sw_texture_t* tex) - { - // Swap vertices by increasing y - if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } - if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } - if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } - - // Extracting coordinates from the sorted vertices - float x0 = v0->screen[0], y0 = v0->screen[1]; - float x1 = v1->screen[0], y1 = v1->screen[1]; - float x2 = v2->screen[0], y2 = v2->screen[1]; - - // Reject degenerate triangles - float height = y2 - y0; - if (height < 1e-4f) return; - - // Global calculation of vertical texture gradients for the triangle - float yDu = (v2->texcoord[0] - v0->texcoord[0]) / height; - float yDv = (v2->texcoord[1] - v0->texcoord[1]) / height; - - // Precompute the inverse of the triangle height and - // edge lengths with checks to avoid division by zero. - float inv_height = 1.0f / height; - float inv_y1y0 = (y1 - y0 > 1e-4f) ? 
1.0f / (y1 - y0) : 0.0f; - float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; - - // Pre-calculation of slopes (dx/dy) - float dx02 = (x2 - x0) * inv_height; - float dx01 = (x1 - x0) * inv_y1y0; - float dx12 = (x2 - x1) * inv_y2y1; - - // Y bounds (vertical clipping) - int yTop = (int)(y0 + 0.5f); - int yMiddle = (int)(y1 + 0.5f); - int yBottom = (int)(y2 + 0.5f); - - // Initializing scanline variables - float xLeft = x0, xRight = x0; - sw_vertex_t start, end; - - // Scanline for the upper part of the triangle - for (int y = yTop; y < yMiddle; y++) { - float dy = (float)y - y0; - float t1 = dy * inv_height; - float t2 = dy * inv_y1y0; - - // Optimized interpolation - start = sw_lerp_vertex(v0, v2, t1); - end = sw_lerp_vertex(v0, v1, t2); - start.screen[0] = xLeft; - start.screen[1] = (float)y; - end.screen[0] = xRight; - end.screen[1] = (float)y; - - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } - sw_raster_scanline(tex, &start, &end, yDu, yDv); - - // Incremental update - xLeft += dx02; - xRight += dx01; - } - - // Scanline for the lower part of the triangle - xRight = x1; // Restart the right side from the second vertex - for (int y = yMiddle; y < yBottom; y++) { - float dy = (float)y - y0; - float t1 = dy * inv_height; - float t2 = (float)(y - y1) * inv_y2y1; - - // Optimized interpolation - start = sw_lerp_vertex(v0, v2, t1); - end = sw_lerp_vertex(v1, v2, t2); - start.screen[0] = xLeft; - start.screen[1] = (float)y; - end.screen[0] = xRight; - end.screen[1] = (float)y; - - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } - sw_raster_scanline(tex, &start, &end, yDu, yDv); - - // Incremental update - xLeft += dx02; - xRight += dx12; - } - } - - void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) - { - int vertexCounter = 3; - - sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; - polygon[0] = *v0; - polygon[1] = *v1; - polygon[2] = *v2; - - 
sw_project_and_clip_triangle(polygon, &vertexCounter); - - if (vertexCounter < 3) { - return; - } - - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } - } - - static inline bool sw_is_texture_id_valid(uint32_t id) - { - bool valid = true; - - if (id == 0) valid = false; - else if (id >= SW_MAX_TEXTURES) valid = false; - else if (RLSW.loadedTextures[id].pixels == 0) valid = false; - - return true; - } - - static inline bool sw_is_texture_filter_valid(int filter) - { - return (filter == SW_NEAREST || filter == SW_LINEAR); - } - - static inline bool sw_is_texture_wrap_valid(int wrap) - { - return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); - } - - /* === Public Implementation === */ - - void swInit(int w, int h) - { - swViewport(0, 0, w, h); - - RLSW.framebuffer.color = SW_MALLOC(4 * w * h); - RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); - - RLSW.framebuffer.width = w; - RLSW.framebuffer.height = h; - - RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); - RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); - - RLSW.clearColor[0] = 0; - RLSW.clearColor[1] = 0; - RLSW.clearColor[2] = 0; - RLSW.clearColor[3] = 255; - RLSW.clearDepth = UINT16_MAX; - - RLSW.currentMatrixMode = SW_MODELVIEW; - RLSW.currentMatrix = &RLSW.matView; - - sw_matrix_id(RLSW.matProjection); - sw_matrix_id(RLSW.matTexture); - sw_matrix_id(RLSW.matModel); - sw_matrix_id(RLSW.matView); - - RLSW.vertexBuffer[0].color[0] = 1.0f; - RLSW.vertexBuffer[0].color[1] = 1.0f; - RLSW.vertexBuffer[0].color[2] = 1.0f; - RLSW.vertexBuffer[0].color[3] = 1.0f; - - RLSW.vertexBuffer[0].texcoord[0] = 0.0f; - RLSW.vertexBuffer[0].texcoord[1] = 0.0f; - - RLSW.vertexBuffer[0].normal[0] = 0.0f; - RLSW.vertexBuffer[0].normal[1] = 0.0f; - RLSW.vertexBuffer[0].normal[2] = 1.0f; - - static const float defTex[3*2*2] = - { - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 
1.0f, - 1.0f, 1.0f, 1.0f, - }; - - RLSW.loadedTextures[0].pixels = defTex; - RLSW.loadedTextures[0].width = 2; - RLSW.loadedTextures[0].height = 2; - RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; - RLSW.loadedTextures[0].minFilter = SW_NEAREST; - RLSW.loadedTextures[0].magFilter = SW_NEAREST; - RLSW.loadedTextures[0].sWrap = SW_REPEAT; - RLSW.loadedTextures[0].tWrap = SW_REPEAT; - RLSW.loadedTextures[0].tx = 0.5f; - RLSW.loadedTextures[0].ty = 0.5f; - - RLSW.loadedTextureCount = 1; - } - - void swClose(void) - { - SW_FREE(RLSW.framebuffer.color); - SW_FREE(RLSW.framebuffer.depth); - - SW_FREE(RLSW.loadedTextures); - SW_FREE(RLSW.freeTextureIds); - } - - void* swGetColorBuffer(int* w, int* h) - { - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; - - return RLSW.framebuffer.color; - } - - void swMatrixMode(SWmatrix mode) - { - switch (mode) { - case SW_PROJECTION: - RLSW.currentMatrix = &RLSW.matProjection; - break; - case SW_MODELVIEW: - RLSW.currentMatrix = RLSW.modelMatrixUsed - ? 
&RLSW.matModel : &RLSW.matView; - break; - case SW_TEXTURE: - RLSW.currentMatrix = &RLSW.matTexture; - break; - default: - RLSW.errCode = SW_INVALID_ENUM; - return; - } - - RLSW.currentMatrixMode = mode; - } - - void swPushMatrix(void) - { - switch (RLSW.currentMatrixMode) { - - case SW_PROJECTION: - if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - for (int i = 0; i < 16; i++) { - RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; - } - RLSW.stackProjectionCounter++; - break; - - case SW_MODELVIEW: - if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - if (RLSW.modelMatrixUsed) { - for (int i = 0; i < 16; i++) { - RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; - } - RLSW.stackModelviewCounter++; - } else { - RLSW.currentMatrix = &RLSW.matModel; - RLSW.modelMatrixUsed = true; - } - break; - - case SW_TEXTURE: - if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - for (int i = 0; i < 16; i++) { - RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; - } - RLSW.stackTextureCounter++; - break; - - } - } - - void swPopMatrix(void) - { - switch (RLSW.currentMatrixMode) { - - case SW_PROJECTION: - if (RLSW.stackProjectionCounter <= 0) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - RLSW.stackProjectionCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; - } - break; - - case SW_MODELVIEW: - if (RLSW.stackModelviewCounter == 0) { - if (!RLSW.modelMatrixUsed) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - sw_matrix_id(RLSW.matModel); - RLSW.currentMatrix = &RLSW.matView; - RLSW.modelMatrixUsed = false; - } else { - RLSW.stackModelviewCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matModel[i] = 
RLSW.stackModelview[RLSW.stackModelviewCounter][i]; - } - } - break; - - case SW_TEXTURE: - if (RLSW.stackTextureCounter <= 0) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - RLSW.stackTextureCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; - } - break; - - } - } - - void swLoadIdentity(void) - { - sw_matrix_id(*RLSW.currentMatrix); - } - - void swTranslatef(float x, float y, float z) - { - sw_matrix_t mat; - sw_matrix_id(mat); - - mat[12] = x; - mat[13] = y; - mat[14] = z; - - sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - } - - void swRotatef(float angle, float x, float y, float z) - { - angle *= SW_DEG2RAD; - - sw_matrix_t mat; - sw_matrix_id(mat); - - float lengthSq = x*x + y*y + z*z; - - if (lengthSq != 1.0f && lengthSq != 0.0f) { - float invLenght = 1.0f / lengthSq; - x *= invLenght; - y *= invLenght; - z *= invLenght; - } - - float sinres = sinf(angle); - float cosres = cosf(angle); - float t = 1.0f - cosres; - - mat[0] = x*x*t + cosres; - mat[1] = y*x*t + z*sinres; - mat[2] = z*x*t - y*sinres; - - mat[4] = x*y*t - z*sinres; - mat[5] = y*y*t + cosres; - mat[6] = z*y*t + x*sinres; - - mat[8] = x*z*t + y*sinres; - mat[9] = y*z*t - x*sinres; - mat[10] = z*z*t + cosres; - - sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - } - - void swScalef(float x, float y, float z) - { - sw_matrix_t mat; - - mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; - mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; - mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; - mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; - - sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - } - - void swMultMatrixf(const float* mat) - { - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - } - - void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) - { - sw_matrix_t mat = { 0 }; - - float rl = right - left; - float tb = top - bottom; - float 
fn = zfar - znear; - - mat[0] = (znear * 2.0f) / rl; - mat[5] = (znear * 2.0f) / tb; - - mat[8] = (right + left) / rl; - mat[9] = (top + bottom) / tb; - mat[10] = -(zfar + znear) / fn; - mat[11] = -1.0f; - - mat[14] = -(zfar * znear * 2.0f) / fn; - - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - } - - void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) - { - sw_matrix_t mat = { 0 }; - - float rl = (right - left); - float tb = (top - bottom); - float fn = (zfar - znear); - - mat[0] = 2.0f / rl; - mat[5] = 2.0f / tb; - - mat[10] = -2.0f / fn; - mat[11] = 0.0f; - mat[12] = -(left + right) / rl; - mat[13] = -(top + bottom) / tb; - - mat[14] = -(zfar + znear) / fn; - mat[15] = 1.0f; - - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - } - - void swViewport(int x, int y, int width, int height) - { - if (x <= -width || y <= -height) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - RLSW.vpPos[0] = x; - RLSW.vpPos[1] = y; - - RLSW.vpDim[0] = width - 1; - RLSW.vpDim[1] = height - 1; - - RLSW.vpMin[0] = (x < 0) ? 0 : x; - RLSW.vpMin[1] = (y < 0) ? 0 : y; - - int fbW = RLSW.framebuffer.width - 1; - int fbH = RLSW.framebuffer.height - 1; - - int vpMaxX = x + width; - int vpMaxY = y + height; - - RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; - RLSW.vpMax[1] = (vpMaxY < fbH) ? 
vpMaxY : fbH; - } - - void swClearColor(float r, float g, float b, float a) - { - RLSW.clearColor[0] = r * 255; - RLSW.clearColor[1] = g * 255; - RLSW.clearColor[2] = b * 255; - RLSW.clearColor[3] = a * 255; - } - - void swClear(void) - { - int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - - for (int i = 0; i < size; i++) { - ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); - RLSW.framebuffer.depth[i] = RLSW.clearDepth; - } - } - - void swBegin(SWfill mode) - { - if (mode < SW_POINTS || mode > SW_QUADS) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - RLSW.vertexCounter = 0; - RLSW.fillMode = mode; - } - - void swEnd(void) - { - RLSW.vertexCounter = 0; - } - - void swVertex2i(int x, int y) - { - float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; - swVertex4fv(v); - } - - void swVertex2f(float x, float y) - { - float v[4] = { x, y, 0.0f, 1.0f }; - swVertex4fv(v); - } - - void swVertex2fv(const float* v) - { - float v4[4] = { v[0], v[1], 0.0f, 1.0f }; - swVertex4fv(v4); - } - - void swVertex3i(int x, int y, int z) - { - float v[4] = { (float)x, (float)y, (float)z, 1.0f }; - swVertex4fv(v); - } - - void swVertex3f(float x, float y, float z) - { - float v[4] = { x, y, z, 1.0f }; - swVertex4fv(v); - } - - void swVertex3fv(const float* v) - { - float v4[4] = { v[0], v[1], v[2], 1.0f }; - swVertex4fv(v4); - } - - void swVertex4i(int x, int y, int z, int w) - { - float v[4] = { (float)x, (float)y, (float)z, (float)w }; - swVertex4fv(v); - } - - void swVertex4f(float x, float y, float z, float w) - { - float v[4] = { x, y, z, w }; - swVertex4fv(v); - } - - void swVertex4fv(const float* v) - { - for (int i = 0; i < 4; i++) { - RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; - } - RLSW.vertexCounter++; - - int neededVertices = 0; - switch (RLSW.fillMode) { - case SW_POINTS: - neededVertices = 1; - break; - case SW_LINES: - neededVertices = 2; - break; - case SW_TRIANGLES: - neededVertices = 3; - break; - case SW_QUADS: - 
neededVertices = 4; - break; - } - - if (RLSW.vertexCounter == neededVertices) { - - // TODO: Optimize MVP calculation - sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); - sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); - - switch (RLSW.fillMode) { - case SW_POINTS: - break; - case SW_LINES: - neededVertices = 2; - break; - case SW_TRIANGLES: - sw_render_triangle( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - break; - case SW_QUADS: - sw_render_triangle( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - sw_render_triangle( - &RLSW.vertexBuffer[2], - &RLSW.vertexBuffer[3], - &RLSW.vertexBuffer[0] - ); - break; - } - - RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; - RLSW.vertexCounter = 0; - } - else { - RLSW.vertexBuffer[RLSW.vertexCounter] = RLSW.vertexBuffer[RLSW.vertexCounter - 1]; - } - } - - void swColor1ui(uint32_t color) - { - union { - uint32_t v; - uint8_t a[4]; - } c = { .v = color }; - - float cv[4]; - cv[0] = (float)c.a[0] / 255; - cv[1] = (float)c.a[1] / 255; - cv[2] = (float)c.a[2] / 255; - cv[3] = (float)c.a[3] / 255; - - swColor4fv(cv); - } - - void swColor3ub(uint8_t r, uint8_t g, uint8_t b) - { - float cv[4]; - cv[0] = (float)r / 255; - cv[1] = (float)g / 255; - cv[2] = (float)b / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3ubv(const uint8_t* v) - { - float cv[4]; - cv[0] = (float)v[0] / 255; - cv[1] = (float)v[1] / 255; - cv[2] = (float)v[2] / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3us(uint16_t r, uint16_t g, uint16_t b) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3usv(const uint16_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - 
cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3ui(uint32_t r, uint32_t g, uint32_t b) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3uiv(const uint32_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3f(float r, float g, float b) - { - float cv[4]; - cv[0] = r; - cv[1] = g; - cv[2] = b; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3fv(const float* v) - { - float cv[4]; - cv[0] = v[0]; - cv[1] = v[1]; - cv[2] = v[2]; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a) - { - float cv[4]; - cv[0] = (float)r / 255; - cv[1] = (float)g / 255; - cv[2] = (float)b / 255; - cv[3] = (float)a / 255; - - swColor4fv(cv); - } - - void swColor4ubv(const uint8_t* v) - { - float cv[4]; - cv[0] = (float)v[0] / 255; - cv[1] = (float)v[1] / 255; - cv[2] = (float)v[2] / 255; - cv[3] = (float)v[3] / 255; - - swColor4fv(cv); - } - - void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = (float)((uint8_t)(a >> 8)) / 255; - - swColor4fv(cv); - } - - void swColor4usv(const uint16_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; - - swColor4fv(cv); - } - - void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) 
/ 255; - cv[3] = (float)((uint8_t)(a >> 24)) / 255; - - swColor4fv(cv); - } - - void swColor4uiv(const uint32_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; - - swColor4fv(cv); - } - - void swColor4f(float r, float g, float b, float a) - { - float cv[4]; - cv[0] = r; - cv[1] = g; - cv[2] = b; - cv[3] = a; - - swColor4fv(cv); - } - - void swColor4fv(const float* v) - { - for (int i = 0; i < 4; i++) { - RLSW.vertexBuffer[RLSW.vertexCounter].color[i] = v[i]; - } - } - - void swTexCoord2f(float u, float v) - { - float s = RLSW.matTexture[0]*u + RLSW.matTexture[4]*v + RLSW.matTexture[12]; - float t = RLSW.matTexture[1]*u + RLSW.matTexture[5]*v + RLSW.matTexture[13]; - - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; - } - - void swTexCoordfv(const float* v) - { - float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; - float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; - - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; - } - - void swNormal3f(float x, float y, float z) - { - RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = x; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = y; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = z; - } - - void swNormal3fv(const float* v) - { - RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = v[0]; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = v[1]; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = v[2]; - } - - void swBindArray(SWarray type, void *buffer) - { - switch (type) { - case SW_VERTEX_ARRAY: - RLSW.array.positions = buffer; - break; - case SW_TEXTURE_COORD_ARRAY: - RLSW.array.texcoords = buffer; - break; - case SW_NORMAL_ARRAY: - RLSW.array.normals = buffer; - 
break; - case SW_COLOR_ARRAY: - RLSW.array.colors = buffer; - break; - default: - break; - } - } - - void swDrawArrays(SWfill mode, int offset, int count) - { - if (RLSW.array.positions == 0) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - swBegin(mode); - - for (int i = offset; i < count; i++) { - if (RLSW.array.texcoords) { - swTexCoordfv(RLSW.array.texcoords + 2 * i); - } - if (RLSW.array.normals) { - swNormal3fv(RLSW.array.normals + 3 * i); - } - if (RLSW.array.colors) { - swColor4ubv(RLSW.array.colors + 4 * i); - } - swVertex3fv(RLSW.array.positions + 3 * i); - } - - swEnd(); - } - - uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) - { - if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { - RLSW.errCode = SW_ERROR_OUT_OF_MEMORY; - return 0; - } - - sw_texture_t texture = { 0 }; - texture.pixels = data; - texture.width = width; - texture.height = height; - texture.format = format; - texture.minFilter = SW_NEAREST; - texture.magFilter = SW_NEAREST; - texture.sWrap = SW_REPEAT; - texture.tWrap = SW_REPEAT; - texture.tx = 1.0f / width; - texture.ty = 1.0f / height; - (void)mipmapCount; - - uint32_t id = 0; - if (RLSW.freeTextureIdCount > 0) { - id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; - } - else { - id = RLSW.loadedTextureCount++; - } - - RLSW.loadedTextures[id] = texture; - - return id; - } - - void swUnloadTexture(uint32_t id) - { - if (!sw_is_texture_id_valid(id)) { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - RLSW.loadedTextures[id].pixels = 0; - RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; - } - - void swTextureParameters(uint32_t id, int param, int value) - { - if (!sw_is_texture_id_valid(id)) { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - sw_texture_t* texture = &RLSW.loadedTextures[id]; - - switch (param) { - - case SW_TEXTURE_MIN_FILTER: - if (!sw_is_texture_filter_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->minFilter = value; - break; - 
- case SW_TEXTURE_MAG_FILTER: - if (!sw_is_texture_filter_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->magFilter = value; - break; - - case SW_TEXTURE_WRAP_S: - if (!sw_is_texture_wrap_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->sWrap = value; - break; - - case SW_TEXTURE_WRAP_T: - if (!sw_is_texture_wrap_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->tWrap = value; - break; - - default: - RLSW.errCode = SW_INVALID_ENUM; - return; - - } - } - - void swBindTexture(uint32_t id) - { - if (id >= SW_MAX_TEXTURES) { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - RLSW.currentTexture = id; - } - - #endif // RLSW_IMPL - \ No newline at end of file +#ifndef RLSW_H +#define RLSW_H + +#include +#include + + +/* === RLSW Definition And Macros === */ + +#ifndef SW_MALLOC +# define SW_MALLOC(sz) malloc(sz) +#endif + +#ifndef SW_FREE +# define SW_FREE(ptr) free(ptr) +#endif + +#ifndef SW_MAX_PROJECTION_STACK_SIZE +# define SW_MAX_PROJECTION_STACK_SIZE 2 +#endif + +#ifndef SW_MAX_MODELVIEW_STACK_SIZE +# define SW_MAX_MODELVIEW_STACK_SIZE 8 +#endif + +#ifndef SW_MAX_TEXTURE_STACK_SIZE +# define SW_MAX_TEXTURE_STACK_SIZE 4 +#endif + +#ifndef SW_MAX_TEXTURES +# define SW_MAX_TEXTURES 128 +#endif + +#ifndef SW_MAX_CLIPPED_POLYGON_VERTICES +# define SW_MAX_CLIPPED_POLYGON_VERTICES 12 +#endif + +#ifndef SW_CLIP_EPSILON +# define SW_CLIP_EPSILON 1e-4f +#endif + + +/* === OpenGL Definitions === */ + +#define GL_TEXTURE_2D 0x0DE1 +#define GL_DEPTH_TEST 0x0B71 +#define GL_CULL_FACE 0x0B44 + +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 + +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +//#define GL_INDEX_ARRAY 0x8077 +#define GL_TEXTURE_COORD_ARRAY 0x8078 + +#define GL_POINTS 0x0000 +#define GL_LINES 
0x0001 +//#define GL_LINE_LOOP 0x0002 +//#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +//#define GL_TRIANGLE_STRIP 0x0005 +//#define GL_TRIANGLE_FAN 0x0006 +#define GL_QUADS 0x0007 +//#define GL_QUAD_STRIP 0x0008 +//#define GL_POLYGON 0x0009 + +//#define GL_CW 0x0900 +//#define GL_CCW 0x0901 + +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 + +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) +#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) + +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 + +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 + + +/* === RLSW Enums === */ + +typedef enum { + SW_TEXTURE_2D = GL_TEXTURE_2D, + SW_DEPTH_TEST = GL_DEPTH_TEST, + SW_CULL_FACE = GL_CULL_FACE +} SWstate; + +typedef enum { + SW_PROJECTION = GL_PROJECTION, + SW_MODELVIEW = GL_MODELVIEW, + SW_TEXTURE = GL_TEXTURE +} SWmatrix; + +typedef enum { + SW_VERTEX_ARRAY = GL_VERTEX_ARRAY, + SW_TEXTURE_COORD_ARRAY = GL_TEXTURE_COORD_ARRAY, + SW_NORMAL_ARRAY = GL_NORMAL_ARRAY, + SW_COLOR_ARRAY = GL_COLOR_ARRAY +} SWarray; + +typedef enum { + SW_POINTS = GL_POINTS, + SW_LINES = GL_LINES, + SW_TRIANGLES = GL_TRIANGLES, + SW_QUADS = GL_QUADS, +} SWfill; + +typedef enum { + SW_FRONT = GL_FRONT, + SW_BACK = GL_BACK, +} SWface; + +typedef enum { + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) + SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) + 
SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) + SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp +} SWpixelformat; + +typedef enum { + SW_NEAREST = GL_NEAREST, + SW_LINEAR = GL_LINEAR +} SWfilter; + +typedef enum { + SW_REPEAT = GL_REPEAT, + SW_CLAMP_TO_EDGE = GL_CLAMP_TO_EDGE, + SW_MIRRORED_REPEAT = GL_MIRRORED_REPEAT +} SWwrap; + +typedef enum { + SW_TEXTURE_MIN_FILTER = GL_TEXTURE_MIN_FILTER, + SW_TEXTURE_MAG_FILTER = GL_TEXTURE_MAG_FILTER, + SW_TEXTURE_WRAP_S = GL_TEXTURE_WRAP_S, + SW_TEXTURE_WRAP_T = GL_TEXTURE_WRAP_T +} SWtexparam; + +typedef enum { + SW_NO_ERROR = GL_NO_ERROR, + SW_INVALID_ENUM = GL_INVALID_ENUM, + SW_INVALID_VALUE = GL_INVALID_VALUE, + SW_STACK_OVERFLOW = GL_STACK_OVERFLOW, + SW_STACK_UNDERFLOW = GL_STACK_UNDERFLOW, + SW_INVALID_OPERATION = GL_INVALID_OPERATION, +} SWerrcode; + +/* === Public API === */ + +void swInit(int w, int h); +void swClose(void); + +void swEnable(SWstate state); +void swDisable(SWstate state); + +void* swGetColorBuffer(int* w, int* h); + +void swMatrixMode(SWmatrix mode); +void 
swPushMatrix(void); +void swPopMatrix(void); +void swLoadIdentity(void); +void swTranslatef(float x, float y, float z); +void swRotatef(float angle, float x, float y, float z); +void swScalef(float x, float y, float z); +void swMultMatrixf(const float* mat); +void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); +void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); + +void swViewport(int x, int y, int width, int height); + +void swClearColor(float r, float g, float b, float a); +void swClear(void); + +void swBegin(SWfill mode); +void swEnd(void); + +void swVertex2i(int x, int y); +void swVertex2f(float x, float y); +void swVertex2fv(const float* v); +void swVertex3i(int x, int y, int z); +void swVertex3f(float x, float y, float z); +void swVertex3fv(const float* v); +void swVertex4i(int x, int y, int z, int w); +void swVertex4f(float x, float y, float z, float w); +void swVertex4fv(const float* v); + +void swColor1ui(uint32_t color); +void swColor3ub(uint8_t r, uint8_t g, uint8_t b); +void swColor3ubv(const uint8_t* v); +void swColor3us(uint16_t r, uint16_t g, uint16_t b); +void swColor3usv(const uint16_t* v); +void swColor3ui(uint32_t r, uint32_t g, uint32_t b); +void swColor3uiv(const uint32_t* v); +void swColor3f(float r, float g, float b); +void swColor3fv(const float* v); +void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); +void swColor4ubv(const uint8_t* v); +void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); +void swColor4usv(const uint16_t* v); +void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); +void swColor4uiv(const uint32_t* v); +void swColor4f(float r, float g, float b, float a); +void swColor4fv(const float* v); + +void swTexCoord2f(float u, float v); +void swTexCoordfv(const float* v); + +void swNormal3f(float x, float y, float z); +void swNormal3fv(const float* v); + +void swBindArray(SWarray type, void *buffer); +void swDrawArrays(SWfill 
mode, int offset, int count); + +uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); +void swUnloadTexture(uint32_t id); + +void swTextureParameters(uint32_t id, int param, int value); +void swBindTexture(uint32_t id); + +#endif // RLSW_H + + +#ifdef RLSW_IMPL + +#include +#include + +/* === Defines and Macros === */ + +#define SW_PI 3.14159265358979323846f +#define SW_DEG2RAD (SW_PI/180.0f) +#define SW_RAD2DEG (180.0f/SW_PI) + +#define SW_STATE_TEXTURE_2D (1 << 0) +#define SW_STATE_DEPTH_TEST (1 << 1) +#define SW_STATE_CULL_FACE (1 << 2) + +/* === Internal Structs === */ + +typedef float sw_matrix_t[4*4]; +typedef uint16_t sw_half_t; + +typedef struct { + + float position[4]; // Position coordinates + float normal[3]; // Normal vector + float texcoord[2]; // Texture coordinates + float color[4]; // Color + + float homogeneous[4]; // Homogeneous coordinates + float screen[2]; // Screen coordinates + +} sw_vertex_t; + +typedef struct { + + const void* pixels; + int width; + int height; + int format; + + SWfilter minFilter; + SWfilter magFilter; + + SWwrap sWrap; + SWwrap tWrap; + + float tx; + float ty; + +} sw_texture_t; + +typedef struct { + uint8_t *color; // 32-bit RGBA color buffer + uint16_t *depth; // 16-bit fixed fract buffer + int width, height; +} sw_framebuffer_t; + +typedef struct { + + sw_framebuffer_t framebuffer; + uint8_t clearColor[4]; // Color used to clear the screen + uint16_t clearDepth; // Depth value used to clear the screen + + uint32_t currentTexture; + sw_matrix_t *currentMatrix; + + uint32_t blendFunction; + uint32_t depthFunction; + + int vpPos[2]; // Represents the top-left corner of the viewport + int vpDim[2]; // Represents the dimensions of the viewport (minus one) + int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) + int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) + + struct { + float* positions; + float* texcoords; + 
float* normals; + uint8_t* colors; + } array; + + sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering + int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' + + SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) + float pointSize; // Rasterized point size + float lineWidth; // Rasterized line width + + sw_matrix_t matProjection; // Projection matrix, user adjustable + sw_matrix_t matTexture; // Texture matrix, user adjustable + sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) + sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) + sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally + + sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations + sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations + sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations + uint32_t stackProjectionCounter; // Counter for matrix stack operations + uint32_t stackModelviewCounter; // Counter for matrix stack operations + uint32_t stackTextureCounter; // Counter for matrix stack operations + + SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) + bool modelMatrixUsed; // Flag indicating if the model matrix is used + + SWface cullFace; // Faces to cull + SWerrcode errCode; // Last error code + + sw_texture_t* loadedTextures; + int loadedTextureCount; + + uint32_t* freeTextureIds; + int freeTextureIdCount; + + uint32_t stateFlags; + +} sw_data_t; + + +/* === Global Data === */ + +static sw_data_t RLSW = { 0 }; + + +/* === Helper Functions === */ + +static inline void sw_matrix_id(sw_matrix_t dst) +{ + dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; + dst[4] = 0, dst[5] = 1, dst[6] = 0, 
dst[7] = 0; + dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; + dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; +} + +static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) +{ + sw_matrix_t result; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + float sum = 0.0; + for (int k = 0; k < 4; k++) { + sum += left[i * 4 + k] * right[k * 4 + j]; + } + result[i * 4 + j] = sum; + } + } + for (int i = 0; i < 16; i++) { + dst[i] = result[i]; + } +} + +static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) +{ + float tmp[4] = { + mat[0] * v[0] + mat[4] * v[1] + mat[8] * v[2] + mat[12] * v[3], + mat[1] * v[0] + mat[5] * v[1] + mat[9] * v[2] + mat[13] * v[3], + mat[2] * v[0] + mat[6] * v[1] + mat[10] * v[2] + mat[14] * v[3], + mat[3] * v[0] + mat[7] * v[1] + mat[11] * v[2] + mat[15] * v[3] + }; + + for (int i = 0; i < 4; i++) { + dst[i] = tmp[i]; + } +} + +static inline float sw_lerp(float a, float b, float t) +{ + return a + t * (b - a); +} + +static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) +{ + sw_vertex_t result; + for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { + ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); + } + return result; +} + +static inline uint32_t sw_cvt_hf_ui(uint16_t h) +{ + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? 
(112 << 23) : 0; + + return s | r; +} + +static inline float sw_cvt_hf(sw_half_t y) +{ + union { float f; uint32_t i; } v = { + .i = sw_cvt_hf_ui(y) + }; + return v.f; +} + +static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) +{ + float gray = (float)((uint8_t*)pixels)[offset] / 255; + + color[0] = gray; + color[1] = gray; + color[2] = gray; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_red_16(float* color, const void* pixels, uint32_t offset) +{ + float value = sw_cvt_hf(((sw_half_t*)pixels)[offset]); + + color[0] = value; + color[1] = value; + color[2] = value; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_t offset) +{ + float value = ((float*)pixels)[offset]; + + color[0] = value; + color[1] = value; + color[2] = value; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) +{ + float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; + float alpha = (float)((uint8_t*)pixels)[2 * offset + 1] / 255; + + color[0] = gray; + color[1] = gray; + color[2] = gray; + color[3] = alpha; +} + +static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) +{ + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7E0) >> 5) / 63; + color[2] = (float)(pixel & 0x1F) / 31; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) +{ + const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgb_161616(float* color, const void* pixels, uint32_t offset) +{ + const sw_half_t *pixel = (sw_half_t*)pixels + 3 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = 
sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgb_323232(float* color, const void* pixels, uint32_t offset) +{ + const float *pixel = (float*)pixels + 3 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgba_5551(float* color, const void* pixels, uint32_t offset) +{ + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7C0) >> 6) / 31; + color[2] = (float)((pixel & 0x3E) >> 1) / 31; + color[3] = (float)(pixel & 0x1); +} + +static inline void sw_get_pixel_rgba_4444(float* color, const void* pixels, uint32_t offset) +{ + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF000) >> 12) / 15; + color[1] = (float)((pixel & 0xF00) >> 8) / 15; + color[2] = (float)((pixel & 0xF0) >> 4) / 15; + color[3] = (float)(pixel & 0xF) / 15; +} + +static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint32_t offset) +{ + const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = (float)pixel[3] / 255; +} + +static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) +{ + const sw_half_t *pixel = (sw_half_t*)pixels + 4 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = sw_cvt_hf(pixel[3]); +} + +static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, uint32_t offset) +{ + const float *pixel = (float*)pixels + 4 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = pixel[3]; +} + +static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) +{ + switch (format) { + + case 
SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_get_pixel_grayscale(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_get_pixel_grayscale_alpha(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_get_pixel_rgb_565(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_get_pixel_rgb_888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_get_pixel_rgba_5551(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_get_pixel_rgba_4444(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_get_pixel_rgba_8888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_get_pixel_red_32(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_get_pixel_rgb_323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_get_pixel_rgba_32323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_get_pixel_red_16(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_get_pixel_rgb_161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_get_pixel_rgba_16161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + break; + + } +} + +static inline void sw_map_repeat(int* out, float in, int max) +{ + // Upscale to nearest texture coordinates + // NOTE: We 
use '(int)(x+0.5)' although this is incorrect + // regarding the direction of rounding in case of negative values + // and also less accurate than roundf, but it remains so much more + // efficient that it is preferable for now to opt for this option. + + *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); +} + +static inline void sw_map_clamp_to_edge(int* out, float in, int max) +{ + in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); + *out = (int)(in * (max - 1) + 0.5f); +} + +static inline void sw_map_mirrored_repeat(int* out, float in, int max) +{ + in = fmodf(fabsf(in), 2); + if (in > 1.0f) in = 1.0f - (in - 1.0f); + *out = (int)(in * (max - 1) + 0.5f); +} + +static inline void sw_map(int* out, float in, int max, SWwrap mode) +{ + switch (mode) { + case SW_REPEAT: + sw_map_repeat(out, in, max); + break; + case SW_CLAMP_TO_EDGE: + sw_map_clamp_to_edge(out, in, max); + break; + case SW_MIRRORED_REPEAT: + sw_map_mirrored_repeat(out, in, max); + break; + } +} + +static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) +{ + int x, y; + sw_map(&x, u, tex->width, tex->sWrap); + sw_map(&y, v, tex->height, tex->tWrap); + sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); +} + +static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) +{ + int x0, y0, x1, y1; + sw_map(&x0, u, tex->width, tex->sWrap); + sw_map(&y0, v, tex->height, tex->tWrap); + sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); + sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); + + float fx = u * (tex->width - 1) - x0; + float fy = v * (tex->height - 1) - y0; + + float c00[4], c10[4], c01[4], c11[4]; + sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); + sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); + sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); + sw_get_pixel(c11, tex->pixels, y1 * tex->width + x1, tex->format); + + float c0[4], c1[4]; 
+ for (int i = 0; i < 4; i++) { + float a = sw_lerp(c00[i], c10[i], fx); + float b = sw_lerp(c01[i], c11[i], fx); + color[i] = sw_lerp(a, b, fy); + } +} + +static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, + float xDu, float yDu, float xDv, float yDv) +{ + // TODO: It seems there are some incorrect detections depending on the context + // This is probably due to the fact that the fractions are obtained + // at the wrong moment during rasterization. It would be worth reviewing + // this, although the scanline method complicates things. + + // Calculate the derivatives for each axis + float du = sqrtf(xDu * xDu + yDu * yDu); + float dv = sqrtf(xDv * xDv + yDv * yDv); + float L = (du > dv) ? du : dv; + + // Select the filter based on the size of the footprint + if (L > 1.0f) { + // Minification + if (tex->minFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->minFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } else { + // Magnification + if (tex->magFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->magFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } +} + +static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + + int inputCounter = *vertexCounter; + *vertexCounter = 0; + + const sw_vertex_t *prevVt = &input[inputCounter-1]; + char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? 
-1 : 1; + if (prevDot*currDot < 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], + (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + return *vertexCounter > 0; +} + +static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + for (int iAxis = 0; iAxis < 3; iAxis++) + { + if (*vertexCounter == 0) return false; + + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int inputCounter; + + const sw_vertex_t *prevVt; + char prevDot; + + // Clip against first plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; + if (prevDot * currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + if (*vertexCounter == 0) return false; + + // Clip against opposite plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 
1 : -1; + if (prevDot*currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + } + + return *vertexCounter > 0; +} + +void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; + sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); + } + + if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + + // Calculation of the reciprocal of W for normalization + // as well as perspective correct attributes + v->homogeneous[3] = 1.0f / v->homogeneous[3]; + + // Division of XYZ coordinates by weight + v->homogeneous[0] *= v->homogeneous[3]; + v->homogeneous[1] *= v->homogeneous[3]; + v->homogeneous[2] *= v->homogeneous[3]; + + // Division of texture coordinates (perspective correct) + v->texcoord[0] *= v->homogeneous[3]; + v->texcoord[1] *= v->homogeneous[3]; + + // Transform to screen space + v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; + v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + } + } +} + +#define DEFINE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ + const sw_vertex_t* end, float yDu, float yDv) \ +{ \ + /* Calculate the horizontal width and avoid division by zero */ \ + float dx = end->screen[0] - start->screen[0]; \ + if (fabsf(dx) < 1e-4f) return; \ + \ + /* Convert and 
center the screen coordinates */ \ + int xStart = (int)(start->screen[0] + 0.5f); \ + int xEnd = (int)(end->screen[0] + 0.5f); \ + int y = (int)(start->screen[1] + 0.5f); \ + \ + /* Calculate the initial interpolation parameter and its increment */ \ + float dt = 1.0f / dx; \ + float t = (xStart - start->screen[0]) * dt; \ + \ + float xDu, xDv; \ + if (ENABLE_TEXTURE) { \ + /* Calculate the horizontal gradients for UV coordinates */ \ + xDu = (end->texcoord[0] - start->texcoord[0]) * dt; \ + xDv = (end->texcoord[1] - start->texcoord[1]) * dt; \ + } \ + \ + /* Pre-calculate the color differences for interpolation */ \ + float dcol[4]; \ + for (int i = 0; i < 4; i++) { \ + dcol[i] = end->color[i] - start->color[i]; \ + } \ + \ + /* Pre-calculate the differences in Z and W \ + (for depth testing and perspective correction) */ \ + float dz = end->homogeneous[2] - start->homogeneous[2]; \ + float dw = end->homogeneous[3] - start->homogeneous[3]; \ + \ + float u, v; \ + if (ENABLE_TEXTURE) { \ + /* Initialize the interpolated texture coordinates */ \ + u = start->texcoord[0] + t * xDu; \ + v = start->texcoord[1] + t * xDv; \ + } \ + \ + /* Pre-calculate the starting pointer for the color framebuffer row */ \ + uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ + uint8_t* dst = row_ptr + xStart * 4; \ + \ + /* Pre-calculate the pointer for the depth buffer row */ \ + uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ + uint16_t* dptr = depth_row; \ + \ + /* Scanline rasterization loop */ \ + for (int x = xStart; x < xEnd; x++) { \ + /* Interpolate Z and W for depth testing and perspective correction */ \ + float w = 1.0f / (start->homogeneous[3] + t * dw); \ + float z = start->homogeneous[2] + t * dz; \ + \ + if (ENABLE_DEPTH_TEST) { \ + /* Depth testing with direct access to the depth buffer */ \ + /* TODO: Implement different depth funcs? 
*/ \ + float depth = (float)(*dptr) / UINT16_MAX; \ + if (z > depth) goto discard; \ + } \ + \ + /* Update the depth buffer */ \ + *dptr = (uint16_t)(z * UINT16_MAX); \ + \ + if (ENABLE_TEXTURE) \ + { \ + /* Sample the texture */ \ + float texColor[4]; \ + sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + \ + /* Interpolate the color and modulate by the texture color */ \ + for (int i = 0; i < 4; i++) { \ + float lerp = start->color[i] + t * dcol[i]; \ + float finalColor = texColor[i] * lerp; \ + /* Inline clamp to keep the value between 0 and 1 */ \ + /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ + finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \ + dst[i] = (uint8_t)(finalColor * 255.0f); \ + } \ + } \ + else \ + { \ + /* Interpolate the color */ \ + for (int i = 0; i < 4; i++) { \ + float finalColor = start->color[i] + t * dcol[i]; \ + /* Inline clamp to keep the value between 0 and 1 */ \ + /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ + finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 
1.0f : finalColor); \ + dst[i] = (uint8_t)(finalColor * 255.0f); \ + } \ + } \ + \ + /* Increment the interpolation parameter, UVs, and pointers */ \ + discard: \ + t += dt; \ + dst += 4; \ + dptr++; \ + if (ENABLE_TEXTURE) { \ + u += xDu; \ + v += xDv; \ + } \ + } \ +} + +#define DEFINE_RASTER_TRIANGLE(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ +void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, \ + const sw_texture_t* tex) \ +{ \ + /* Swap vertices by increasing y */ \ + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ + if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } \ + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ + \ + /* Extracting coordinates from the sorted vertices */ \ + float x0 = v0->screen[0], y0 = v0->screen[1]; \ + float x1 = v1->screen[0], y1 = v1->screen[1]; \ + float x2 = v2->screen[0], y2 = v2->screen[1]; \ + \ + /* Reject degenerate triangles */ \ + float height = y2 - y0; \ + if (height < 1e-4f) return; \ + \ + /* Precompute the inverse of the triangle height and */ \ + /* edge lengths with checks to avoid division by zero. */ \ + float inv_height = 1.0f / height; \ + float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; \ + float inv_y2y1 = (y2 - y1 > 1e-4f) ? 
1.0f / (y2 - y1) : 0.0f; \ + \ + /* Pre-calculation of slopes (dx/dy) */ \ + float dx02 = (x2 - x0) * inv_height; \ + float dx01 = (x1 - x0) * inv_y1y0; \ + float dx12 = (x2 - x1) * inv_y2y1; \ + \ + /* Y bounds (vertical clipping) */ \ + int yTop = (int)(y0 + 0.5f); \ + int yMiddle = (int)(y1 + 0.5f); \ + int yBottom = (int)(y2 + 0.5f); \ + \ + /* Global calculation of vertical texture gradients for the triangle */ \ + float yDu, yDv; \ + if (ENABLE_TEXTURE) { \ + yDu = (v2->texcoord[0] - v0->texcoord[0]) * inv_height; \ + yDv = (v2->texcoord[1] - v0->texcoord[1]) * inv_height; \ + } \ + \ + /* Initializing scanline variables */ \ + float xLeft = x0, xRight = x0; \ + sw_vertex_t start, end; \ + \ + /* Scanline for the upper part of the triangle */ \ + for (int y = yTop; y < yMiddle; y++) { \ + float dy = (float)y - y0; \ + float t1 = dy * inv_height; \ + float t2 = dy * inv_y1y0; \ + \ + /* Vertex interpolation */ \ + start = sw_lerp_vertex(v0, v2, t1); \ + end = sw_lerp_vertex(v0, v1, t2); \ + start.screen[0] = xLeft; \ + start.screen[1] = (float)y; \ + end.screen[0] = xRight; \ + end.screen[1] = (float)y; \ + \ + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ + \ + /* Incremental update */ \ + xLeft += dx02; \ + xRight += dx01; \ + } \ + \ + /* Scanline for the lower part of the triangle */ \ + xRight = x1; /* Restart the right side from the second vertex */ \ + for (int y = yMiddle; y < yBottom; y++) { \ + float dy = (float)y - y0; \ + float t1 = dy * inv_height; \ + float t2 = (float)(y - y1) * inv_y2y1; \ + \ + /* Vertex interpolation */ \ + start = sw_lerp_vertex(v0, v2, t1); \ + end = sw_lerp_vertex(v1, v2, t2); \ + start.screen[0] = xLeft; \ + start.screen[1] = (float)y; \ + end.screen[0] = xRight; \ + end.screen[1] = (float)y; \ + \ + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ + \ + /* Incremental 
update */ \ + xLeft += dx02; \ + xRight += dx12; \ + } \ +} + +DEFINE_RASTER_SCANLINE(sw_raster_scanline, false, false) +DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex, true, false) +DEFINE_RASTER_SCANLINE(sw_raster_scanline_depth, false, true) +DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex_depth, true, true) + +DEFINE_RASTER_TRIANGLE(sw_raster_triangle, sw_raster_scanline, false) +DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex, sw_raster_scanline_tex, true) +DEFINE_RASTER_TRIANGLE(sw_raster_triangle_depth, sw_raster_scanline_depth, false) +DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex_depth, sw_raster_scanline_tex_depth, true) + +void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) +{ + int vertexCounter = 3; + + sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; + polygon[0] = *v0; + polygon[1] = *v1; + polygon[2] = *v2; + + sw_project_and_clip_triangle(polygon, &vertexCounter); + + if (vertexCounter < 3) { + return; + } + + if ((RLSW.stateFlags & SW_STATE_TEXTURE_2D) && (RLSW.stateFlags & SW_STATE_DEPTH_TEST)) { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle_tex_depth( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + else if (RLSW.stateFlags & SW_STATE_TEXTURE_2D) { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle_tex( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + else if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle_depth( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + else { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } +} + +static inline bool sw_is_texture_id_valid(uint32_t 
id) +{ + bool valid = true; + + if (id == 0) valid = false; + else if (id >= SW_MAX_TEXTURES) valid = false; + else if (RLSW.loadedTextures[id].pixels == 0) valid = false; + + return true; +} + +static inline bool sw_is_texture_filter_valid(int filter) +{ + return (filter == SW_NEAREST || filter == SW_LINEAR); +} + +static inline bool sw_is_texture_wrap_valid(int wrap) +{ + return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); +} + +/* === Public Implementation === */ + +void swInit(int w, int h) +{ + swViewport(0, 0, w, h); + + RLSW.framebuffer.color = SW_MALLOC(4 * w * h); + RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + + RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + + RLSW.clearColor[0] = 0; + RLSW.clearColor[1] = 0; + RLSW.clearColor[2] = 0; + RLSW.clearColor[3] = 255; + RLSW.clearDepth = UINT16_MAX; + + RLSW.currentMatrixMode = SW_MODELVIEW; + RLSW.currentMatrix = &RLSW.matView; + + sw_matrix_id(RLSW.matProjection); + sw_matrix_id(RLSW.matTexture); + sw_matrix_id(RLSW.matModel); + sw_matrix_id(RLSW.matView); + + RLSW.vertexBuffer[0].color[0] = 1.0f; + RLSW.vertexBuffer[0].color[1] = 1.0f; + RLSW.vertexBuffer[0].color[2] = 1.0f; + RLSW.vertexBuffer[0].color[3] = 1.0f; + + RLSW.vertexBuffer[0].texcoord[0] = 0.0f; + RLSW.vertexBuffer[0].texcoord[1] = 0.0f; + + RLSW.vertexBuffer[0].normal[0] = 0.0f; + RLSW.vertexBuffer[0].normal[1] = 0.0f; + RLSW.vertexBuffer[0].normal[2] = 1.0f; + + static const float defTex[3*2*2] = + { + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + }; + + RLSW.loadedTextures[0].pixels = defTex; + RLSW.loadedTextures[0].width = 2; + RLSW.loadedTextures[0].height = 2; + RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; + RLSW.loadedTextures[0].minFilter = SW_NEAREST; + RLSW.loadedTextures[0].magFilter = SW_NEAREST; + RLSW.loadedTextures[0].sWrap 
= SW_REPEAT; + RLSW.loadedTextures[0].tWrap = SW_REPEAT; + RLSW.loadedTextures[0].tx = 0.5f; + RLSW.loadedTextures[0].ty = 0.5f; + + RLSW.loadedTextureCount = 1; +} + +void swClose(void) +{ + SW_FREE(RLSW.framebuffer.color); + SW_FREE(RLSW.framebuffer.depth); + + SW_FREE(RLSW.loadedTextures); + SW_FREE(RLSW.freeTextureIds); +} + +void swEnable(SWstate state) +{ + switch (state) { + case SW_TEXTURE_2D: + RLSW.stateFlags |= SW_STATE_TEXTURE_2D; + break; + case SW_DEPTH_TEST: + RLSW.stateFlags |= SW_STATE_DEPTH_TEST; + break; + case SW_CULL_FACE: + RLSW.stateFlags |= SW_STATE_CULL_FACE; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +void swDisable(SWstate state) +{ + switch (state) { + case SW_TEXTURE_2D: + RLSW.stateFlags &= ~SW_STATE_TEXTURE_2D; + break; + case SW_DEPTH_TEST: + RLSW.stateFlags &= ~SW_STATE_DEPTH_TEST; + break; + case SW_CULL_FACE: + RLSW.stateFlags &= ~SW_STATE_CULL_FACE; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +void* swGetColorBuffer(int* w, int* h) +{ + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; +} + +void swMatrixMode(SWmatrix mode) +{ + switch (mode) { + case SW_PROJECTION: + RLSW.currentMatrix = &RLSW.matProjection; + break; + case SW_MODELVIEW: + RLSW.currentMatrix = RLSW.modelMatrixUsed + ? 
&RLSW.matModel : &RLSW.matView; + break; + case SW_TEXTURE: + RLSW.currentMatrix = &RLSW.matTexture; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + } + + RLSW.currentMatrixMode = mode; +} + +void swPushMatrix(void) +{ + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; + } + RLSW.stackProjectionCounter++; + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + if (RLSW.modelMatrixUsed) { + for (int i = 0; i < 16; i++) { + RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; + } + RLSW.stackModelviewCounter++; + } else { + RLSW.currentMatrix = &RLSW.matModel; + RLSW.modelMatrixUsed = true; + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; + } + RLSW.stackTextureCounter++; + break; + + } +} + +void swPopMatrix(void) +{ + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackProjectionCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; + } + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter == 0) { + if (!RLSW.modelMatrixUsed) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + sw_matrix_id(RLSW.matModel); + RLSW.currentMatrix = &RLSW.matView; + RLSW.modelMatrixUsed = false; + } else { + RLSW.stackModelviewCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matModel[i] = 
RLSW.stackModelview[RLSW.stackModelviewCounter][i]; + } + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackTextureCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; + } + break; + + } +} + +void swLoadIdentity(void) +{ + sw_matrix_id(*RLSW.currentMatrix); +} + +void swTranslatef(float x, float y, float z) +{ + sw_matrix_t mat; + sw_matrix_id(mat); + + mat[12] = x; + mat[13] = y; + mat[14] = z; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); +} + +void swRotatef(float angle, float x, float y, float z) +{ + angle *= SW_DEG2RAD; + + sw_matrix_t mat; + sw_matrix_id(mat); + + float lengthSq = x*x + y*y + z*z; + + if (lengthSq != 1.0f && lengthSq != 0.0f) { + float invLenght = 1.0f / lengthSq; + x *= invLenght; + y *= invLenght; + z *= invLenght; + } + + float sinres = sinf(angle); + float cosres = cosf(angle); + float t = 1.0f - cosres; + + mat[0] = x*x*t + cosres; + mat[1] = y*x*t + z*sinres; + mat[2] = z*x*t - y*sinres; + + mat[4] = x*y*t - z*sinres; + mat[5] = y*y*t + cosres; + mat[6] = z*y*t + x*sinres; + + mat[8] = x*z*t + y*sinres; + mat[9] = y*z*t - x*sinres; + mat[10] = z*z*t + cosres; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); +} + +void swScalef(float x, float y, float z) +{ + sw_matrix_t mat; + + mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; + mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; + mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; + mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); +} + +void swMultMatrixf(const float* mat) +{ + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); +} + +void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) +{ + sw_matrix_t mat = { 0 }; + + float rl = right - left; + float tb = top - bottom; + float fn = zfar - 
znear; + + mat[0] = (znear * 2.0f) / rl; + mat[5] = (znear * 2.0f) / tb; + + mat[8] = (right + left) / rl; + mat[9] = (top + bottom) / tb; + mat[10] = -(zfar + znear) / fn; + mat[11] = -1.0f; + + mat[14] = -(zfar * znear * 2.0f) / fn; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); +} + +void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) +{ + sw_matrix_t mat = { 0 }; + + float rl = (right - left); + float tb = (top - bottom); + float fn = (zfar - znear); + + mat[0] = 2.0f / rl; + mat[5] = 2.0f / tb; + + mat[10] = -2.0f / fn; + mat[11] = 0.0f; + mat[12] = -(left + right) / rl; + mat[13] = -(top + bottom) / tb; + + mat[14] = -(zfar + znear) / fn; + mat[15] = 1.0f; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); +} + +void swViewport(int x, int y, int width, int height) +{ + if (x <= -width || y <= -height) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.vpPos[0] = x; + RLSW.vpPos[1] = y; + + RLSW.vpDim[0] = width - 1; + RLSW.vpDim[1] = height - 1; + + RLSW.vpMin[0] = (x < 0) ? 0 : x; + RLSW.vpMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.vpMax[1] = (vpMaxY < fbH) ? 
vpMaxY : fbH; +} + +void swClearColor(float r, float g, float b, float a) +{ + RLSW.clearColor[0] = r * 255; + RLSW.clearColor[1] = g * 255; + RLSW.clearColor[2] = b * 255; + RLSW.clearColor[3] = a * 255; +} + +void swClear(void) +{ + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + + for (int i = 0; i < size; i++) { + ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); + RLSW.framebuffer.depth[i] = RLSW.clearDepth; + } +} + +void swBegin(SWfill mode) +{ + if (mode < SW_POINTS || mode > SW_QUADS) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.vertexCounter = 0; + RLSW.fillMode = mode; +} + +void swEnd(void) +{ + RLSW.vertexCounter = 0; +} + +void swVertex2i(int x, int y) +{ + float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; + swVertex4fv(v); +} + +void swVertex2f(float x, float y) +{ + float v[4] = { x, y, 0.0f, 1.0f }; + swVertex4fv(v); +} + +void swVertex2fv(const float* v) +{ + float v4[4] = { v[0], v[1], 0.0f, 1.0f }; + swVertex4fv(v4); +} + +void swVertex3i(int x, int y, int z) +{ + float v[4] = { (float)x, (float)y, (float)z, 1.0f }; + swVertex4fv(v); +} + +void swVertex3f(float x, float y, float z) +{ + float v[4] = { x, y, z, 1.0f }; + swVertex4fv(v); +} + +void swVertex3fv(const float* v) +{ + float v4[4] = { v[0], v[1], v[2], 1.0f }; + swVertex4fv(v4); +} + +void swVertex4i(int x, int y, int z, int w) +{ + float v[4] = { (float)x, (float)y, (float)z, (float)w }; + swVertex4fv(v); +} + +void swVertex4f(float x, float y, float z, float w) +{ + float v[4] = { x, y, z, w }; + swVertex4fv(v); +} + +void swVertex4fv(const float* v) +{ + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; + } + RLSW.vertexCounter++; + + int neededVertices = 0; + switch (RLSW.fillMode) { + case SW_POINTS: + neededVertices = 1; + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + neededVertices = 3; + break; + case SW_QUADS: + neededVertices = 4; + break; + } + + if 
(RLSW.vertexCounter == neededVertices) { + + // TODO: Optimize MVP calculation + sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); + sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + + switch (RLSW.fillMode) { + case SW_POINTS: + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + sw_render_triangle( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + break; + case SW_QUADS: + sw_render_triangle( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + sw_render_triangle( + &RLSW.vertexBuffer[2], + &RLSW.vertexBuffer[3], + &RLSW.vertexBuffer[0] + ); + break; + } + + RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; + RLSW.vertexCounter = 0; + } + else { + RLSW.vertexBuffer[RLSW.vertexCounter] = RLSW.vertexBuffer[RLSW.vertexCounter - 1]; + } +} + +void swColor1ui(uint32_t color) +{ + union { + uint32_t v; + uint8_t a[4]; + } c = { .v = color }; + + float cv[4]; + cv[0] = (float)c.a[0] / 255; + cv[1] = (float)c.a[1] / 255; + cv[2] = (float)c.a[2] / 255; + cv[3] = (float)c.a[3] / 255; + + swColor4fv(cv); +} + +void swColor3ub(uint8_t r, uint8_t g, uint8_t b) +{ + float cv[4]; + cv[0] = (float)r / 255; + cv[1] = (float)g / 255; + cv[2] = (float)b / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3ubv(const uint8_t* v) +{ + float cv[4]; + cv[0] = (float)v[0] / 255; + cv[1] = (float)v[1] / 255; + cv[2] = (float)v[2] / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3us(uint16_t r, uint16_t g, uint16_t b) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 8)) / 255; + cv[1] = (float)((uint8_t)(g >> 8)) / 255; + cv[2] = (float)((uint8_t)(b >> 8)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3usv(const uint16_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void 
swColor3ui(uint32_t r, uint32_t g, uint32_t b) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 24)) / 255; + cv[1] = (float)((uint8_t)(g >> 24)) / 255; + cv[2] = (float)((uint8_t)(b >> 24)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3uiv(const uint32_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3f(float r, float g, float b) +{ + float cv[4]; + cv[0] = r; + cv[1] = g; + cv[2] = b; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3fv(const float* v) +{ + float cv[4]; + cv[0] = v[0]; + cv[1] = v[1]; + cv[2] = v[2]; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a) +{ + float cv[4]; + cv[0] = (float)r / 255; + cv[1] = (float)g / 255; + cv[2] = (float)b / 255; + cv[3] = (float)a / 255; + + swColor4fv(cv); +} + +void swColor4ubv(const uint8_t* v) +{ + float cv[4]; + cv[0] = (float)v[0] / 255; + cv[1] = (float)v[1] / 255; + cv[2] = (float)v[2] / 255; + cv[3] = (float)v[3] / 255; + + swColor4fv(cv); +} + +void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 8)) / 255; + cv[1] = (float)((uint8_t)(g >> 8)) / 255; + cv[2] = (float)((uint8_t)(b >> 8)) / 255; + cv[3] = (float)((uint8_t)(a >> 8)) / 255; + + swColor4fv(cv); +} + +void swColor4usv(const uint16_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; + cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; + + swColor4fv(cv); +} + +void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 24)) / 255; + cv[1] = (float)((uint8_t)(g >> 24)) / 255; + cv[2] = (float)((uint8_t)(b >> 24)) / 255; + cv[3] = (float)((uint8_t)(a >> 24)) / 255; + + swColor4fv(cv); 
+} + +void swColor4uiv(const uint32_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; + cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; + + swColor4fv(cv); +} + +void swColor4f(float r, float g, float b, float a) +{ + float cv[4]; + cv[0] = r; + cv[1] = g; + cv[2] = b; + cv[3] = a; + + swColor4fv(cv); +} + +void swColor4fv(const float* v) +{ + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].color[i] = v[i]; + } +} + +void swTexCoord2f(float u, float v) +{ + float s = RLSW.matTexture[0]*u + RLSW.matTexture[4]*v + RLSW.matTexture[12]; + float t = RLSW.matTexture[1]*u + RLSW.matTexture[5]*v + RLSW.matTexture[13]; + + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; +} + +void swTexCoordfv(const float* v) +{ + float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; + float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; + + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; +} + +void swNormal3f(float x, float y, float z) +{ + RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = x; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = y; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = z; +} + +void swNormal3fv(const float* v) +{ + RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = v[0]; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = v[1]; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = v[2]; +} + +void swBindArray(SWarray type, void *buffer) +{ + switch (type) { + case SW_VERTEX_ARRAY: + RLSW.array.positions = buffer; + break; + case SW_TEXTURE_COORD_ARRAY: + RLSW.array.texcoords = buffer; + break; + case SW_NORMAL_ARRAY: + RLSW.array.normals = buffer; + break; + case SW_COLOR_ARRAY: + RLSW.array.colors = buffer; + break; + default: + break; + } +} + 
+void swDrawArrays(SWfill mode, int offset, int count) +{ + if (RLSW.array.positions == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + swBegin(mode); + + for (int i = offset; i < count; i++) { + if (RLSW.array.texcoords) { + swTexCoordfv(RLSW.array.texcoords + 2 * i); + } + if (RLSW.array.normals) { + swNormal3fv(RLSW.array.normals + 3 * i); + } + if (RLSW.array.colors) { + swColor4ubv(RLSW.array.colors + 4 * i); + } + swVertex3fv(RLSW.array.positions + 3 * i); + } + + swEnd(); +} + +uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) +{ + if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory, not really stack overflow + return 0; + } + + sw_texture_t texture = { 0 }; + texture.pixels = data; + texture.width = width; + texture.height = height; + texture.format = format; + texture.minFilter = SW_NEAREST; + texture.magFilter = SW_NEAREST; + texture.sWrap = SW_REPEAT; + texture.tWrap = SW_REPEAT; + texture.tx = 1.0f / width; + texture.ty = 1.0f / height; + (void)mipmapCount; + + uint32_t id = 0; + if (RLSW.freeTextureIdCount > 0) { + id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; + } + else { + id = RLSW.loadedTextureCount++; + } + + RLSW.loadedTextures[id] = texture; + + return id; +} + +void swUnloadTexture(uint32_t id) +{ + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + RLSW.loadedTextures[id].pixels = 0; + RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; +} + +void swTextureParameters(uint32_t id, int param, int value) +{ + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + sw_texture_t* texture = &RLSW.loadedTextures[id]; + + switch (param) { + + case SW_TEXTURE_MIN_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->minFilter = value; + break; + + case SW_TEXTURE_MAG_FILTER: + if 
(!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->magFilter = value; + break; + + case SW_TEXTURE_WRAP_S: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->sWrap = value; + break; + + case SW_TEXTURE_WRAP_T: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->tWrap = value; + break; + + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + + } +} + +void swBindTexture(uint32_t id) +{ + if (id >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.currentTexture = id; +} + +#endif // RLSW_IMPL From f305490c33a382d35a732ab678d848b102600487 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 18:20:05 +0100 Subject: [PATCH 003/105] branchless float saturation --- src/external/rlsw.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 6af7fcf84..6b7a7376a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -444,6 +444,27 @@ static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_ma } } +static inline float sw_saturate(float x) +{ + // After several comparisons, this saturation method + // seems to be the most optimized by GCC and Clang, + // and it does not produce any conditional branching. + + // However, it is possible that a clamp could be + // more efficient on certain platforms. + // Comparisons will need to be made. + + // SEE: https://godbolt.org/z/5qYznK5zj + + // Saturation from below: max(0, x) + float y = 0.5f * (x + fabsf(x)); + + // Saturation from above: min(1, y) + return y - 0.5f * ((y - 1.0f) + fabsf(y - 1.0f)); + + // return (x < 0.0f) ? 0.0f : ((x > 1.0f) ? 
1.0f : x); +} + static inline float sw_lerp(float a, float b, float t) { return a + t * (b - a); @@ -700,8 +721,7 @@ static inline void sw_map_repeat(int* out, float in, int max) static inline void sw_map_clamp_to_edge(int* out, float in, int max) { - in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); - *out = (int)(in * (max - 1) + 0.5f); + *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); } static inline void sw_map_mirrored_repeat(int* out, float in, int max) @@ -991,12 +1011,9 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ - float lerp = start->color[i] + t * dcol[i]; \ - float finalColor = texColor[i] * lerp; \ - /* Inline clamp to keep the value between 0 and 1 */ \ - /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ - finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \ - dst[i] = (uint8_t)(finalColor * 255.0f); \ + float finalColor = texColor[i]; \ + finalColor *= start->color[i] + t * dcol[i]; \ + dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ else \ @@ -1004,10 +1021,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, /* Interpolate the color */ \ for (int i = 0; i < 4; i++) { \ float finalColor = start->color[i] + t * dcol[i]; \ - /* Inline clamp to keep the value between 0 and 1 */ \ - /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ - finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 
1.0f : finalColor); \ - dst[i] = (uint8_t)(finalColor * 255.0f); \ + dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ \ From b70f02fdf263632b7e9549044b6da58fe74476ba Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 18:32:36 +0100 Subject: [PATCH 004/105] apply perspective correction to colors --- src/external/rlsw.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 6b7a7376a..d8155636c 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -930,6 +930,12 @@ void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VER v->texcoord[0] *= v->homogeneous[3]; v->texcoord[1] *= v->homogeneous[3]; + // Division of colors (perspective correct) + v->color[0] *= v->homogeneous[3]; + v->color[1] *= v->homogeneous[3]; + v->color[2] *= v->homogeneous[3]; + v->color[3] *= v->homogeneous[3]; + // Transform to screen space v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; @@ -1012,7 +1018,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ float finalColor = texColor[i]; \ - finalColor *= start->color[i] + t * dcol[i]; \ + finalColor *= (start->color[i] + t * dcol[i]) * w; \ dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ @@ -1020,7 +1026,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, { \ /* Interpolate the color */ \ for (int i = 0; i < 4; i++) { \ - float finalColor = start->color[i] + t * dcol[i]; \ + float finalColor = (start->color[i] + t * dcol[i]) * w; \ dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ From dcd60be4779d4cd0cc9e56fe64b72d8b395b5160 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 21:23:31 +0100 Subject: [PATCH 005/105] impl line 
clipping and rasterization + tweak function names --- src/external/rlsw.h | 406 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 350 insertions(+), 56 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d8155636c..c4d10a9fb 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -296,6 +296,12 @@ void swBindTexture(uint32_t id); #define SW_STATE_DEPTH_TEST (1 << 1) #define SW_STATE_CULL_FACE (1 << 2) +#define SW_CLIP_INSIDE (0x00) // 0000 +#define SW_CLIP_LEFT (0x01) // 0001 +#define SW_CLIP_RIGHT (0x02) // 0010 +#define SW_CLIP_BOTTOM (0x04) // 0100 +#define SW_CLIP_TOP (0x08) // 1000 + /* === Internal Structs === */ typedef float sw_matrix_t[4*4]; @@ -479,6 +485,9 @@ static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t return result; } + +/* === Pixel Format Conversion Part === */ + static inline uint32_t sw_cvt_hf_ui(uint16_t h) { uint32_t s = (uint32_t)(h & 0x8000) << 16; @@ -708,7 +717,10 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse } } -static inline void sw_map_repeat(int* out, float in, int max) + +/* === Texture Sampling Part === */ + +static inline void sw_texture_map_repeat(int* out, float in, int max) { // Upscale to nearest texture coordinates // NOTE: We use '(int)(x+0.5)' although this is incorrect @@ -719,48 +731,48 @@ static inline void sw_map_repeat(int* out, float in, int max) *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); } -static inline void sw_map_clamp_to_edge(int* out, float in, int max) +static inline void sw_texture_map_clamp_to_edge(int* out, float in, int max) { *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); } -static inline void sw_map_mirrored_repeat(int* out, float in, int max) +static inline void sw_texture_map_mirrored_repeat(int* out, float in, int max) { in = fmodf(fabsf(in), 2); if (in > 1.0f) in = 1.0f - (in - 1.0f); *out = (int)(in * (max - 1) + 0.5f); } -static inline void sw_map(int* out, float in, int max, 
SWwrap mode) +static inline void sw_texture_map(int* out, float in, int max, SWwrap mode) { switch (mode) { case SW_REPEAT: - sw_map_repeat(out, in, max); + sw_texture_map_repeat(out, in, max); break; case SW_CLAMP_TO_EDGE: - sw_map_clamp_to_edge(out, in, max); + sw_texture_map_clamp_to_edge(out, in, max); break; case SW_MIRRORED_REPEAT: - sw_map_mirrored_repeat(out, in, max); + sw_texture_map_mirrored_repeat(out, in, max); break; } } -static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) +static inline void sw_texture_sample_nearest(float* color, const sw_texture_t* tex, float u, float v) { int x, y; - sw_map(&x, u, tex->width, tex->sWrap); - sw_map(&y, v, tex->height, tex->tWrap); + sw_texture_map(&x, u, tex->width, tex->sWrap); + sw_texture_map(&y, v, tex->height, tex->tWrap); sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); } -static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) +static inline void sw_texture_sample_linear(float* color, const sw_texture_t* tex, float u, float v) { int x0, y0, x1, y1; - sw_map(&x0, u, tex->width, tex->sWrap); - sw_map(&y0, v, tex->height, tex->tWrap); - sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); - sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); + sw_texture_map(&x0, u, tex->width, tex->sWrap); + sw_texture_map(&y0, v, tex->height, tex->tWrap); + sw_texture_map(&x1, u + tex->tx, tex->width, tex->sWrap); + sw_texture_map(&y1, v + tex->ty, tex->height, tex->tWrap); float fx = u * (tex->width - 1) - x0; float fy = v * (tex->height - 1) - y0; @@ -779,7 +791,7 @@ static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* } } -static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, +static inline void sw_texture_sample(float* color, const sw_texture_t* tex, float u, float v, float xDu, float yDu, float xDv, float yDv) { // TODO: It seems there are some 
incorrect detections depending on the context @@ -796,21 +808,33 @@ static inline void sw_sample_texture(float* color, const sw_texture_t* tex, floa if (L > 1.0f) { // Minification if (tex->minFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); + sw_texture_sample_nearest(color, tex, u, v); } else if (tex->minFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); + sw_texture_sample_linear(color, tex, u, v); } } else { // Magnification if (tex->magFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); + sw_texture_sample_nearest(color, tex, u, v); } else if (tex->magFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); + sw_texture_sample_linear(color, tex, u, v); } } } -static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + +/* === Projection Helper Functions === */ + +static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) +{ + screen[0] = RLSW.vpPos[0] + (ndc[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; + screen[1] = RLSW.vpPos[1] + (ndc[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; +} + + +/* === Triangle Rendering Part === */ + +static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { @@ -839,7 +863,7 @@ static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_ return *vertexCounter > 0; } -static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { for (int iAxis = 0; iAxis < 3; iAxis++) { @@ -905,15 +929,14 @@ static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGO return *vertexCounter > 0; } -void sw_project_and_clip_triangle(sw_vertex_t 
polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = polygon + i; - for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; - sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); + sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); } - if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { + if (sw_triangle_clip_w(polygon, vertexCounter) && sw_triangle_clip_xyz(polygon, vertexCounter)) { for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = polygon + i; @@ -937,14 +960,13 @@ void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VER v->color[3] *= v->homogeneous[3]; // Transform to screen space - v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; - v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + sw_project_ndc_to_screen(v->screen, v->homogeneous); } } } -#define DEFINE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ -void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ +#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ const sw_vertex_t* end, float yDu, float yDv) \ { \ /* Calculate the horizontal width and avoid division by zero */ \ @@ -1013,7 +1035,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, { \ /* Sample the texture */ \ float texColor[4]; \ - sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ \ /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ @@ -1043,9 +1065,9 @@ void FUNC_NAME(const 
sw_texture_t* tex, const sw_vertex_t* start, } \ } -#define DEFINE_RASTER_TRIANGLE(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ -void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, \ - const sw_texture_t* tex) \ +#define DEFINE_TRIANGLE_RASTER(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ +static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, \ + const sw_texture_t* tex) \ { \ /* Swap vertices by increasing y */ \ if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ @@ -1134,17 +1156,17 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* } \ } -DEFINE_RASTER_SCANLINE(sw_raster_scanline, false, false) -DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex, true, false) -DEFINE_RASTER_SCANLINE(sw_raster_scanline_depth, false, true) -DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex_depth, true, true) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, false, false) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex, true, false) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_depth, false, true) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex_depth, true, true) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle, sw_raster_scanline, false) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex, sw_raster_scanline_tex, true) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle_depth, sw_raster_scanline_depth, false) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex_depth, sw_raster_scanline_tex_depth, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster, sw_triangle_raster_scanline, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex, sw_triangle_raster_scanline_tex, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_depth, sw_triangle_raster_scanline_depth, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex_depth, sw_triangle_raster_scanline_tex_depth, true) -void sw_render_triangle(const sw_vertex_t* v0, const 
sw_vertex_t* v1, const sw_vertex_t* v2) +static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) { int vertexCounter = 3; @@ -1153,39 +1175,39 @@ void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_v polygon[1] = *v1; polygon[2] = *v2; - sw_project_and_clip_triangle(polygon, &vertexCounter); + sw_triangle_project_and_clip(polygon, &vertexCounter); if (vertexCounter < 3) { return; } if ((RLSW.stateFlags & SW_STATE_TEXTURE_2D) && (RLSW.stateFlags & SW_STATE_DEPTH_TEST)) { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle_tex_depth( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster_tex_depth( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); } } else if (RLSW.stateFlags & SW_STATE_TEXTURE_2D) { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle_tex( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster_tex( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); } } else if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle_depth( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster_depth( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); } } else { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); @@ -1193,6 +1215,274 @@ void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_v } } + +/* === Line Rendering Part === */ + +uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) +{ + uint8_t code = SW_CLIP_INSIDE; + if (screen[0] < xMin) code |= SW_CLIP_LEFT; + 
if (screen[0] > xMax) code |= SW_CLIP_RIGHT; + if (screen[1] < yMin) code |= SW_CLIP_TOP; + if (screen[1] > yMax) code |= SW_CLIP_BOTTOM; + return code; +} + +bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) +{ + int xMin = RLSW.vpMin[0]; + int yMin = RLSW.vpMin[1]; + int xMax = RLSW.vpMax[0]; + int yMax = RLSW.vpMax[1]; + + bool accept = false; + uint8_t code0, code1; + float m = 0; + + if (v1->screen[0] != v2->screen[0]) { + m = (v2->screen[1] - v1->screen[1]) / (v2->screen[0] - v1->screen[0]); + } + + for (;;) { + code0 = sw_line_clip_encode_2d(v1->screen, xMin, yMin, xMax, yMax); + code1 = sw_line_clip_encode_2d(v2->screen, xMin, yMin, xMax, yMax); + + // Accepted if both endpoints lie within rectangle + if ((code0 | code1) == 0) { + accept = true; + break; + } + + // Rejected if both endpoints are outside rectangle, in same region + if (code0 & code1) break; + + if (code0 == SW_CLIP_INSIDE) { + uint8_t ctmp = code0; code0 = code1; code1 = ctmp; + sw_vertex_t vtmp = *v1; *v1 = *v2; *v2 = vtmp; + } + + if (code0 & SW_CLIP_LEFT) { + v1->screen[1] += (RLSW.vpMin[0] - v1->screen[0])*m; + v1->screen[0] = (float)RLSW.vpMin[0]; + } + else if (code0 & SW_CLIP_RIGHT) { + v1->screen[1] += (RLSW.vpMax[0] - v1->screen[0])*m; + v1->screen[0] = (float)RLSW.vpMax[0]; + } + else if (code0 & SW_CLIP_BOTTOM) { + if (m) v1->screen[0] += (RLSW.vpMin[1] - v1->screen[1]) / m; + v1->screen[1] = (float)RLSW.vpMin[1]; + } + else if (code0 & SW_CLIP_TOP) { + if (m) v1->screen[0] += (RLSW.vpMax[1] - v1->screen[1]) / m; + v1->screen[1] = (float)RLSW.vpMax[1]; + } + } + + return accept; +} + +bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) +{ + if (fabsf(p) < SW_CLIP_EPSILON) { + // Check if the line is entirely outside the window + if (q < -SW_CLIP_EPSILON) return 0; // Completely outside + return 1; // Completely inside or on the edges + } + + const float r = q / p; + + if (p < 0) { + if (r > *t2) return 0; + if (r > *t1) *t1 = r; + } else { + if (r < *t1) return 
0; + if (r < *t2) *t2 = r; + } + + return 1; +} + +bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) +{ + // TODO: Lerp all vertices here, not just homogeneous coordinates + + float t1 = 0, t2 = 1; + + float delta[4]; + for (int i = 0; i < 4; i++) { + delta[i] = v2->homogeneous[i] - v1->homogeneous[i]; + } + + if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[0], -delta[3] + delta[0], &t1, &t2)) return false; + if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[0], -delta[3] - delta[0], &t1, &t2)) return false; + + if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[1], -delta[3] + delta[1], &t1, &t2)) return false; + if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[1], -delta[3] - delta[1], &t1, &t2)) return false; + + if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[2], -delta[3] + delta[2], &t1, &t2)) return false; + if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[2], -delta[3] - delta[2], &t1, &t2)) return false; + + if (t2 < 1) { + for (int i = 0; i < 4; i++) { + v2->homogeneous[i] = v1->homogeneous[i] + t2 * delta[i]; + } + } + + if (t1 > 0) { + for (int i = 0; i < 4; i++) { + v1->homogeneous[i] = v1->homogeneous[i] + t1 * delta[i]; + } + } + + return true; +} + +bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) +{ + sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); + sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); + + if (v0->homogeneous[3] == 1.0f && v1->homogeneous[3] == 1.0f) { + sw_project_ndc_to_screen(v0->screen, v0->homogeneous); + sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + if (!sw_line_clip_2d(v0, v1)) { + return false; + } + } + else { + if (!sw_line_clip_3d(v0, v1)) { + return false; + } + // Convert XYZ coordinates to NDC + v0->homogeneous[3] = 1.0f / v0->homogeneous[3]; + v1->homogeneous[3] = 1.0f / v1->homogeneous[3]; + for (int i = 0; i < 3; i++) { + v0->homogeneous[i] *= v0->homogeneous[3]; + 
v1->homogeneous[i] *= v1->homogeneous[3]; + } + // Convert NDC coordinates to screen space + sw_project_ndc_to_screen(v0->screen, v0->homogeneous); + sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + } + + return true; +} + +#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST) \ +void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ +{ \ + int x1 = (int)v0->screen[0]; \ + int y1 = (int)v0->screen[1]; \ + int x2 = (int)v1->screen[0]; \ + int y2 = (int)v1->screen[1]; \ + \ + float z1 = v0->homogeneous[2]; \ + float z2 = v1->homogeneous[2]; \ + \ + int shortLen = y2 - y1; \ + int longLen = x2 - x1; \ + bool yLonger = 0; \ + \ + if (abs(shortLen) > abs(longLen)) { \ + int tmp = shortLen; \ + shortLen = longLen; \ + longLen = tmp; \ + yLonger = 1; \ + } \ + \ + float invEndVal = 1.0f / longLen; \ + int endVal = longLen; \ + int sgnInc = 1; \ + \ + if (longLen < 0) { \ + longLen = -longLen; \ + sgnInc = -1; \ + } \ + \ + int decInc = (longLen == 0) ? 0 \ + : (shortLen << 16) / longLen; \ + \ + const int fb_width = RLSW.framebuffer.width; \ + const float z_diff = z2 - z1; \ + \ + uint8_t* color_buffer = RLSW.framebuffer.color; \ + uint16_t* depth_buffer = RLSW.framebuffer.depth; \ + \ + int j = 0; \ + if (yLonger) { \ + for (int i = 0; i != endVal; i += sgnInc, j += decInc) { \ + float t = (float)i * invEndVal; \ + \ + int x = x1 + (j >> 16); \ + int y = y1 + i; \ + float z = z1 + t * z_diff; \ + int pixel_index = y * fb_width + x; \ + \ + uint16_t* dptr = &depth_buffer[pixel_index]; \ + if (ENABLE_DEPTH_TEST) { \ + float depth = (float)(*dptr) / UINT16_MAX; \ + if (z > depth) continue; \ + } \ + \ + *dptr = (uint16_t)(z * UINT16_MAX); \ + \ + int color_index = 4 * pixel_index; \ + uint8_t* cptr = &color_buffer[color_index]; \ + \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ + } \ + } \ + else { \ + for (int i = 0; i != endVal; i += sgnInc, j += decInc) 
{ \ + float t = (float)i * invEndVal; \ + \ + int x = x1 + i; \ + int y = y1 + (j >> 16); \ + float z = z1 + t * z_diff; \ + int pixel_index = y * fb_width + x; \ + \ + uint16_t* dptr = &depth_buffer[pixel_index]; \ + if (ENABLE_DEPTH_TEST) { \ + float depth = (float)(*dptr) / UINT16_MAX; \ + if (z > depth) continue; \ + } \ + \ + *dptr = (uint16_t)(z * UINT16_MAX); \ + \ + int color_index = 4 * pixel_index; \ + uint8_t* cptr = &color_buffer[color_index]; \ + \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ + } \ + } \ +} + +DEFINE_LINE_RASTER(sw_line_raster, false) +DEFINE_LINE_RASTER(sw_line_raster_depth, true) + +static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) +{ + if (!sw_line_project_and_clip(v0, v1)) { + return; + } + + if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { + sw_line_raster_depth(v0, v1); + } + else { + sw_line_raster(v0, v1); + } +} + +/* === Some Validity Check Helper === */ + static inline bool sw_is_texture_id_valid(uint32_t id) { bool valid = true; @@ -1214,6 +1504,7 @@ static inline bool sw_is_texture_wrap_valid(int wrap) return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); } + /* === Public Implementation === */ void swInit(int w, int h) @@ -1694,22 +1985,25 @@ void swVertex4fv(const float* v) case SW_POINTS: break; case SW_LINES: - neededVertices = 2; + sw_line_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1] + ); break; case SW_TRIANGLES: - sw_render_triangle( + sw_triangle_render( &RLSW.vertexBuffer[0], &RLSW.vertexBuffer[1], &RLSW.vertexBuffer[2] ); break; case SW_QUADS: - sw_render_triangle( + sw_triangle_render( &RLSW.vertexBuffer[0], &RLSW.vertexBuffer[1], &RLSW.vertexBuffer[2] ); - sw_render_triangle( + sw_triangle_render( &RLSW.vertexBuffer[2], &RLSW.vertexBuffer[3], &RLSW.vertexBuffer[0] From 5f92a0fcd0213c83a6de294fcf9aba3e0d1162df Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 
Mar 2025 21:42:29 +0100 Subject: [PATCH 006/105] impl face culling --- src/external/rlsw.h | 50 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c4d10a9fb..3e1340569 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -145,7 +145,7 @@ typedef enum { SW_LINES = GL_LINES, SW_TRIANGLES = GL_TRIANGLES, SW_QUADS = GL_QUADS, -} SWfill; +} SWdraw; typedef enum { SW_FRONT = GL_FRONT, @@ -232,7 +232,9 @@ void swViewport(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); void swClear(void); -void swBegin(SWfill mode); +void swCullFace(SWface face); + +void swBegin(SWdraw mode); void swEnd(void); void swVertex2i(int x, int y); @@ -270,7 +272,7 @@ void swNormal3f(float x, float y, float z); void swNormal3fv(const float* v); void swBindArray(SWarray type, void *buffer); -void swDrawArrays(SWfill mode, int offset, int count); +void swDrawArrays(SWdraw mode, int offset, int count); uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); void swUnloadTexture(uint32_t id); @@ -370,7 +372,7 @@ typedef struct { sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' - SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) + SWdraw drawMode; // Current polygon filling mode (e.g., lines, triangles) float pointSize; // Rasterized point size float lineWidth; // Rasterized line width @@ -936,6 +938,18 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); } + if (RLSW.stateFlags & SW_STATE_CULL_FACE) { + float x0 = polygon[0].homogeneous[0], y0 = polygon[0].homogeneous[1]; + float x1 = polygon[1].homogeneous[0], y1 = polygon[1].homogeneous[1]; + float x2 = 
polygon[2].homogeneous[0], y2 = polygon[2].homogeneous[1]; + + float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); + if ((RLSW.cullFace == SW_BACK && sgnArea >= 0) || (RLSW.cullFace == SW_FRONT && sgnArea <= 0)) { + *vertexCounter = 0; + return; + } + } + if (sw_triangle_clip_w(polygon, vertexCounter) && sw_triangle_clip_xyz(polygon, vertexCounter)) { for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = polygon + i; @@ -965,7 +979,7 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP } } -#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ const sw_vertex_t* end, float yDu, float yDv) \ { \ @@ -1504,6 +1518,11 @@ static inline bool sw_is_texture_wrap_valid(int wrap) return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); } +static inline bool sw_is_face_valid(int face) +{ + return (face == SW_FRONT || face == SW_BACK); +} + /* === Public Implementation === */ @@ -1546,6 +1565,8 @@ void swInit(int w, int h) RLSW.vertexBuffer[0].normal[1] = 0.0f; RLSW.vertexBuffer[0].normal[2] = 1.0f; + RLSW.cullFace = SW_BACK; + static const float defTex[3*2*2] = { 1.0f, 1.0f, 1.0f, @@ -1889,14 +1910,23 @@ void swClear(void) } } -void swBegin(SWfill mode) +void swCullFace(SWface face) +{ + if (!sw_is_face_valid(face)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.cullFace = face; +} + +void swBegin(SWdraw mode) { if (mode < SW_POINTS || mode > SW_QUADS) { RLSW.errCode = SW_INVALID_ENUM; return; } RLSW.vertexCounter = 0; - RLSW.fillMode = mode; + RLSW.drawMode = mode; } void swEnd(void) @@ -1960,7 +1990,7 @@ void swVertex4fv(const float* v) RLSW.vertexCounter++; int neededVertices = 0; - switch (RLSW.fillMode) { + switch (RLSW.drawMode) { case SW_POINTS: neededVertices = 1; break; @@ -1981,7 +2011,7 
@@ void swVertex4fv(const float* v) sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); - switch (RLSW.fillMode) { + switch (RLSW.drawMode) { case SW_POINTS: break; case SW_LINES: @@ -2259,7 +2289,7 @@ void swBindArray(SWarray type, void *buffer) } } -void swDrawArrays(SWfill mode, int offset, int count) +void swDrawArrays(SWdraw mode, int offset, int count) { if (RLSW.array.positions == 0) { RLSW.errCode = SW_INVALID_OPERATION; From 804240966abf787c7686bbeb8771399fc0866f9d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 00:16:58 +0100 Subject: [PATCH 007/105] impl color blending --- src/external/rlsw.h | 472 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 392 insertions(+), 80 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 3e1340569..8f6fc94ca 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -69,6 +69,7 @@ #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 #define GL_MODELVIEW 0x1700 #define GL_PROJECTION 0x1701 @@ -97,6 +98,18 @@ #define GL_FRONT 0x0404 #define GL_BACK 0x0405 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 + #define GL_NEAREST 0x2600 #define GL_LINEAR 0x2601 @@ -124,7 +137,8 @@ typedef enum { SW_TEXTURE_2D = GL_TEXTURE_2D, SW_DEPTH_TEST = GL_DEPTH_TEST, - SW_CULL_FACE = GL_CULL_FACE + SW_CULL_FACE = GL_CULL_FACE, + SW_BLEND = GL_BLEND } SWstate; typedef enum { @@ -152,6 +166,20 @@ typedef enum { SW_BACK = GL_BACK, } SWface; +typedef enum { + SW_ZERO = GL_ZERO, + SW_ONE = GL_ONE, + SW_SRC_COLOR = GL_SRC_COLOR, + SW_ONE_MINUS_SRC_COLOR = GL_ONE_MINUS_SRC_COLOR, + 
SW_SRC_ALPHA = GL_SRC_ALPHA, + SW_ONE_MINUS_SRC_ALPHA = GL_ONE_MINUS_SRC_ALPHA, + SW_DST_ALPHA = GL_DST_ALPHA, + SW_ONE_MINUS_DST_ALPHA = GL_ONE_MINUS_DST_ALPHA, + SW_DST_COLOR = GL_DST_COLOR, + SW_ONE_MINUS_DST_COLOR = GL_ONE_MINUS_DST_COLOR, + SW_SRC_ALPHA_SATURATE = GL_SRC_ALPHA_SATURATE +} SWfactor; + typedef enum { SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) @@ -206,6 +234,7 @@ typedef enum { SW_INVALID_OPERATION = GL_INVALID_OPERATION, } SWerrcode; + /* === Public API === */ void swInit(int w, int h); @@ -232,6 +261,7 @@ void swViewport(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); void swClear(void); +void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); void swBegin(SWdraw mode); @@ -294,9 +324,12 @@ void swBindTexture(uint32_t id); #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) +#define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) + #define SW_STATE_TEXTURE_2D (1 << 0) #define SW_STATE_DEPTH_TEST (1 << 1) #define SW_STATE_CULL_FACE (1 << 2) +#define SW_STATE_BLEND (1 << 3) #define SW_CLIP_INSIDE (0x00) // 0000 #define SW_CLIP_LEFT (0x01) // 0001 @@ -354,9 +387,6 @@ typedef struct { uint32_t currentTexture; sw_matrix_t *currentMatrix; - uint32_t blendFunction; - uint32_t depthFunction; - int vpPos[2]; // Represents the top-left corner of the viewport int vpDim[2]; // Represents the dimensions of the viewport (minus one) int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) @@ -392,6 +422,9 @@ typedef struct { SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) bool modelMatrixUsed; // Flag indicating if the model matrix is used + SWfactor srcFactor; + SWfactor dstFactor; + SWface cullFace; // Faces to cull SWerrcode errCode; // Last error code @@ -825,6 +858,144 @@ static inline void 
sw_texture_sample(float* color, const sw_texture_t* tex, floa } +/* === Color Blending Functions === */ + +static inline void sw_blend_colors(float dst[4], float src[4]) +{ + float src_factor[4] = { 0 }; + float dst_factor[4] = { 0 }; + + switch (RLSW.srcFactor) { + case SW_ZERO: + src_factor[0] = src_factor[1] = src_factor[2] = src_factor[3] = 0.0f; + break; + case SW_ONE: + src_factor[0] = src_factor[1] = src_factor[2] = src_factor[3] = 1.0f; + break; + case SW_SRC_COLOR: + src_factor[0] = src[0]; + src_factor[1] = src[1]; + src_factor[2] = src[2]; + src_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_COLOR: + src_factor[0] = 1.0f - src[0]; + src_factor[1] = 1.0f - src[1]; + src_factor[2] = 1.0f - src[2]; + src_factor[3] = 1.0f - src[3]; + break; + case SW_SRC_ALPHA: + src_factor[0] = src[3]; + src_factor[1] = src[3]; + src_factor[2] = src[3]; + src_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_ALPHA: + src_factor[0] = 1.0f - src[3]; + src_factor[1] = 1.0f - src[3]; + src_factor[2] = 1.0f - src[3]; + src_factor[3] = 1.0f - src[3]; + break; + case SW_DST_ALPHA: + src_factor[0] = dst[3]; + src_factor[1] = dst[3]; + src_factor[2] = dst[3]; + src_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_ALPHA: + src_factor[0] = 1.0f - dst[3]; + src_factor[1] = 1.0f - dst[3]; + src_factor[2] = 1.0f - dst[3]; + src_factor[3] = 1.0f - dst[3]; + break; + case SW_DST_COLOR: + src_factor[0] = dst[0]; + src_factor[1] = dst[1]; + src_factor[2] = dst[2]; + src_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_COLOR: + src_factor[0] = 1.0f - dst[0]; + src_factor[1] = 1.0f - dst[1]; + src_factor[2] = 1.0f - dst[2]; + src_factor[3] = 1.0f - dst[3]; + break; + case SW_SRC_ALPHA_SATURATE: + src_factor[0] = 1.0f; + src_factor[1] = 1.0f; + src_factor[2] = 1.0f; + src_factor[3] = fminf(src[3], 1.0f); + break; + } + + switch (RLSW.dstFactor) { + case SW_ZERO: + dst_factor[0] = dst_factor[1] = dst_factor[2] = dst_factor[3] = 0.0f; + break; + case SW_ONE: + dst_factor[0] = 
dst_factor[1] = dst_factor[2] = dst_factor[3] = 1.0f; + break; + case SW_SRC_COLOR: + dst_factor[0] = src[0]; + dst_factor[1] = src[1]; + dst_factor[2] = src[2]; + dst_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_COLOR: + dst_factor[0] = 1.0f - src[0]; + dst_factor[1] = 1.0f - src[1]; + dst_factor[2] = 1.0f - src[2]; + dst_factor[3] = 1.0f - src[3]; + break; + case SW_SRC_ALPHA: + dst_factor[0] = src[3]; + dst_factor[1] = src[3]; + dst_factor[2] = src[3]; + dst_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_ALPHA: + dst_factor[0] = 1.0f - src[3]; + dst_factor[1] = 1.0f - src[3]; + dst_factor[2] = 1.0f - src[3]; + dst_factor[3] = 1.0f - src[3]; + break; + case SW_DST_ALPHA: + dst_factor[0] = dst[3]; + dst_factor[1] = dst[3]; + dst_factor[2] = dst[3]; + dst_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_ALPHA: + dst_factor[0] = 1.0f - dst[3]; + dst_factor[1] = 1.0f - dst[3]; + dst_factor[2] = 1.0f - dst[3]; + dst_factor[3] = 1.0f - dst[3]; + break; + case SW_DST_COLOR: + dst_factor[0] = dst[0]; + dst_factor[1] = dst[1]; + dst_factor[2] = dst[2]; + dst_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_COLOR: + dst_factor[0] = 1.0f - dst[0]; + dst_factor[1] = 1.0f - dst[1]; + dst_factor[2] = 1.0f - dst[2]; + dst_factor[3] = 1.0f - dst[3]; + break; + case SW_SRC_ALPHA_SATURATE: + // NOTE: This case is only available for the source. + // Since the factors are validated before assignment, + // we should never reach this point. 
+ break; + } + + for (int i = 0; i < 4; ++i) { + dst[i] = src_factor[i] * src[i] + dst_factor[i] * dst[i]; + } +} + + /* === Projection Helper Functions === */ static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) @@ -979,9 +1150,9 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP } } -#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ - const sw_vertex_t* end, float yDu, float yDv) \ + const sw_vertex_t* end, float yDu, float yDv) \ { \ /* Calculate the horizontal width and avoid division by zero */ \ float dx = end->screen[0] - start->screen[0]; \ @@ -1022,12 +1193,12 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ - uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ - uint8_t* dst = row_ptr + xStart * 4; \ + uint8_t* cptrRow = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ + uint8_t* cptr = cptrRow + xStart * 4; \ \ /* Pre-calculate the pointer for the depth buffer row */ \ - uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ - uint16_t* dptr = depth_row; \ + uint16_t* dptrRow = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ + uint16_t* dptr = dptrRow; \ \ /* Scanline rasterization loop */ \ for (int x = xStart; x < xEnd; x++) { \ @@ -1045,32 +1216,55 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, /* Update the depth buffer */ \ *dptr = (uint16_t)(z * UINT16_MAX); \ \ - if (ENABLE_TEXTURE) \ + if (ENABLE_COLOR_BLEND) \ { \ - /* Sample the texture */ \ - float texColor[4]; \ - sw_texture_sample(texColor, 
tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + float dstColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; \ + float srcColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; \ + \ + if (ENABLE_TEXTURE) { \ + sw_texture_sample(srcColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + } \ \ - /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ - float finalColor = texColor[i]; \ - finalColor *= (start->color[i] + t * dcol[i]) * w; \ - dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + dstColor[i] = (float)cptr[i] / 255; \ + srcColor[i] *= (start->color[i] + t * dcol[i]) * w; \ + } \ + \ + sw_blend_colors(dstColor, srcColor); \ + \ + for (int i = 0; i < 4; i++) { \ + cptr[i] = (uint8_t)(sw_saturate(dstColor[i]) * 255); \ } \ } \ else \ { \ - /* Interpolate the color */ \ - for (int i = 0; i < 4; i++) { \ - float finalColor = (start->color[i] + t * dcol[i]) * w; \ - dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + if (ENABLE_TEXTURE) \ + { \ + /* Sample the texture */ \ + float texColor[4]; \ + sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + \ + /* Interpolate the color and modulate by the texture color */ \ + for (int i = 0; i < 4; i++) { \ + float finalColor = texColor[i]; \ + finalColor *= (start->color[i] + t * dcol[i]) * w; \ + cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + } \ + } \ + else \ + { \ + /* Interpolate the color */ \ + for (int i = 0; i < 4; i++) { \ + float finalColor = (start->color[i] + t * dcol[i]) * w; \ + cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + } \ } \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ t += dt; \ - dst += 4; \ + cptr += 4; \ dptr++; \ if (ENABLE_TEXTURE) { \ u += xDu; \ @@ -1170,15 +1364,23 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const } \ } -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, false, false) -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex, 
true, false) -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_depth, false, true) -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex_depth, true, true) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, 0, 0, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX, 1, 0, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH, 0, 1, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_BLEND, 0, 0, 1) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH, 1, 1, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_BLEND, 1, 0, 1) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH_BLEND, 0, 1, 1) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH_BLEND, 1, 1, 1) DEFINE_TRIANGLE_RASTER(sw_triangle_raster, sw_triangle_raster_scanline, false) -DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex, sw_triangle_raster_scanline_tex, true) -DEFINE_TRIANGLE_RASTER(sw_triangle_raster_depth, sw_triangle_raster_scanline_depth, false) -DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex_depth, sw_triangle_raster_scanline_tex_depth, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX, sw_triangle_raster_scanline_TEX, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH, sw_triangle_raster_scanline_DEPTH, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_BLEND, sw_triangle_raster_scanline_BLEND, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH, sw_triangle_raster_scanline_TEX_DEPTH, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND, sw_triangle_raster_scanline_TEX_BLEND, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND, sw_triangle_raster_scanline_DEPTH_BLEND, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND, sw_triangle_raster_scanline_TEX_DEPTH_BLEND, true) static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) { @@ -1195,37 +1397,39 @@ static inline 
void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* return; } - if ((RLSW.stateFlags & SW_STATE_TEXTURE_2D) && (RLSW.stateFlags & SW_STATE_DEPTH_TEST)) { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster_tex_depth( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } +# define TRIANGLE_RASTER(RASTER_FUNC) \ + { \ + for (int i = 0; i < vertexCounter - 2; i++) { \ + RASTER_FUNC( \ + &polygon[0], &polygon[i + 1], &polygon[i + 2], \ + &RLSW.loadedTextures[RLSW.currentTexture] \ + ); \ + } \ } - else if (RLSW.stateFlags & SW_STATE_TEXTURE_2D) { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster_tex( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } + + if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND) } - else if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster_depth( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH) + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + TRIANGLE_RASTER(sw_triangle_raster_DEPTH) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX) } else { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); 
- } + TRIANGLE_RASTER(sw_triangle_raster) } } @@ -1383,7 +1587,7 @@ bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) return true; } -#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST) \ +#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ { \ int x1 = (int)v0->screen[0]; \ @@ -1417,11 +1621,11 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int decInc = (longLen == 0) ? 0 \ : (shortLen << 16) / longLen; \ \ - const int fb_width = RLSW.framebuffer.width; \ - const float z_diff = z2 - z1; \ + const int fbWidth = RLSW.framebuffer.width; \ + const float zDiff = z2 - z1; \ \ - uint8_t* color_buffer = RLSW.framebuffer.color; \ - uint16_t* depth_buffer = RLSW.framebuffer.depth; \ + uint8_t* colorBuffer = RLSW.framebuffer.color; \ + uint16_t* depthBuffer = RLSW.framebuffer.depth; \ \ int j = 0; \ if (yLonger) { \ @@ -1430,10 +1634,10 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ int x = x1 + (j >> 16); \ int y = y1 + i; \ - float z = z1 + t * z_diff; \ - int pixel_index = y * fb_width + x; \ + float z = z1 + t * zDiff; \ + int pixel_index = y * fbWidth + x; \ \ - uint16_t* dptr = &depth_buffer[pixel_index]; \ + uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ float depth = (float)(*dptr) / UINT16_MAX; \ if (z > depth) continue; \ @@ -1442,11 +1646,27 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ *dptr = (uint16_t)(z * UINT16_MAX); \ \ int color_index = 4 * pixel_index; \ - uint8_t* cptr = &color_buffer[color_index]; \ + uint8_t* cptr = &colorBuffer[color_index]; \ \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + float srcColor[4]; \ + for (int j = 0; j < 4; j++) { \ + dstColor[j] = (float)cptr[i] / 255; \ + srcColor[j] = 
sw_lerp(v0->color[j], v1->color[j], t); \ + } \ + sw_blend_colors(dstColor, srcColor); \ + for (int j = 0; j < 4; j++) { \ + cptr[j] = (uint8_t)(dstColor[j] * 255); \ + } \ + } \ + else \ + { \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ } \ } \ } \ @@ -1456,10 +1676,10 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ int x = x1 + i; \ int y = y1 + (j >> 16); \ - float z = z1 + t * z_diff; \ - int pixel_index = y * fb_width + x; \ + float z = z1 + t * zDiff; \ + int pixel_index = y * fbWidth + x; \ \ - uint16_t* dptr = &depth_buffer[pixel_index]; \ + uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ float depth = (float)(*dptr) / UINT16_MAX; \ if (z > depth) continue; \ @@ -1468,18 +1688,36 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ *dptr = (uint16_t)(z * UINT16_MAX); \ \ int color_index = 4 * pixel_index; \ - uint8_t* cptr = &color_buffer[color_index]; \ + uint8_t* cptr = &colorBuffer[color_index]; \ \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + float srcColor[4]; \ + for (int j = 0; j < 4; j++) { \ + dstColor[j] = (float)cptr[i] / 255; \ + srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ + } \ + sw_blend_colors(dstColor, srcColor); \ + for (int j = 0; j < 4; j++) { \ + cptr[j] = (uint8_t)(dstColor[j] * 255); \ + } \ + } \ + else \ + { \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ } \ } \ } \ } -DEFINE_LINE_RASTER(sw_line_raster, false) -DEFINE_LINE_RASTER(sw_line_raster_depth, true) +DEFINE_LINE_RASTER(sw_line_raster, 0, 0) +DEFINE_LINE_RASTER(sw_line_raster_DEPTH, 1, 0) +DEFINE_LINE_RASTER(sw_line_raster_BLEND, 0, 1) 
+DEFINE_LINE_RASTER(sw_line_raster_DEPTH_BLEND, 1, 1) static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) { @@ -1487,8 +1725,14 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) return; } - if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { - sw_line_raster_depth(v0, v1); + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_line_raster_DEPTH_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_line_raster_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_line_raster_DEPTH(v0, v1); } else { sw_line_raster(v0, v1); @@ -1523,6 +1767,54 @@ static inline bool sw_is_face_valid(int face) return (face == SW_FRONT || face == SW_BACK); } +static inline bool sw_is_blend_src_factor_valid(int blend) +{ + bool result = false; + + switch (blend) { + case SW_ZERO: + case SW_ONE: + case SW_SRC_COLOR: + case SW_ONE_MINUS_SRC_COLOR: + case SW_SRC_ALPHA: + case SW_ONE_MINUS_SRC_ALPHA: + case SW_DST_ALPHA: + case SW_ONE_MINUS_DST_ALPHA: + case SW_DST_COLOR: + case SW_ONE_MINUS_DST_COLOR: + case SW_SRC_ALPHA_SATURATE: + result = true; + break; + default: + break; + } + + return result; +} + +static inline bool sw_is_blend_dst_factor_valid(int blend) +{ + bool result = false; + + switch (blend) { + case SW_ZERO: + case SW_ONE: + case SW_SRC_COLOR: + case SW_ONE_MINUS_SRC_COLOR: + case SW_SRC_ALPHA: + case SW_ONE_MINUS_SRC_ALPHA: + case SW_DST_ALPHA: + case SW_ONE_MINUS_DST_ALPHA: + case SW_DST_COLOR: + case SW_ONE_MINUS_DST_COLOR: + result = true; + break; + default: + break; + } + + return result; +} /* === Public Implementation === */ @@ -1565,6 +1857,9 @@ void swInit(int w, int h) RLSW.vertexBuffer[0].normal[1] = 0.0f; RLSW.vertexBuffer[0].normal[2] = 1.0f; + RLSW.srcFactor = SW_SRC_ALPHA; + RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; + RLSW.cullFace = SW_BACK; static const float defTex[3*2*2] = @@ -1610,6 +1905,9 @@ void swEnable(SWstate state) case SW_CULL_FACE: RLSW.stateFlags |= SW_STATE_CULL_FACE; 
break; + case SW_BLEND: + RLSW.stateFlags |= SW_STATE_BLEND; + break; default: RLSW.errCode = SW_INVALID_ENUM; break; @@ -1628,6 +1926,9 @@ void swDisable(SWstate state) case SW_CULL_FACE: RLSW.stateFlags &= ~SW_STATE_CULL_FACE; break; + case SW_BLEND: + RLSW.stateFlags &= ~SW_STATE_BLEND; + break; default: RLSW.errCode = SW_INVALID_ENUM; break; @@ -1910,6 +2211,17 @@ void swClear(void) } } +void swBlendFunc(SWfactor sfactor, SWfactor dfactor) +{ + if (!sw_is_blend_src_factor_valid(sfactor) + || !sw_is_blend_dst_factor_valid(dfactor)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.srcFactor = sfactor; + RLSW.dstFactor = dfactor; +} + void swCullFace(SWface face) { if (!sw_is_face_valid(face)) { From ff1849dc97ee27646c41a3f0f9e488c166fce98a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 01:47:16 +0100 Subject: [PATCH 008/105] fixes and tweaks --- src/external/rlsw.h | 106 +++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 8f6fc94ca..90c1a7861 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -253,8 +253,8 @@ void swTranslatef(float x, float y, float z); void swRotatef(float angle, float x, float y, float z); void swScalef(float x, float y, float z); void swMultMatrixf(const float* mat); -void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); -void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); +void swFrustum(double left, double right, double bottom, double top, double znear, double zfar); +void swOrtho(double left, double right, double bottom, double top, double znear, double zfar); void swViewport(int x, int y, int width, int height); @@ -1115,7 +1115,7 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP float x2 = polygon[2].homogeneous[0], y2 = polygon[2].homogeneous[1]; float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - 
x0) * (y1 - y0); - if ((RLSW.cullFace == SW_BACK && sgnArea >= 0) || (RLSW.cullFace == SW_FRONT && sgnArea <= 0)) { + if ((RLSW.cullFace == SW_FRONT && sgnArea >= 0) || (RLSW.cullFace == SW_BACK && sgnArea <= 0)) { *vertexCounter = 0; return; } @@ -2076,33 +2076,40 @@ void swRotatef(float angle, float x, float y, float z) { angle *= SW_DEG2RAD; - sw_matrix_t mat; - sw_matrix_id(mat); - float lengthSq = x*x + y*y + z*z; if (lengthSq != 1.0f && lengthSq != 0.0f) { - float invLenght = 1.0f / lengthSq; - x *= invLenght; - y *= invLenght; - z *= invLenght; + float invLength = 1.0f/sqrtf(lengthSq); + x *= invLength; + y *= invLength; + z *= invLength; } float sinres = sinf(angle); float cosres = cosf(angle); float t = 1.0f - cosres; - mat[0] = x*x*t + cosres; - mat[1] = y*x*t + z*sinres; - mat[2] = z*x*t - y*sinres; + sw_matrix_t mat; - mat[4] = x*y*t - z*sinres; - mat[5] = y*y*t + cosres; - mat[6] = z*y*t + x*sinres; + mat[0] = x*x*t + cosres; + mat[1] = y*x*t + z*sinres; + mat[2] = z*x*t - y*sinres; + mat[3] = 0.0f; - mat[8] = x*z*t + y*sinres; - mat[9] = y*z*t - x*sinres; + mat[4] = x*y*t - z*sinres; + mat[5] = y*y*t + cosres; + mat[6] = z*y*t + x*sinres; + mat[7] = 0.0f; + + mat[8] = x*z*t + y*sinres; + mat[9] = y*z*t - x*sinres; mat[10] = z*z*t + cosres; + mat[11] = 0.0f; + + mat[12] = 0.0f; + mat[13] = 0.0f; + mat[14] = 0.0f; + mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); } @@ -2124,44 +2131,63 @@ void swMultMatrixf(const float* mat) sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); } -void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) +void swFrustum(double left, double right, double bottom, double top, double znear, double zfar) { - sw_matrix_t mat = { 0 }; + sw_matrix_t mat; - float rl = right - left; - float tb = top - bottom; - float fn = zfar - znear; + double rl = right - left; + double tb = top - bottom; + double fn = zfar - znear; - mat[0] = (znear * 2.0f) / rl; - mat[5] = 
(znear * 2.0f) / tb; + mat[0] = (znear*2.0)/rl; + mat[1] = 0.0f; + mat[2] = 0.0f; + mat[3] = 0.0f; - mat[8] = (right + left) / rl; - mat[9] = (top + bottom) / tb; - mat[10] = -(zfar + znear) / fn; + mat[4] = 0.0f; + mat[5] = (znear*2.0)/tb; + mat[6] = 0.0f; + mat[7] = 0.0f; + + mat[8] = (right + left)/rl; + mat[9] = (top + bottom)/tb; + mat[10] = -(zfar + znear)/fn; mat[11] = -1.0f; - mat[14] = -(zfar * znear * 2.0f) / fn; + mat[12] = 0.0f; + mat[13] = 0.0f; + mat[14] = -(zfar*znear*2.0)/fn; + mat[15] = 0.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); } -void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) +void swOrtho(double left, double right, double bottom, double top, double znear, double zfar) { - sw_matrix_t mat = { 0 }; + sw_matrix_t mat; - float rl = (right - left); - float tb = (top - bottom); - float fn = (zfar - znear); + double rl = right - left; + double tb = top - bottom; + double fn = zfar - znear; - mat[0] = 2.0f / rl; - mat[5] = 2.0f / tb; + mat[0] = 2.0f/rl; + mat[1] = 0.0f; + mat[2] = 0.0f; + mat[3] = 0.0f; - mat[10] = -2.0f / fn; + mat[4] = 0.0f; + mat[5] = 2.0f/tb; + mat[6] = 0.0f; + mat[7] = 0.0f; + + mat[8] = 0.0f; + mat[9] = 0.0f; + mat[10] = -2.0f/fn; mat[11] = 0.0f; - mat[12] = -(left + right) / rl; - mat[13] = -(top + bottom) / tb; - mat[14] = -(zfar + znear) / fn; + mat[12] = -(left + right)/rl; + mat[13] = -(top + bottom)/tb; + mat[14] = -(zfar + znear)/fn; mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); From a1b2ecbded5b7ab1f7ec36f7be84f583f89afbfe Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 01:58:15 +0100 Subject: [PATCH 009/105] add clear buffer bitmasks --- src/external/rlsw.h | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 90c1a7861..04b2784d4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -71,6 +71,9 @@ #define 
GL_CULL_FACE 0x0B44 #define GL_BLEND 0x0BE2 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_DEPTH_BUFFER_BIT 0x00000100 + #define GL_MODELVIEW 0x1700 #define GL_PROJECTION 0x1701 #define GL_TEXTURE 0x1702 @@ -141,6 +144,11 @@ typedef enum { SW_BLEND = GL_BLEND } SWstate; +typedef enum { + SW_COLOR_BUFFER_BIT = GL_COLOR_BUFFER_BIT, + SW_DEPTH_BUFFER_BIT = GL_DEPTH_BUFFER_BIT +} SWbuffer; + typedef enum { SW_PROJECTION = GL_PROJECTION, SW_MODELVIEW = GL_MODELVIEW, @@ -259,7 +267,7 @@ void swOrtho(double left, double right, double bottom, double top, double znear, void swViewport(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); -void swClear(void); +void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); @@ -2227,13 +2235,31 @@ void swClearColor(float r, float g, float b, float a) RLSW.clearColor[3] = a * 255; } -void swClear(void) +void swClear(uint32_t bitmask) { int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - for (int i = 0; i < size; i++) { - ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); - RLSW.framebuffer.depth[i] = RLSW.clearDepth; + uint32_t* cptr = (uint32_t*)RLSW.framebuffer.color; + uint16_t* dptr = RLSW.framebuffer.depth; + + uint32_t c = *((uint32_t*)RLSW.clearColor); + uint16_t d = RLSW.clearDepth; + + if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { + for (int i = 0; i < size; i++) { + cptr[i] = c; + dptr[i] = d; + } + } + else if (bitmask & (SW_COLOR_BUFFER_BIT)) { + for (int i = 0; i < size; i++) { + cptr[i] = c; + } + } + else if (bitmask & SW_DEPTH_BUFFER_BIT) { + for (int i = 0; i < size; i++) { + dptr[i] = d; + } } } From 0b713cdfba89c0513888622539f47a0348005b42 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 03:30:41 +0100 Subject: [PATCH 010/105] small optimizations / tweaks --- src/external/rlsw.h | 89 
++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 04b2784d4..c6254841a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -324,6 +324,7 @@ void swBindTexture(uint32_t id); #ifdef RLSW_IMPL #include +#include #include /* === Defines and Macros === */ @@ -519,10 +520,10 @@ static inline float sw_lerp(float a, float b, float t) return a + t * (b - a); } -static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) +static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_vertex_t* b, float t) { sw_vertex_t result; - for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { + for (int i = 0; i < offsetof(sw_vertex_t, screen) / sizeof(float); i++) { ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); } return result; @@ -1031,7 +1032,7 @@ static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON for (int i = 0; i < inputCounter; i++) { char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; if (prevDot*currDot < 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], + polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); } if (currDot > 0) { @@ -1070,7 +1071,7 @@ static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYG for (int i = 0; i < inputCounter; i++) { char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 
1 : -1; if (prevDot * currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / + polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); } if (currDot > 0) { @@ -1096,7 +1097,7 @@ static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYG for (int i = 0; i < inputCounter; i++) { char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; if (prevDot*currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / + polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); } if (currDot > 0) { @@ -1162,17 +1163,13 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ const sw_vertex_t* end, float yDu, float yDv) \ { \ - /* Calculate the horizontal width and avoid division by zero */ \ - float dx = end->screen[0] - start->screen[0]; \ - if (fabsf(dx) < 1e-4f) return; \ - \ /* Convert and center the screen coordinates */ \ int xStart = (int)(start->screen[0] + 0.5f); \ int xEnd = (int)(end->screen[0] + 0.5f); \ int y = (int)(start->screen[1] + 0.5f); \ \ /* Calculate the initial interpolation parameter and its increment */ \ - float dt = 1.0f / dx; \ + float dt = 1.0f / (end->screen[0] - start->screen[0]); \ float t = (xStart - start->screen[0]) * dt; \ \ float xDu, xDv; \ @@ -1295,20 +1292,20 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const float x1 = 
v1->screen[0], y1 = v1->screen[1]; \ float x2 = v2->screen[0], y2 = v2->screen[1]; \ \ - /* Reject degenerate triangles */ \ - float height = y2 - y0; \ - if (height < 1e-4f) return; \ + /* Compute height differences */ \ + float h20 = y2 - y0; \ + float h10 = y1 - y0; \ + float h21 = y2 - y1; \ \ - /* Precompute the inverse of the triangle height and */ \ - /* edge lengths with checks to avoid division by zero. */ \ - float inv_height = 1.0f / height; \ - float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; \ - float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; \ + /* Precompute the inverse values without additional checks */ \ + float invH20 = (h20 > 1e-6f) ? 1.0f / h20 : 0.0f; \ + float invH10 = (h10 > 1e-6f) ? 1.0f / h10 : 0.0f; \ + float invH21 = (h21 > 1e-6f) ? 1.0f / h21 : 0.0f; \ \ /* Pre-calculation of slopes (dx/dy) */ \ - float dx02 = (x2 - x0) * inv_height; \ - float dx01 = (x1 - x0) * inv_y1y0; \ - float dx12 = (x2 - x1) * inv_y2y1; \ + float dx02 = (x2 - x0) * invH20; \ + float dx01 = (x1 - x0) * invH10; \ + float dx12 = (x2 - x1) * invH21; \ \ /* Y bounds (vertical clipping) */ \ int yTop = (int)(y0 + 0.5f); \ @@ -1318,8 +1315,8 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const /* Global calculation of vertical texture gradients for the triangle */ \ float yDu, yDv; \ if (ENABLE_TEXTURE) { \ - yDu = (v2->texcoord[0] - v0->texcoord[0]) * inv_height; \ - yDv = (v2->texcoord[1] - v0->texcoord[1]) * inv_height; \ + yDu = (v2->texcoord[0] - v0->texcoord[0]) * invH20; \ + yDv = (v2->texcoord[1] - v0->texcoord[1]) * invH20; \ } \ \ /* Initializing scanline variables */ \ @@ -1328,22 +1325,35 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const \ /* Scanline for the upper part of the triangle */ \ for (int y = yTop; y < yMiddle; y++) { \ + \ + /* Discard the lines that are degenerate */ \ + if (fabsf(xRight - xLeft) <= 1e-6f) { \ + goto discardTL; \ + } \ + \ + /* 
Calculation of interpolation factors */ \ float dy = (float)y - y0; \ - float t1 = dy * inv_height; \ - float t2 = dy * inv_y1y0; \ + float t1 = dy * invH20; \ + float t2 = dy * invH10; \ \ /* Vertex interpolation */ \ - start = sw_lerp_vertex(v0, v2, t1); \ - end = sw_lerp_vertex(v0, v1, t2); \ + start = sw_lerp_vertex_PNTCH(v0, v2, t1); \ + end = sw_lerp_vertex_PNTCH(v0, v1, t2); \ start.screen[0] = xLeft; \ start.screen[1] = (float)y; \ end.screen[0] = xRight; \ end.screen[1] = (float)y; \ \ - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + if (xLeft > xRight) { \ + sw_vertex_t tmp = start; \ + start = end; \ + end = tmp; \ + } \ + \ FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ \ /* Incremental update */ \ + discardTL: \ xLeft += dx02; \ xRight += dx01; \ } \ @@ -1351,22 +1361,35 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const /* Scanline for the lower part of the triangle */ \ xRight = x1; /* Restart the right side from the second vertex */ \ for (int y = yMiddle; y < yBottom; y++) { \ + \ + /* Discard the lines that are degenerate */ \ + if (fabsf(xRight - xLeft) <= 1e-6f) { \ + goto discardBL; \ + } \ + \ + /* Calculation of interpolation factors */ \ float dy = (float)y - y0; \ - float t1 = dy * inv_height; \ - float t2 = (float)(y - y1) * inv_y2y1; \ + float t1 = dy * invH20; \ + float t2 = (float)(y - y1) * invH21; \ \ /* Vertex interpolation */ \ - start = sw_lerp_vertex(v0, v2, t1); \ - end = sw_lerp_vertex(v1, v2, t2); \ + start = sw_lerp_vertex_PNTCH(v0, v2, t1); \ + end = sw_lerp_vertex_PNTCH(v1, v2, t2); \ start.screen[0] = xLeft; \ start.screen[1] = (float)y; \ end.screen[0] = xRight; \ end.screen[1] = (float)y; \ \ - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + if (xLeft > xRight) { \ + sw_vertex_t tmp = start; \ + start = end; \ + end = tmp; \ + } \ + \ FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ \ /* Incremental update */ \ + discardBL: \ xLeft += 
dx02; \ xRight += dx12; \ } \ From 363f3afa60ce4bd721629d7d811dcac981cc5c3a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 04:19:12 +0100 Subject: [PATCH 011/105] review ndc to screen projection --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c6254841a..83bf21f9a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1010,7 +1010,7 @@ static inline void sw_blend_colors(float dst[4], float src[4]) static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) { screen[0] = RLSW.vpPos[0] + (ndc[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; - screen[1] = RLSW.vpPos[1] + (ndc[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + screen[1] = RLSW.vpPos[1] + (1.0f - ndc[1]) * 0.5f * RLSW.vpDim[1]; } From 6b8978cb3e0f84e6cc3090ad4b98cd1d00a41bff Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 20:53:18 +0100 Subject: [PATCH 012/105] avoid to recalculate MVP when its not needed + tweaks --- src/external/rlsw.h | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 83bf21f9a..5ef11bf58 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -430,6 +430,7 @@ typedef struct { SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) bool modelMatrixUsed; // Flag indicating if the model matrix is used + bool needToUpdateMVP; SWfactor srcFactor; SWfactor dstFactor; @@ -1467,7 +1468,7 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* /* === Line Rendering Part === */ -uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) +static inline uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) { uint8_t code = SW_CLIP_INSIDE; if (screen[0] < xMin) code |= SW_CLIP_LEFT; @@ -1477,7 +1478,7 @@ uint8_t sw_line_clip_encode_2d(const 
float screen[2], int xMin, int yMin, int xM return code; } -bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) +static inline bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) { int xMin = RLSW.vpMin[0]; int yMin = RLSW.vpMin[1]; @@ -1531,7 +1532,7 @@ bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) return accept; } -bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) +static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) { if (fabsf(p) < SW_CLIP_EPSILON) { // Check if the line is entirely outside the window @@ -1552,7 +1553,7 @@ bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) return 1; } -bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) +static inline bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) { // TODO: Lerp all vertices here, not just homogeneous coordinates @@ -1587,7 +1588,7 @@ bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) return true; } -bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) +static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) { sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); @@ -1619,7 +1620,7 @@ bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) } #define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ -void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ +static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ { \ int x1 = (int)v0->screen[0]; \ int y1 = (int)v0->screen[1]; \ @@ -2089,6 +2090,8 @@ void swPopMatrix(void) void swLoadIdentity(void) { sw_matrix_id(*RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swTranslatef(float x, float y, float z) @@ -2101,6 +2104,8 @@ void swTranslatef(float x, float y, float z) mat[14] = z; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swRotatef(float angle, 
float x, float y, float z) @@ -2143,6 +2148,8 @@ void swRotatef(float angle, float x, float y, float z) mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swScalef(float x, float y, float z) @@ -2155,11 +2162,15 @@ void swScalef(float x, float y, float z) mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swMultMatrixf(const float* mat) { sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + + RLSW.needToUpdateMVP = true; } void swFrustum(double left, double right, double bottom, double top, double znear, double zfar) @@ -2191,6 +2202,8 @@ void swFrustum(double left, double right, double bottom, double top, double znea mat[15] = 0.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + + RLSW.needToUpdateMVP = true; } void swOrtho(double left, double right, double bottom, double top, double znear, double zfar) @@ -2222,6 +2235,8 @@ void swOrtho(double left, double right, double bottom, double top, double znear, mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + + RLSW.needToUpdateMVP = true; } void swViewport(int x, int y, int width, int height) @@ -2394,9 +2409,11 @@ void swVertex4fv(const float* v) if (RLSW.vertexCounter == neededVertices) { - // TODO: Optimize MVP calculation - sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); - sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + if (RLSW.needToUpdateMVP) { + RLSW.needToUpdateMVP = false; + sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); + sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + } switch (RLSW.drawMode) { case SW_POINTS: From 9be90fd5e6c089a278ac77dd99a9d47fd2c8ad97 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 21:54:18 +0100 Subject: [PATCH 013/105] review the loading and management of textures to be closer to the OpenGL API 
--- src/external/rlsw.h | 229 +++++++++++++++++++++++++++++++------------- 1 file changed, 165 insertions(+), 64 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5ef11bf58..9f02ddd4a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -134,6 +134,19 @@ #define GL_STACK_UNDERFLOW 0x0504 #define GL_OUT_OF_MEMORY 0x0505 +#define GL_ALPHA 0x1906 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 + +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 /* === RLSW Enums === */ @@ -189,31 +202,21 @@ typedef enum { } SWfactor; typedef enum { - SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) - SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) - SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) - SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 
bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp -} SWpixelformat; + SW_LUMINANCE = GL_LUMINANCE, + SW_LUMINANCE_ALPHA = GL_LUMINANCE_ALPHA, + SW_RGB = GL_RGB, + SW_RGBA = GL_RGBA, +} SWformat; + +typedef enum { + SW_UNSIGNED_BYTE = GL_UNSIGNED_BYTE, + SW_BYTE = GL_BYTE, + SW_UNSIGNED_SHORT = GL_UNSIGNED_SHORT, + SW_SHORT = GL_SHORT, + SW_UNSIGNED_INT = GL_UNSIGNED_INT, + SW_INT = GL_INT, + SW_FLOAT = GL_FLOAT +} SWtype; typedef enum { SW_NEAREST = GL_NEAREST, @@ -312,10 +315,11 @@ void swNormal3fv(const float* v); void swBindArray(SWarray type, void *buffer); void swDrawArrays(SWdraw mode, int offset, int count); -uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); -void swUnloadTexture(uint32_t id); +void swGenTextures(int count, uint32_t* textures); +void swDeleteTextures(int count, uint32_t* textures); -void swTextureParameters(uint32_t id, int param, int value); +void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data); +void swTexParameteri(int param, int value); void swBindTexture(uint32_t id); #endif // RLSW_H @@ -348,6 +352,33 @@ void swBindTexture(uint32_t id); /* === Internal Structs === */ +typedef enum { + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) + SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) + 
SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) + SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp +} sw_pixelformat_e; + typedef float sw_matrix_t[4*4]; typedef uint16_t sw_half_t; @@ -531,7 +562,54 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve } -/* === Pixel Format Conversion Part === */ +/* === Pixel Format Part === */ + +int sw_get_pixel_format(SWformat format, SWtype type) +{ + int channels = 0; + int bitsPerChannel = 8; // Default: 8 bits per channel + + // Determine the number of channels (format) + switch (format) { + case SW_LUMINANCE: channels = 1; break; + case SW_LUMINANCE_ALPHA: channels = 2; break; + case SW_RGB: channels = 3; break; + case SW_RGBA: channels = 4; break; + default: return -1; // Unknown format + } + + // Determine the depth of each channel (type) + switch (type) { + case SW_UNSIGNED_BYTE: bitsPerChannel = 8; break; + case SW_BYTE: bitsPerChannel = 8; break; + case SW_UNSIGNED_SHORT: bitsPerChannel = 16; break; + case SW_SHORT: bitsPerChannel = 16; break; + case SW_UNSIGNED_INT: bitsPerChannel = 32; break; + case SW_INT: bitsPerChannel = 32; break; + case SW_FLOAT: bitsPerChannel = 32; break; + default: return -1; // Unknown type + } + + // Map the format and type to the correct internal format + if (bitsPerChannel == 8) 
{ + if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE; + if (channels == 2) return SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA; + if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8; + if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8; + } + else if (bitsPerChannel == 16) { + if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R16; + if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16; + if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16; + } + else if (bitsPerChannel == 32) { + if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R32; + if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; + if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32; + } + + return -1; // Unsupported format +} static inline uint32_t sw_cvt_hf_ui(uint16_t h) { @@ -690,7 +768,7 @@ static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, color[3] = pixel[3]; } -static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) +static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, sw_pixelformat_e format) { switch (format) { @@ -1773,7 +1851,7 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) /* === Some Validity Check Helper === */ -static inline bool sw_is_texture_id_valid(uint32_t id) +static inline bool sw_is_texture_valid(uint32_t id) { bool valid = true; @@ -2718,53 +2796,76 @@ void swDrawArrays(SWdraw mode, int offset, int count) swEnd(); } -uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) +void swGenTextures(int count, uint32_t* textures) { - if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { - RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory, not really stack overflow - return 0; + if (count == 0 || textures == NULL) { + return; } - sw_texture_t texture = { 0 }; - texture.pixels = data; - texture.width = width; - texture.height = height; - 
texture.format = format; - texture.minFilter = SW_NEAREST; - texture.magFilter = SW_NEAREST; - texture.sWrap = SW_REPEAT; - texture.tWrap = SW_REPEAT; - texture.tx = 1.0f / width; - texture.ty = 1.0f / height; - (void)mipmapCount; - - uint32_t id = 0; - if (RLSW.freeTextureIdCount > 0) { - id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; - } - else { - id = RLSW.loadedTextureCount++; + for (int i = 0; i < count; i++) { + if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory, not really stack overflow + return; + } + uint32_t id = 0; + if (RLSW.freeTextureIdCount > 0) { + id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; + } + else { + id = RLSW.loadedTextureCount++; + } + RLSW.loadedTextures[id] = RLSW.loadedTextures[0]; + textures[i] = id; } - - RLSW.loadedTextures[id] = texture; - - return id; } -void swUnloadTexture(uint32_t id) +void swDeleteTextures(int count, uint32_t* textures) { - if (!sw_is_texture_id_valid(id)) { + if (count == 0 || textures == NULL) { + return; + } + + for (int i = 0; i < count; i++) { + if (!sw_is_texture_valid(textures[i])) { + RLSW.errCode = SW_INVALID_VALUE; + continue; + } + RLSW.loadedTextures[textures[i]].pixels = 0; + RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = textures[i]; + } +} + +void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data) +{ + uint32_t id = RLSW.currentTexture; + + if (!sw_is_texture_valid(id)) { RLSW.errCode = SW_INVALID_VALUE; return; } - RLSW.loadedTextures[id].pixels = 0; - RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; + int pixelFormat = sw_get_pixel_format(format, type); + + if (pixelFormat < 0) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + + sw_texture_t* texture = &RLSW.loadedTextures[id]; + + texture->pixels = data; + texture->width = width; + texture->height = height; + texture->format = pixelFormat; + texture->tx = 1.0f / width; + texture->ty = 1.0f / height; } -void swTextureParameters(uint32_t 
id, int param, int value) +void swTexParameteri(int param, int value) { - if (!sw_is_texture_id_valid(id)) { + uint32_t id = RLSW.currentTexture; + + if (!sw_is_texture_valid(id)) { RLSW.errCode = SW_INVALID_VALUE; return; } From d3fe48fa7de232db7379d89705ecf420f21495f3 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 22:54:05 +0100 Subject: [PATCH 014/105] texture sampling optimization --- src/external/rlsw.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 9f02ddd4a..c166583cb 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -922,27 +922,26 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa // at the wrong moment during rasterization. It would be worth reviewing // this, although the scanline method complicates things. + // Previous method: There is no need to compute the square root + // because using the squared value, the comparison remains `L2 > 1.0f * 1.0f` + //float du = sqrtf(xDu * xDu + yDu * yDu); + //float dv = sqrtf(xDv * xDv + yDv * yDv); + //float L = (du > dv) ? du : dv; + // Calculate the derivatives for each axis - float du = sqrtf(xDu * xDu + yDu * yDu); - float dv = sqrtf(xDv * xDv + yDv * yDv); - float L = (du > dv) ? du : dv; + float du2 = xDu * xDu + yDu * yDu; + float dv2 = xDv * xDv + yDv * yDv; + float L2 = (du2 > dv2) ? du2 : dv2; - // Select the filter based on the size of the footprint - if (L > 1.0f) { - // Minification - if (tex->minFilter == SW_NEAREST) { - sw_texture_sample_nearest(color, tex, u, v); - } else if (tex->minFilter == SW_LINEAR) { - sw_texture_sample_linear(color, tex, u, v); - } - } else { - // Magnification - if (tex->magFilter == SW_NEAREST) { - sw_texture_sample_nearest(color, tex, u, v); - } else if (tex->magFilter == SW_LINEAR) { - sw_texture_sample_linear(color, tex, u, v); - } + bool useMinFilter = (L2 > 1.0f); + int filter = useMinFilter ? 
tex->minFilter : tex->magFilter; + + if (filter == SW_NEAREST) { + sw_texture_sample_nearest(color, tex, u, v); } + else /* SW_LINEAR */ { + sw_texture_sample_linear(color, tex, u, v); + } } @@ -1949,6 +1948,7 @@ void swInit(int w, int h) RLSW.currentMatrixMode = SW_MODELVIEW; RLSW.currentMatrix = &RLSW.matView; + RLSW.needToUpdateMVP = true; sw_matrix_id(RLSW.matProjection); sw_matrix_id(RLSW.matTexture); From 2666eedeb07020c276a18cfc44879a1de292f466 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 23:13:34 +0100 Subject: [PATCH 015/105] review get pixel functions + review unorm/float conversion --- src/external/rlsw.h | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c166583cb..78c171261 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -337,6 +337,12 @@ void swBindTexture(uint32_t id); #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) +#define SW_FLOAT_TO_UNORM8(x) ((uint8_t)((x) * UINT8_MAX)) +#define SW_FLOAT_TO_UNORM16(x) ((uint16_t)((x) * UINT16_MAX)) + +#define SW_UNORM8_TO_FLOAT(x) ((float)(x) * (1.0f / UINT8_MAX)) +#define SW_UNORM16_TO_FLOAT(x) ((float)(x) * (1.0f / UINT16_MAX)) + #define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) #define SW_STATE_TEXTURE_2D (1 << 0) @@ -639,7 +645,7 @@ static inline float sw_cvt_hf(sw_half_t y) static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { - float gray = (float)((uint8_t*)pixels)[offset] / 255; + float gray = SW_UNORM8_TO_FLOAT(((uint8_t*)pixels)[offset]); color[0] = gray; color[1] = gray; @@ -669,13 +675,10 @@ static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_ static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) { - float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; - float alpha = (float)((uint8_t*)pixels)[2 * 
offset + 1] / 255; + const uint8_t* pixelData = (const uint8_t*)pixels + 2 * offset; - color[0] = gray; - color[1] = gray; - color[2] = gray; - color[3] = alpha; + color[0] = color[1] = color[2] = SW_UNORM8_TO_FLOAT(pixelData[0]); + color[3] = SW_UNORM8_TO_FLOAT(pixelData[1]); } static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) @@ -690,11 +693,11 @@ static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32 static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) { - const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + const uint8_t* pixel = (const uint8_t*)pixels + 3 * offset; - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; + color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); + color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); + color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); color[3] = 1.0f; } @@ -742,10 +745,10 @@ static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint { const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; - color[3] = (float)pixel[3] / 255; + color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); + color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); + color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); + color[3] = SW_UNORM8_TO_FLOAT(pixel[3]); } static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) @@ -1292,12 +1295,12 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, if (ENABLE_DEPTH_TEST) { \ /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? 
*/ \ - float depth = (float)(*dptr) / UINT16_MAX; \ + float depth = SW_UNORM16_TO_FLOAT(*dptr); \ if (z > depth) goto discard; \ } \ \ /* Update the depth buffer */ \ - *dptr = (uint16_t)(z * UINT16_MAX); \ + *dptr = SW_FLOAT_TO_UNORM16(z); \ \ if (ENABLE_COLOR_BLEND) \ { \ @@ -1748,11 +1751,11 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ - float depth = (float)(*dptr) / UINT16_MAX; \ + float depth = SW_UNORM16_TO_FLOAT(*dptr); \ if (z > depth) continue; \ } \ \ - *dptr = (uint16_t)(z * UINT16_MAX); \ + *dptr = SW_FLOAT_TO_UNORM16(z); \ \ int color_index = 4 * pixel_index; \ uint8_t* cptr = &colorBuffer[color_index]; \ @@ -1790,11 +1793,11 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ - float depth = (float)(*dptr) / UINT16_MAX; \ + float depth = SW_UNORM16_TO_FLOAT(*dptr); \ if (z > depth) continue; \ } \ \ - *dptr = (uint16_t)(z * UINT16_MAX); \ + *dptr = SW_FLOAT_TO_UNORM16(z); \ \ int color_index = 4 * pixel_index; \ uint8_t* cptr = &colorBuffer[color_index]; \ From cdf384794d157f518e537be3363743a5e7e2650d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 15 Mar 2025 04:13:37 +0100 Subject: [PATCH 016/105] add several buffer format support Several depth and color formats have been added for the framebuffer. 8-bit, 16-bit, and 24-bit formats are now available for depth. RGB 8-bit (332), RGB 16-bit (565), and RGB 24-bit (888) formats are now available for color. Alpha support is no longer present for the framebuffer at the moment, but it can easily be restored by adding the formats and reinterpolating the alpha in the areas that do not perform color blending. Additionally, this commit brings performance improvements. 
--- src/external/rlsw.h | 454 +++++++++++++++++++++++++++++++++----------- 1 file changed, 339 insertions(+), 115 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 78c171261..3b8d4b363 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -39,6 +39,14 @@ # define SW_FREE(ptr) free(ptr) #endif +#ifndef SW_COLOR_BUFFER_BITS +# define SW_COLOR_BUFFER_BITS 24 +#endif + +#ifndef SW_DEPTH_BUFFER_BITS +# define SW_DEPTH_BUFFER_BITS 16 +#endif + #ifndef SW_MAX_PROJECTION_STACK_SIZE # define SW_MAX_PROJECTION_STACK_SIZE 2 #endif @@ -337,11 +345,8 @@ void swBindTexture(uint32_t id); #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) -#define SW_FLOAT_TO_UNORM8(x) ((uint8_t)((x) * UINT8_MAX)) -#define SW_FLOAT_TO_UNORM16(x) ((uint16_t)((x) * UINT16_MAX)) - -#define SW_UNORM8_TO_FLOAT(x) ((float)(x) * (1.0f / UINT8_MAX)) -#define SW_UNORM16_TO_FLOAT(x) ((float)(x) * (1.0f / UINT16_MAX)) +#define SW_COLOR_PIXEL_SIZE (SW_COLOR_BUFFER_BITS / 8) +#define SW_DEPTH_PIXEL_SIZE (SW_DEPTH_BUFFER_BITS / 8) #define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) @@ -419,16 +424,16 @@ typedef struct { } sw_texture_t; typedef struct { - uint8_t *color; // 32-bit RGBA color buffer - uint16_t *depth; // 16-bit fixed fract buffer + void *color; + void *depth; int width, height; } sw_framebuffer_t; typedef struct { sw_framebuffer_t framebuffer; - uint8_t clearColor[4]; // Color used to clear the screen - uint16_t clearDepth; // Depth value used to clear the screen + float clearColor[4]; // Color used to clear the screen + float clearDepth; // Depth value used to clear the screen uint32_t currentTexture; sw_matrix_t *currentMatrix; @@ -568,9 +573,224 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve } +/* === Framebuffer Part === */ + +static inline void sw_load_framebuffer(void** color, void** depth, int w, int h) +{ + int size = w * h; + + *color = SW_MALLOC(SW_COLOR_PIXEL_SIZE * size); + *depth 
= SW_MALLOC(SW_DEPTH_PIXEL_SIZE * size); + + // TODO: Handle memory allocation failure +} + +static inline void* sw_get_color_address(void* ptr, uint32_t offset) +{ + return (uint8_t*)ptr + offset * SW_COLOR_PIXEL_SIZE; +} + +static inline void sw_inc_color_address(void** ptr) +{ + *ptr = (void*)(((uint8_t*)*ptr) + SW_COLOR_PIXEL_SIZE); +} + +static inline void* sw_get_depth_address(void* ptr, uint32_t offset) +{ + return (uint8_t*)ptr + offset * SW_DEPTH_PIXEL_SIZE; +} + +static inline void sw_inc_depth_address(void** ptr) +{ + *ptr = (void*)(((uint8_t*)*ptr) + SW_DEPTH_PIXEL_SIZE); +} + +static inline void sw_read_color(float dst[4], const void* src) +{ +#if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 + uint8_t pixel = ((uint8_t*)src)[0]; + dst[0] = ((pixel >> 5) & 0x07) * (1.0f / 7.0f); + dst[1] = ((pixel >> 2) & 0x07) * (1.0f / 7.0f); + dst[2] = (pixel & 0x03) * (1.0f / 3.0f); + dst[3] = 1.0f; +#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 + uint16_t pixel = ((uint16_t*)src)[0]; + dst[0] = ((pixel >> 11) & 0x1F) * (1.0f / 31.0f); + dst[1] = ((pixel >> 5) & 0x3F) * (1.0f / 63.0f); + dst[2] = (pixel & 0x1F) * (1.0f / 31.0f); + dst[3] = 1.0f; +#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 + dst[0] = ((uint8_t*)src)[0] * (1.0f / 255.0f); + dst[1] = ((uint8_t*)src)[1] * (1.0f / 255.0f); + dst[2] = ((uint8_t*)src)[2] * (1.0f / 255.0f); + dst[3] = 1.0f; +#endif +} + +static inline void sw_write_color(void* dst, float color[3]) +{ +#if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 + uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; + uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; + uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + ((uint8_t*)dst)[0] = (r << 5) | (g << 2) | b; + +#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + ((uint16_t*)dst)[0] = (r << 11) | (g << 5) | 
b; + +#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 + ((uint8_t*)dst)[0] = (uint8_t)(color[0] * UINT8_MAX); + ((uint8_t*)dst)[1] = (uint8_t)(color[1] * UINT8_MAX); + ((uint8_t*)dst)[2] = (uint8_t)(color[2] * UINT8_MAX); +#endif +} + +static inline float sw_read_depth(const void* src) +{ +#if (SW_DEPTH_BUFFER_BITS == 8) + return (float)((uint8_t*)src)[0] * (1.0f / UINT8_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 16) + return (float)((uint16_t*)src)[0] * (1.0f / UINT16_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t depth24 = (((uint8_t*)src)[0] << 16) | + (((uint8_t*)src)[1] << 8) | + ((uint8_t*)src)[2]; + return depth24 / (float)0xFFFFFF; +#endif +} + +static inline void sw_write_depth(void* dst, float depth) +{ +#if (SW_DEPTH_BUFFER_BITS == 8) + ((uint8_t*)dst)[0] = (uint8_t)(depth * UINT8_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 16) + ((uint16_t*)dst)[0] = (uint16_t)(depth * UINT16_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t depth24 = (uint32_t)(depth * 0xFFFFFF); + ((uint8_t*)dst)[0] = (depth24 >> 16) & 0xFF; + ((uint8_t*)dst)[1] = (depth24 >> 8) & 0xFF; + ((uint8_t*)dst)[2] = depth24 & 0xFF; +#endif +} + +static inline void sw_fill_color(void* ptr, int size, float color[4]) +{ +#if (SW_COLOR_BUFFER_BITS == 8) + uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; + uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; + uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = (r << 5) | (g << 2) | b; + } +#elif (SW_COLOR_BUFFER_BITS == 16) + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + uint16_t* p = (uint16_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = (r << 11) | (g << 5) | b; + } +#elif (SW_COLOR_BUFFER_BITS == 24) + uint8_t r = (uint8_t)(color[0] * 255); + uint8_t g = (uint8_t)(color[1] * 255); + uint8_t b = (uint8_t)(color[2] * 255); 
+ uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + *p++ = r; + *p++ = g; + *p++ = b; + } +#endif +} + +static inline void sw_fill_depth(void* ptr, int size, float value) +{ +#if (SW_DEPTH_BUFFER_BITS == 8) + uint8_t v = value * UINT8_MAX; + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = v; + } +#elif (SW_DEPTH_BUFFER_BITS == 16) + uint16_t v = value * UINT16_MAX; + uint16_t* p = (uint16_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = v; + } +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t v = value * UINT32_MAX; + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + *p++ = (v >> 16) & 0xFF; + *p++ = (v >> 8) & 0xFF; + *p++ = v & 0xFF; + } +#endif +} + +static inline void sw_fill_color_and_depth(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) +{ +#if (SW_COLOR_BUFFER_BITS == 8) + uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; + uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; + uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + uint8_t* color_p = (uint8_t*)color_ptr; +#elif (SW_COLOR_BUFFER_BITS == 16) + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + uint16_t* color_p = (uint16_t*)color_ptr; +#elif (SW_COLOR_BUFFER_BITS == 24) + uint8_t r = (uint8_t)(color[0] * 255); + uint8_t g = (uint8_t)(color[1] * 255); + uint8_t b = (uint8_t)(color[2] * 255); + uint8_t* color_p = (uint8_t*)color_ptr; +#endif + +#if (SW_DEPTH_BUFFER_BITS == 8) + uint8_t depth_v = depth_value * UINT8_MAX; + uint8_t* depth_p = (uint8_t*)depth_ptr; +#elif (SW_DEPTH_BUFFER_BITS == 16) + uint16_t depth_v = depth_value * UINT16_MAX; + uint16_t* depth_p = (uint16_t*)depth_ptr; +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t depth_v = depth_value * UINT32_MAX; + uint8_t* depth_p = (uint8_t*)depth_ptr; +#endif + + for (int i = 0; i < size; i++) { + // Remplir le 
buffer de couleurs +#if (SW_COLOR_BUFFER_BITS == 8) + color_p[i] = (r << 5) | (g << 2) | b; +#elif (SW_COLOR_BUFFER_BITS == 16) + color_p[i] = (r << 11) | (g << 5) | b; +#elif (SW_COLOR_BUFFER_BITS == 24) + *color_p++ = r; + *color_p++ = g; + *color_p++ = b; +#endif + + // Remplir le buffer de profondeur +#if (SW_DEPTH_BUFFER_BITS == 8) + depth_p[i] = depth_v; +#elif (SW_DEPTH_BUFFER_BITS == 16) + depth_p[i] = depth_v; +#elif (SW_DEPTH_BUFFER_BITS == 24) + *depth_p++ = (depth_v >> 16) & 0xFF; + *depth_p++ = (depth_v >> 8) & 0xFF; + *depth_p++ = depth_v & 0xFF; +#endif + } +} + + /* === Pixel Format Part === */ -int sw_get_pixel_format(SWformat format, SWtype type) +static inline int sw_get_pixel_format(SWformat format, SWtype type) { int channels = 0; int bitsPerChannel = 8; // Default: 8 bits per channel @@ -645,7 +865,7 @@ static inline float sw_cvt_hf(sw_half_t y) static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { - float gray = SW_UNORM8_TO_FLOAT(((uint8_t*)pixels)[offset]); + float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); color[0] = gray; color[1] = gray; @@ -677,8 +897,8 @@ static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels { const uint8_t* pixelData = (const uint8_t*)pixels + 2 * offset; - color[0] = color[1] = color[2] = SW_UNORM8_TO_FLOAT(pixelData[0]); - color[3] = SW_UNORM8_TO_FLOAT(pixelData[1]); + color[0] = color[1] = color[2] = (float)pixelData[0] * (1.0f / 255); + color[3] = (float)pixelData[1] * (1.0f / 255); } static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) @@ -695,9 +915,9 @@ static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32 { const uint8_t* pixel = (const uint8_t*)pixels + 3 * offset; - color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); - color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); - color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); + color[0] = (float)pixel[0] * (1.0f / 255); + color[1] = (float)pixel[1] 
* (1.0f / 255); + color[2] = (float)pixel[2] * (1.0f / 255); color[3] = 1.0f; } @@ -745,10 +965,10 @@ static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint { const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; - color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); - color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); - color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); - color[3] = SW_UNORM8_TO_FLOAT(pixel[3]); + color[0] = (float)pixel[0] * (1.0f / 255); + color[1] = (float)pixel[1] * (1.0f / 255); + color[2] = (float)pixel[2] * (1.0f / 255); + color[3] = (float)pixel[3] * (1.0f / 255); } static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) @@ -1279,12 +1499,14 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ - uint8_t* cptrRow = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ - uint8_t* cptr = cptrRow + xStart * 4; \ + void* cptr = sw_get_color_address( \ + RLSW.framebuffer.color, y * RLSW.framebuffer.width + xStart \ + ); \ \ /* Pre-calculate the pointer for the depth buffer row */ \ - uint16_t* dptrRow = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ - uint16_t* dptr = dptrRow; \ + void* dptr = sw_get_depth_address( \ + RLSW.framebuffer.depth, y * RLSW.framebuffer.width + xStart \ + ); \ \ /* Scanline rasterization loop */ \ for (int x = xStart; x < xEnd; x++) { \ @@ -1292,66 +1514,73 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, float w = 1.0f / (start->homogeneous[3] + t * dw); \ float z = start->homogeneous[2] + t * dz; \ \ - if (ENABLE_DEPTH_TEST) { \ + if (ENABLE_DEPTH_TEST) \ + { \ /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? 
*/ \ - float depth = SW_UNORM16_TO_FLOAT(*dptr); \ + float depth = sw_read_depth(dptr); \ if (z > depth) goto discard; \ } \ \ /* Update the depth buffer */ \ - *dptr = SW_FLOAT_TO_UNORM16(z); \ + sw_write_depth(dptr, z); \ \ if (ENABLE_COLOR_BLEND) \ { \ - float dstColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; \ - float srcColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; \ + float dstColor[4]; \ + sw_read_color(dstColor, cptr); \ \ - if (ENABLE_TEXTURE) { \ + float srcColor[4]; \ + if (ENABLE_TEXTURE) \ + { \ sw_texture_sample(srcColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + srcColor[0] *= (start->color[0] + t * dcol[0]) * w; \ + srcColor[1] *= (start->color[1] + t * dcol[1]) * w; \ + srcColor[2] *= (start->color[2] + t * dcol[2]) * w; \ + srcColor[3] *= (start->color[3] + t * dcol[3]) * w; \ } \ - \ - for (int i = 0; i < 4; i++) { \ - dstColor[i] = (float)cptr[i] / 255; \ - srcColor[i] *= (start->color[i] + t * dcol[i]) * w; \ + else \ + { \ + srcColor[0] = (start->color[0] + t * dcol[0]) * w; \ + srcColor[1] = (start->color[1] + t * dcol[1]) * w; \ + srcColor[2] = (start->color[2] + t * dcol[2]) * w; \ + srcColor[3] = (start->color[3] + t * dcol[3]) * w; \ } \ \ sw_blend_colors(dstColor, srcColor); \ \ - for (int i = 0; i < 4; i++) { \ - cptr[i] = (uint8_t)(sw_saturate(dstColor[i]) * 255); \ - } \ + dstColor[0] = sw_saturate(dstColor[0]); \ + dstColor[1] = sw_saturate(dstColor[1]); \ + dstColor[2] = sw_saturate(dstColor[2]); \ + \ + sw_write_color(cptr, dstColor); \ } \ else \ { \ if (ENABLE_TEXTURE) \ { \ - /* Sample the texture */ \ - float texColor[4]; \ - sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ - \ - /* Interpolate the color and modulate by the texture color */ \ - for (int i = 0; i < 4; i++) { \ - float finalColor = texColor[i]; \ - finalColor *= (start->color[i] + t * dcol[i]) * w; \ - cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ - } \ + float color[4]; \ + sw_texture_sample(color, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + 
color[0] = sw_saturate(color[0] * (start->color[0] + t * dcol[0]) * w); \ + color[1] = sw_saturate(color[1] * (start->color[1] + t * dcol[1]) * w); \ + color[2] = sw_saturate(color[2] * (start->color[2] + t * dcol[2]) * w); \ + sw_write_color(cptr, color); \ } \ else \ { \ - /* Interpolate the color */ \ - for (int i = 0; i < 4; i++) { \ - float finalColor = (start->color[i] + t * dcol[i]) * w; \ - cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ - } \ + float color[3]; \ + color[0] = sw_saturate((start->color[0] + t * dcol[0]) * w); \ + color[1] = sw_saturate((start->color[1] + t * dcol[1]) * w); \ + color[2] = sw_saturate((start->color[2] + t * dcol[2]) * w); \ + sw_write_color(cptr, color); \ } \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ t += dt; \ - cptr += 4; \ - dptr++; \ + sw_inc_color_address(&cptr); \ + sw_inc_depth_address(&dptr); \ if (ENABLE_TEXTURE) { \ u += xDu; \ v += xDv; \ @@ -1747,38 +1976,39 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int x = x1 + (j >> 16); \ int y = y1 + i; \ float z = z1 + t * zDiff; \ - int pixel_index = y * fbWidth + x; \ + int offset = y * fbWidth + x; \ \ - uint16_t* dptr = &depthBuffer[pixel_index]; \ + void* dptr = sw_get_depth_address(depthBuffer, offset); \ if (ENABLE_DEPTH_TEST) { \ - float depth = SW_UNORM16_TO_FLOAT(*dptr); \ + float depth = sw_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - *dptr = SW_FLOAT_TO_UNORM16(z); \ + sw_write_depth(dptr, z); \ \ - int color_index = 4 * pixel_index; \ - uint8_t* cptr = &colorBuffer[color_index]; \ + void* cptr = sw_get_depth_address(colorBuffer, offset); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ + sw_read_color(dstColor, cptr); \ + \ float srcColor[4]; \ - for (int j = 0; j < 4; j++) { \ - dstColor[j] = (float)cptr[i] / 255; \ - srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ - } \ + srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + srcColor[1] = 
sw_lerp(v0->color[1], v1->color[1], t); \ + srcColor[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ + \ sw_blend_colors(dstColor, srcColor); \ - for (int j = 0; j < 4; j++) { \ - cptr[j] = (uint8_t)(dstColor[j] * 255); \ - } \ + sw_write_color(cptr, dstColor); \ } \ else \ { \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ - } \ + float color[3]; \ + color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + sw_write_color(cptr, color); \ } \ } \ } \ @@ -1789,38 +2019,39 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int x = x1 + i; \ int y = y1 + (j >> 16); \ float z = z1 + t * zDiff; \ - int pixel_index = y * fbWidth + x; \ + int offset = y * fbWidth + x; \ \ - uint16_t* dptr = &depthBuffer[pixel_index]; \ + void* dptr = sw_get_depth_address(depthBuffer, offset); \ if (ENABLE_DEPTH_TEST) { \ - float depth = SW_UNORM16_TO_FLOAT(*dptr); \ + float depth = sw_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - *dptr = SW_FLOAT_TO_UNORM16(z); \ + sw_write_depth(dptr, z); \ \ - int color_index = 4 * pixel_index; \ - uint8_t* cptr = &colorBuffer[color_index]; \ + void* cptr = sw_get_depth_address(colorBuffer, offset); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ + sw_read_color(dstColor, cptr); \ + \ float srcColor[4]; \ - for (int j = 0; j < 4; j++) { \ - dstColor[j] = (float)cptr[i] / 255; \ - srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ - } \ + srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + srcColor[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + srcColor[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ + \ sw_blend_colors(dstColor, srcColor); \ - for (int j = 0; j < 4; j++) { \ - cptr[j] = 
(uint8_t)(dstColor[j] * 255); \ - } \ + sw_write_color(cptr, dstColor); \ } \ else \ { \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ - } \ + float color[3]; \ + color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + sw_write_color(cptr, color); \ } \ } \ } \ @@ -1934,8 +2165,11 @@ void swInit(int w, int h) { swViewport(0, 0, w, h); - RLSW.framebuffer.color = SW_MALLOC(4 * w * h); - RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); + sw_load_framebuffer( + &RLSW.framebuffer.color, + &RLSW.framebuffer.depth, + w, h + ); RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; @@ -1943,11 +2177,11 @@ void swInit(int w, int h) RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); - RLSW.clearColor[0] = 0; - RLSW.clearColor[1] = 0; - RLSW.clearColor[2] = 0; - RLSW.clearColor[3] = 255; - RLSW.clearDepth = UINT16_MAX; + RLSW.clearColor[0] = 0.0f; + RLSW.clearColor[1] = 0.0f; + RLSW.clearColor[2] = 0.0f; + RLSW.clearColor[3] = 1.0f; + RLSW.clearDepth = 1.0f; RLSW.currentMatrixMode = SW_MODELVIEW; RLSW.currentMatrix = &RLSW.matView; @@ -2348,37 +2582,27 @@ void swViewport(int x, int y, int width, int height) void swClearColor(float r, float g, float b, float a) { - RLSW.clearColor[0] = r * 255; - RLSW.clearColor[1] = g * 255; - RLSW.clearColor[2] = b * 255; - RLSW.clearColor[3] = a * 255; + RLSW.clearColor[0] = r; + RLSW.clearColor[1] = g; + RLSW.clearColor[2] = b; + RLSW.clearColor[3] = a; } void swClear(uint32_t bitmask) { int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - uint32_t* cptr = (uint32_t*)RLSW.framebuffer.color; - uint16_t* dptr = RLSW.framebuffer.depth; - - uint32_t c = *((uint32_t*)RLSW.clearColor); - uint16_t d = RLSW.clearDepth; - if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == 
(SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { - for (int i = 0; i < size; i++) { - cptr[i] = c; - dptr[i] = d; - } + sw_fill_color_and_depth( + RLSW.framebuffer.color, RLSW.framebuffer.depth, + size, RLSW.clearColor, RLSW.clearDepth + ); } else if (bitmask & (SW_COLOR_BUFFER_BIT)) { - for (int i = 0; i < size; i++) { - cptr[i] = c; - } + sw_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); } else if (bitmask & SW_DEPTH_BUFFER_BIT) { - for (int i = 0; i < size; i++) { - dptr[i] = d; - } + sw_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); } } From 645b5e9c45091ee03a55040ab7aa896d2e866f0b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 00:09:13 +0100 Subject: [PATCH 017/105] tweaks --- src/external/rlsw.h | 254 +++++++++++++++++++++++--------------------- 1 file changed, 132 insertions(+), 122 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 3b8d4b363..d30db2ac4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -259,10 +259,18 @@ typedef enum { void swInit(int w, int h); void swClose(void); +void* swGetColorBuffer(int* w, int* h); + void swEnable(SWstate state); void swDisable(SWstate state); -void* swGetColorBuffer(int* w, int* h); +void swViewport(int x, int y, int width, int height); + +void swClearColor(float r, float g, float b, float a); +void swClear(uint32_t bitmask); + +void swBlendFunc(SWfactor sfactor, SWfactor dfactor); +void swCullFace(SWface face); void swMatrixMode(SWmatrix mode); void swPushMatrix(void); @@ -275,14 +283,6 @@ void swMultMatrixf(const float* mat); void swFrustum(double left, double right, double bottom, double top, double znear, double zfar); void swOrtho(double left, double right, double bottom, double top, double znear, double zfar); -void swViewport(int x, int y, int width, int height); - -void swClearColor(float r, float g, float b, float a); -void swClear(uint32_t bitmask); - -void swBlendFunc(SWfactor sfactor, SWfactor dfactor); -void swCullFace(SWface 
face); - void swBegin(SWdraw mode); void swEnd(void); @@ -575,7 +575,7 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve /* === Framebuffer Part === */ -static inline void sw_load_framebuffer(void** color, void** depth, int w, int h) +static inline void sw_framebuffer_load(void** color, void** depth, int w, int h) { int size = w * h; @@ -585,27 +585,27 @@ static inline void sw_load_framebuffer(void** color, void** depth, int w, int h) // TODO: Handle memory allocation failure } -static inline void* sw_get_color_address(void* ptr, uint32_t offset) +static inline void* sw_framebuffer_get_color_addr(void* ptr, uint32_t offset) { return (uint8_t*)ptr + offset * SW_COLOR_PIXEL_SIZE; } -static inline void sw_inc_color_address(void** ptr) +static inline void sw_framebuffer_inc_color_addr(void** ptr) { *ptr = (void*)(((uint8_t*)*ptr) + SW_COLOR_PIXEL_SIZE); } -static inline void* sw_get_depth_address(void* ptr, uint32_t offset) +static inline void* sw_framebuffer_get_depth_addr(void* ptr, uint32_t offset) { return (uint8_t*)ptr + offset * SW_DEPTH_PIXEL_SIZE; } -static inline void sw_inc_depth_address(void** ptr) +static inline void sw_framebuffer_inc_depth_addr(void** ptr) { *ptr = (void*)(((uint8_t*)*ptr) + SW_DEPTH_PIXEL_SIZE); } -static inline void sw_read_color(float dst[4], const void* src) +static inline void sw_framebuffer_read_color(float dst[4], const void* src) { #if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 uint8_t pixel = ((uint8_t*)src)[0]; @@ -627,7 +627,7 @@ static inline void sw_read_color(float dst[4], const void* src) #endif } -static inline void sw_write_color(void* dst, float color[3]) +static inline void sw_framebuffer_write_color(void* dst, float color[3]) { #if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -648,7 +648,7 @@ static inline void sw_write_color(void* dst, float color[3]) #endif } -static inline float sw_read_depth(const void* src) +static inline 
float sw_framebuffer_read_depth(const void* src) { #if (SW_DEPTH_BUFFER_BITS == 8) return (float)((uint8_t*)src)[0] * (1.0f / UINT8_MAX); @@ -662,7 +662,7 @@ static inline float sw_read_depth(const void* src) #endif } -static inline void sw_write_depth(void* dst, float depth) +static inline void sw_framebuffer_write_depth(void* dst, float depth) { #if (SW_DEPTH_BUFFER_BITS == 8) ((uint8_t*)dst)[0] = (uint8_t)(depth * UINT8_MAX); @@ -676,7 +676,7 @@ static inline void sw_write_depth(void* dst, float depth) #endif } -static inline void sw_fill_color(void* ptr, int size, float color[4]) +static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) { #if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -707,7 +707,7 @@ static inline void sw_fill_color(void* ptr, int size, float color[4]) #endif } -static inline void sw_fill_depth(void* ptr, int size, float value) +static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) { #if (SW_DEPTH_BUFFER_BITS == 8) uint8_t v = value * UINT8_MAX; @@ -732,7 +732,7 @@ static inline void sw_fill_depth(void* ptr, int size, float value) #endif } -static inline void sw_fill_color_and_depth(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) +static inline void sw_framebuffer_fill(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) { #if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -1499,12 +1499,12 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ - void* cptr = sw_get_color_address( \ + void* cptr = sw_framebuffer_get_color_addr( \ RLSW.framebuffer.color, y * RLSW.framebuffer.width + xStart \ ); \ \ /* Pre-calculate the pointer for the depth buffer row */ \ - void* dptr = sw_get_depth_address( \ + void* dptr = sw_framebuffer_get_depth_addr( \ 
RLSW.framebuffer.depth, y * RLSW.framebuffer.width + xStart \ ); \ \ @@ -1518,17 +1518,17 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, { \ /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? */ \ - float depth = sw_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(dptr); \ if (z > depth) goto discard; \ } \ \ /* Update the depth buffer */ \ - sw_write_depth(dptr, z); \ + sw_framebuffer_write_depth(dptr, z); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, cptr); \ \ float srcColor[4]; \ if (ENABLE_TEXTURE) \ @@ -1553,7 +1553,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, dstColor[1] = sw_saturate(dstColor[1]); \ dstColor[2] = sw_saturate(dstColor[2]); \ \ - sw_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ @@ -1564,7 +1564,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, color[0] = sw_saturate(color[0] * (start->color[0] + t * dcol[0]) * w); \ color[1] = sw_saturate(color[1] * (start->color[1] + t * dcol[1]) * w); \ color[2] = sw_saturate(color[2] * (start->color[2] + t * dcol[2]) * w); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ else \ { \ @@ -1572,15 +1572,15 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, color[0] = sw_saturate((start->color[0] + t * dcol[0]) * w); \ color[1] = sw_saturate((start->color[1] + t * dcol[1]) * w); \ color[2] = sw_saturate((start->color[2] + t * dcol[2]) * w); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ t += dt; \ - sw_inc_color_address(&cptr); \ - sw_inc_depth_address(&dptr); \ + sw_framebuffer_inc_color_addr(&cptr); \ + 
sw_framebuffer_inc_depth_addr(&dptr); \ if (ENABLE_TEXTURE) { \ u += xDu; \ v += xDv; \ @@ -1978,20 +1978,25 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ float z = z1 + t * zDiff; \ int offset = y * fbWidth + x; \ \ - void* dptr = sw_get_depth_address(depthBuffer, offset); \ + void* dptr = sw_framebuffer_get_depth_addr( \ + depthBuffer, offset \ + ); \ + \ if (ENABLE_DEPTH_TEST) { \ - float depth = sw_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - sw_write_depth(dptr, z); \ + sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_get_depth_address(colorBuffer, offset); \ + void* cptr = sw_framebuffer_get_depth_addr( \ + colorBuffer, offset \ + ); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, cptr); \ \ float srcColor[4]; \ srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ @@ -2000,7 +2005,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ \ sw_blend_colors(dstColor, srcColor); \ - sw_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ @@ -2008,7 +2013,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ } \ } \ @@ -2021,20 +2026,25 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ float z = z1 + t * zDiff; \ int offset = y * fbWidth + x; \ \ - void* dptr = sw_get_depth_address(depthBuffer, offset); \ + void* dptr = sw_framebuffer_get_depth_addr( \ + depthBuffer, offset \ + ); \ + \ if (ENABLE_DEPTH_TEST) { \ - float depth = sw_read_depth(dptr); \ + float depth = 
sw_framebuffer_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - sw_write_depth(dptr, z); \ + sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_get_depth_address(colorBuffer, offset); \ + void* cptr = sw_framebuffer_get_depth_addr( \ + colorBuffer, offset \ + ); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, cptr); \ \ float srcColor[4]; \ srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ @@ -2043,7 +2053,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ \ sw_blend_colors(dstColor, srcColor); \ - sw_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ @@ -2051,7 +2061,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ } \ } \ @@ -2165,7 +2175,7 @@ void swInit(int w, int h) { swViewport(0, 0, w, h); - sw_load_framebuffer( + sw_framebuffer_load( &RLSW.framebuffer.color, &RLSW.framebuffer.depth, w, h @@ -2240,6 +2250,14 @@ void swClose(void) SW_FREE(RLSW.freeTextureIds); } +void* swGetColorBuffer(int* w, int* h) +{ + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; +} + void swEnable(SWstate state) { switch (state) { @@ -2282,12 +2300,76 @@ void swDisable(SWstate state) } } -void* swGetColorBuffer(int* w, int* h) +void swViewport(int x, int y, int width, int height) { - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; + if (x <= -width || y <= -height) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } - return RLSW.framebuffer.color; + RLSW.vpPos[0] = x; + RLSW.vpPos[1] = y; + + 
RLSW.vpDim[0] = width - 1; + RLSW.vpDim[1] = height - 1; + + RLSW.vpMin[0] = (x < 0) ? 0 : x; + RLSW.vpMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; +} + +void swClearColor(float r, float g, float b, float a) +{ + RLSW.clearColor[0] = r; + RLSW.clearColor[1] = g; + RLSW.clearColor[2] = b; + RLSW.clearColor[3] = a; +} + +void swClear(uint32_t bitmask) +{ + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + + if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { + sw_framebuffer_fill( + RLSW.framebuffer.color, RLSW.framebuffer.depth, + size, RLSW.clearColor, RLSW.clearDepth + ); + } + else if (bitmask & (SW_COLOR_BUFFER_BIT)) { + sw_framebuffer_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); + } + else if (bitmask & SW_DEPTH_BUFFER_BIT) { + sw_framebuffer_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); + } +} + +void swBlendFunc(SWfactor sfactor, SWfactor dfactor) +{ + if (!sw_is_blend_src_factor_valid(sfactor) + || !sw_is_blend_dst_factor_valid(dfactor)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.srcFactor = sfactor; + RLSW.dstFactor = dfactor; +} + +void swCullFace(SWface face) +{ + if (!sw_is_face_valid(face)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.cullFace = face; } void swMatrixMode(SWmatrix mode) @@ -2554,78 +2636,6 @@ void swOrtho(double left, double right, double bottom, double top, double znear, RLSW.needToUpdateMVP = true; } -void swViewport(int x, int y, int width, int height) -{ - if (x <= -width || y <= -height) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - RLSW.vpPos[0] = x; - RLSW.vpPos[1] = y; - - RLSW.vpDim[0] = width - 1; - RLSW.vpDim[1] = height - 1; - - RLSW.vpMin[0] = (x < 0) ? 
0 : x; - RLSW.vpMin[1] = (y < 0) ? 0 : y; - - int fbW = RLSW.framebuffer.width - 1; - int fbH = RLSW.framebuffer.height - 1; - - int vpMaxX = x + width; - int vpMaxY = y + height; - - RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; - RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; -} - -void swClearColor(float r, float g, float b, float a) -{ - RLSW.clearColor[0] = r; - RLSW.clearColor[1] = g; - RLSW.clearColor[2] = b; - RLSW.clearColor[3] = a; -} - -void swClear(uint32_t bitmask) -{ - int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - - if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { - sw_fill_color_and_depth( - RLSW.framebuffer.color, RLSW.framebuffer.depth, - size, RLSW.clearColor, RLSW.clearDepth - ); - } - else if (bitmask & (SW_COLOR_BUFFER_BIT)) { - sw_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); - } - else if (bitmask & SW_DEPTH_BUFFER_BIT) { - sw_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); - } -} - -void swBlendFunc(SWfactor sfactor, SWfactor dfactor) -{ - if (!sw_is_blend_src_factor_valid(sfactor) - || !sw_is_blend_dst_factor_valid(dfactor)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - RLSW.srcFactor = sfactor; - RLSW.dstFactor = dfactor; -} - -void swCullFace(SWface face) -{ - if (!sw_is_face_valid(face)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - RLSW.cullFace = face; -} - void swBegin(SWdraw mode) { if (mode < SW_POINTS || mode > SW_QUADS) { From 3bc8f36f146f2dd901776a0c04df93239e75abfd Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 00:50:34 +0100 Subject: [PATCH 018/105] impl line width --- src/external/rlsw.h | 99 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d30db2ac4..4b18027b0 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -272,6 +272,8 @@ void swClear(uint32_t bitmask); void 
swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); +void swLineWidth(float width); + void swMatrixMode(SWmatrix mode); void swPushMatrix(void); void swPopMatrix(void); @@ -1989,7 +1991,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_framebuffer_get_depth_addr( \ + void* cptr = sw_framebuffer_get_color_addr( \ colorBuffer, offset \ ); \ \ @@ -2037,7 +2039,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_framebuffer_get_depth_addr( \ + void* cptr = sw_framebuffer_get_color_addr( \ colorBuffer, offset \ ); \ \ @@ -2067,28 +2069,94 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ } \ } +#define DEFINE_LINE_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ +void FUNC_NAME(const sw_vertex_t* v1, const sw_vertex_t* v2) \ +{ \ + sw_vertex_t tv1, tv2; \ + \ + int x1 = (int)v1->screen[0]; \ + int y1 = (int)v1->screen[1]; \ + int x2 = (int)v2->screen[0]; \ + int y2 = (int)v2->screen[1]; \ + \ + int dx = x2 - x1; \ + int dy = y2 - y1; \ + \ + RASTER_FUNC(v1, v2); \ + \ + if (dx != 0 && abs(dy / dx) < 1) { \ + int wy = (int)((RLSW.lineWidth - 1.0f) * abs(dx) / sqrtf(dx * dx + dy * dy)); \ + wy >>= 1; /* Division by 2 via bit shift */ \ + for (int i = 1; i <= wy; i++) { \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[1] -= i; \ + tv2.screen[1] -= i; \ + RASTER_FUNC(&tv1, &tv2); \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[1] += i; \ + tv2.screen[1] += i; \ + RASTER_FUNC(&tv1, &tv2); \ + } \ + } \ + else if (dy != 0) { \ + int wx = (int)((RLSW.lineWidth - 1.0f) * abs(dy) / sqrtf(dx * dx + dy * dy)); \ + wx >>= 1; /* Division by 2 via bit shift */ \ + for (int i = 1; i <= wx; i++) { \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[0] -= i; \ + tv2.screen[0] -= i; \ + RASTER_FUNC(&tv1, &tv2); \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[0] += i; \ + tv2.screen[0] += i; \ 
+ RASTER_FUNC(&tv1, &tv2); \ + } \ + } \ +} + DEFINE_LINE_RASTER(sw_line_raster, 0, 0) DEFINE_LINE_RASTER(sw_line_raster_DEPTH, 1, 0) DEFINE_LINE_RASTER(sw_line_raster_BLEND, 0, 1) DEFINE_LINE_RASTER(sw_line_raster_DEPTH_BLEND, 1, 1) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster, sw_line_raster) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH, sw_line_raster_DEPTH) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_BLEND, sw_line_raster_BLEND) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH_BLEND, sw_line_raster_DEPTH_BLEND) + static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) { if (!sw_line_project_and_clip(v0, v1)) { return; } - if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { - sw_line_raster_DEPTH_BLEND(v0, v1); - } - else if (SW_STATE_CHECK(SW_STATE_BLEND)) { - sw_line_raster_BLEND(v0, v1); - } - else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { - sw_line_raster_DEPTH(v0, v1); + if (RLSW.lineWidth >= 2.0f) { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_line_thick_raster_DEPTH_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_line_thick_raster_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_line_thick_raster_DEPTH(v0, v1); + } + else { + sw_line_thick_raster(v0, v1); + } } else { - sw_line_raster(v0, v1); + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_line_raster_DEPTH_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_line_raster_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_line_raster_DEPTH(v0, v1); + } + else { + sw_line_raster(v0, v1); + } } } @@ -2173,8 +2241,6 @@ static inline bool sw_is_blend_dst_factor_valid(int blend) void swInit(int w, int h) { - swViewport(0, 0, w, h); - sw_framebuffer_load( &RLSW.framebuffer.color, &RLSW.framebuffer.depth, @@ -2184,6 +2250,8 @@ void swInit(int w, int h) RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; + swViewport(0, 0, w, h); + 
RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); @@ -2372,6 +2440,11 @@ void swCullFace(SWface face) RLSW.cullFace = face; } +void swLineWidth(float width) +{ + RLSW.lineWidth = roundf(width); +} + void swMatrixMode(SWmatrix mode) { switch (mode) { From e15c0d963964b472ee5a34e4cca0e60ce55c7128 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 02:17:35 +0100 Subject: [PATCH 019/105] impl points + point size --- src/external/rlsw.h | 179 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 178 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 4b18027b0..574989451 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -272,6 +272,7 @@ void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); +void swPointSize(float size); void swLineWidth(float width); void swMatrixMode(SWmatrix mode); @@ -456,7 +457,7 @@ typedef struct { int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' SWdraw drawMode; // Current polygon filling mode (e.g., lines, triangles) - float pointSize; // Rasterized point size + float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width sw_matrix_t matProjection; // Projection matrix, user adjustable @@ -2160,6 +2161,174 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) } } + +/* === Point Rendering Part === */ + +static inline bool sw_point_project_and_clip(sw_vertex_t* v) +{ + sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); + + if (v->homogeneous[3] != 1.0f) { + for (int_fast8_t i = 0; i < 3; i++) { + if (v->homogeneous[i] < -v->homogeneous[3] || v->homogeneous[i] > v->homogeneous[3]) { + return false; + } + } + v->homogeneous[3] = 1.0f / v->homogeneous[3]; + v->homogeneous[0] *= v->homogeneous[3]; + v->homogeneous[1] *= v->homogeneous[3]; + v->homogeneous[2] *= v->homogeneous[3]; + } + + 
sw_project_ndc_to_screen(v->screen, v->homogeneous); + + return v->screen[0] - RLSW.pointRadius >= RLSW.vpMin[0] + && v->screen[1] - RLSW.pointRadius >= RLSW.vpMin[1] + && v->screen[0] + RLSW.pointRadius <= RLSW.vpMax[0] + && v->screen[1] + RLSW.pointRadius <= RLSW.vpMax[1]; +} + +#define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \ +static inline void FUNC_NAME(int x, int y, float z, float color[4]) \ +{ \ + if (CHECK_BOUNDS) \ + { \ + if (x < 0 || x >= RLSW.framebuffer.width) { \ + return; \ + } \ + if (y < 0 || y >= RLSW.framebuffer.height) { \ + return; \ + } \ + } \ + \ + int offset = y * RLSW.framebuffer.width + x; \ + \ + void* dptr = sw_framebuffer_get_depth_addr( \ + RLSW.framebuffer.depth, offset \ + ); \ + \ + if (ENABLE_DEPTH_TEST) \ + { \ + float depth = sw_framebuffer_read_depth(dptr); \ + if (z > depth) return; \ + } \ + \ + sw_framebuffer_write_depth(dptr, z); \ + \ + void* cptr = sw_framebuffer_get_color_addr( \ + RLSW.framebuffer.color, offset \ + ); \ + \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + sw_framebuffer_read_color(dstColor, cptr); \ + \ + sw_blend_colors(dstColor, color); \ + sw_framebuffer_write_color(cptr, dstColor); \ + } \ + else \ + { \ + sw_framebuffer_write_color(cptr, color); \ + } \ +} + +#define DEFINE_POINT_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ +static inline void FUNC_NAME(sw_vertex_t* v) \ +{ \ + int cx = v->screen[0]; \ + int cy = v->screen[1]; \ + float cz = v->homogeneous[2]; \ + int radius = RLSW.pointRadius; \ + float* color = v->color; \ + \ + int x = 0; \ + int y = radius; \ + int d = 3 - 2 * radius; \ + \ + while (x <= y) { \ + for (int i = -x; i <= x; i++) { \ + RASTER_FUNC(cx + i, cy + y, cz, color); \ + RASTER_FUNC(cx + i, cy - y, cz, color); \ + } \ + for (int i = -y; i <= y; i++) { \ + RASTER_FUNC(cx + i, cy + x, cz, color); \ + RASTER_FUNC(cx + i, cy - x, cz, color); \ + } \ + if (d > 0) { \ + y--; \ + d = d + 4 * (x - y) + 10; \ + } else { \ + d = d + 4 
* x + 6; \ + } \ + x++; \ + } \ +} + +DEFINE_POINT_RASTER(sw_point_raster, 0, 0, 0) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH, 1, 0, 0) +DEFINE_POINT_RASTER(sw_point_raster_BLEND, 0, 1, 0) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND, 1, 1, 0) + +DEFINE_POINT_RASTER(sw_point_raster_CHECK, 0, 0, 1) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK, 1, 0, 1) +DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK, 0, 1, 1) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK, 1, 1, 1) + +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster, sw_point_raster_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH, sw_point_raster_DEPTH_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND, sw_point_raster_BLEND_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND, sw_point_raster_DEPTH_BLEND_CHECK) + +static inline void sw_point_render(sw_vertex_t* v) +{ + if (!sw_point_project_and_clip(v)) { + return; + } + + if (RLSW.pointRadius >= 1.0f) { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_thick_raster_DEPTH_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_thick_raster_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_thick_raster_DEPTH(v); + } + else { + sw_point_thick_raster(v); + } + } + else { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_raster_DEPTH_BLEND( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_raster_BLEND( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_raster_DEPTH( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + else { + sw_point_raster( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + } +} + + /* === Some Validity Check Helper === */ static inline bool sw_is_texture_valid(uint32_t id) @@ -2440,6 +2609,11 @@ void swCullFace(SWface 
face) RLSW.cullFace = face; } +void swPointSize(float size) +{ + RLSW.pointRadius = floorf(size * 0.5f); +} + void swLineWidth(float width) { RLSW.lineWidth = roundf(width); @@ -2805,6 +2979,9 @@ void swVertex4fv(const float* v) switch (RLSW.drawMode) { case SW_POINTS: + sw_point_render( + &RLSW.vertexBuffer[0] + ); break; case SW_LINES: sw_line_render( From 793d56c70157fa74eb454035ed9fa507d7062ff7 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 03:42:33 +0100 Subject: [PATCH 020/105] fix and improve polygon clipping functions --- src/external/rlsw.h | 229 +++++++++++++++++++++++--------------------- 1 file changed, 118 insertions(+), 111 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 574989451..983ec91cf 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1320,144 +1320,151 @@ static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) /* === Triangle Rendering Part === */ -static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) -{ - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - - int inputCounter = *vertexCounter; - *vertexCounter = 0; - - const sw_vertex_t *prevVt = &input[inputCounter-1]; - char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? 
-1 : 1; - if (prevDot*currDot < 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], - (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - return *vertexCounter > 0; +#define DEFINE_CLIP_FUNC(name, FUNC_IS_INSIDE, FUNC_COMPUTE_T) \ +static inline int sw_clip_##name( \ + sw_vertex_t output[SW_MAX_CLIPPED_POLYGON_VERTICES], \ + const sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES], \ + int n) \ +{ \ + const sw_vertex_t *prev = &input[n - 1]; \ + int prevInside = FUNC_IS_INSIDE(prev->homogeneous); \ + int outputCount = 0; \ + \ + for (int i = 0; i < n; i++) { \ + const sw_vertex_t *curr = &input[i]; \ + int currInside = FUNC_IS_INSIDE(curr->homogeneous); \ + \ + /* If transition between interior/exterior, calculate intersection point */ \ + if (prevInside != currInside) { \ + float t = FUNC_COMPUTE_T(prev->homogeneous, curr->homogeneous); \ + output[outputCount++] = sw_lerp_vertex_PNTCH(prev, curr, t); \ + } \ + \ + /* If current vertex inside, add it */ \ + if (currInside) { \ + output[outputCount++] = *curr; \ + } \ + \ + prev = curr; \ + prevInside = currInside; \ + } \ + \ + return outputCount; \ } -static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +#define IS_INSIDE_PLANE_W(h) ((h)[3] >= SW_CLIP_EPSILON) +#define IS_INSIDE_PLANE_X_POS(h) ((h)[0] <= (h)[3]) +#define IS_INSIDE_PLANE_X_NEG(h) (-(h)[0] <= (h)[3]) +#define IS_INSIDE_PLANE_Y_POS(h) ((h)[1] <= (h)[3]) +#define IS_INSIDE_PLANE_Y_NEG(h) (-(h)[1] <= (h)[3]) +#define IS_INSIDE_PLANE_Z_POS(h) ((h)[2] <= (h)[3]) +#define IS_INSIDE_PLANE_Z_NEG(h) (-(h)[2] <= (h)[3]) + +#define COMPUTE_T_PLANE_W(hPrev, hCurr) ((SW_CLIP_EPSILON - (hPrev)[3]) / ((hCurr)[3] - (hPrev)[3])) +#define COMPUTE_T_PLANE_X_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[0]) / (((hPrev)[3] - 
(hPrev)[0]) - ((hCurr)[3] - (hCurr)[0]))) +#define COMPUTE_T_PLANE_X_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[0]) / (((hPrev)[3] + (hPrev)[0]) - ((hCurr)[3] + (hCurr)[0]))) +#define COMPUTE_T_PLANE_Y_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[1]) / (((hPrev)[3] - (hPrev)[1]) - ((hCurr)[3] - (hCurr)[1]))) +#define COMPUTE_T_PLANE_Y_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[1]) / (((hPrev)[3] + (hPrev)[1]) - ((hCurr)[3] + (hCurr)[1]))) +#define COMPUTE_T_PLANE_Z_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[2]) / (((hPrev)[3] - (hPrev)[2]) - ((hCurr)[3] - (hCurr)[2]))) +#define COMPUTE_T_PLANE_Z_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[2]) / (((hPrev)[3] + (hPrev)[2]) - ((hCurr)[3] + (hCurr)[2]))) + +DEFINE_CLIP_FUNC(w, IS_INSIDE_PLANE_W, COMPUTE_T_PLANE_W) +DEFINE_CLIP_FUNC(x_pos, IS_INSIDE_PLANE_X_POS, COMPUTE_T_PLANE_X_POS) +DEFINE_CLIP_FUNC(x_neg, IS_INSIDE_PLANE_X_NEG, COMPUTE_T_PLANE_X_NEG) +DEFINE_CLIP_FUNC(y_pos, IS_INSIDE_PLANE_Y_POS, COMPUTE_T_PLANE_Y_POS) +DEFINE_CLIP_FUNC(y_neg, IS_INSIDE_PLANE_Y_NEG, COMPUTE_T_PLANE_Y_NEG) +DEFINE_CLIP_FUNC(z_pos, IS_INSIDE_PLANE_Z_POS, COMPUTE_T_PLANE_Z_POS) +DEFINE_CLIP_FUNC(z_neg, IS_INSIDE_PLANE_Z_NEG, COMPUTE_T_PLANE_Z_NEG) + +static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { - for (int iAxis = 0; iAxis < 3; iAxis++) - { - if (*vertexCounter == 0) return false; + sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int n = *vertexCounter; - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - int inputCounter; - - const sw_vertex_t *prevVt; - char prevDot; - - // Clip against first plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 
1 : -1; - if (prevDot * currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - if (*vertexCounter == 0) return false; - - // Clip against opposite plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; - if (prevDot*currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } + #define CLIP_AGAINST_PLANE(FUNC_CLIP) \ + { \ + n = FUNC_CLIP(tmp, polygon, n); \ + if (n == 0) return false; \ + for (int i = 0; i < n; i++) { \ + polygon[i] = tmp[i]; \ + } \ } - return *vertexCounter > 0; + CLIP_AGAINST_PLANE(sw_clip_w); + CLIP_AGAINST_PLANE(sw_clip_x_pos); + CLIP_AGAINST_PLANE(sw_clip_x_neg); + CLIP_AGAINST_PLANE(sw_clip_y_pos); + CLIP_AGAINST_PLANE(sw_clip_y_neg); + CLIP_AGAINST_PLANE(sw_clip_z_pos); + CLIP_AGAINST_PLANE(sw_clip_z_neg); + + *vertexCounter = n; + + return n > 0; } static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { + // Step 1: MVP projection for all vertices for (int i = 0; i < *vertexCounter; i++) { - 
sw_vertex_t *v = polygon + i; - sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); + sw_vec4_transform(polygon[i].homogeneous, polygon[i].position, RLSW.matMVP); } + // Step 2: Face culling - discard triangles facing away if (RLSW.stateFlags & SW_STATE_CULL_FACE) { - float x0 = polygon[0].homogeneous[0], y0 = polygon[0].homogeneous[1]; - float x1 = polygon[1].homogeneous[0], y1 = polygon[1].homogeneous[1]; - float x2 = polygon[2].homogeneous[0], y2 = polygon[2].homogeneous[1]; - float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); - if ((RLSW.cullFace == SW_FRONT && sgnArea >= 0) || (RLSW.cullFace == SW_BACK && sgnArea <= 0)) { + // NOTE: Face culling is done before clipping to avoid unnecessary computations. + // However, culling requires NDC coordinates, while clipping must be done + // in homogeneous space to correctly interpolate newly generated vertices. + // This means we need to compute 1/W twice: + // - Once before clipping for face culling. + // - Again after clipping for the new vertices. + + const float invW0 = 1.0f / polygon[0].homogeneous[3]; + const float invW1 = 1.0f / polygon[1].homogeneous[3]; + const float invW2 = 1.0f / polygon[2].homogeneous[3]; + + // Compute the signed 2D area (cross product in Z) + const float x0 = polygon[0].homogeneous[0] * invW0, y0 = polygon[0].homogeneous[1] * invW0; + const float x1 = polygon[1].homogeneous[0] * invW1, y1 = polygon[1].homogeneous[1] * invW1; + const float x2 = polygon[2].homogeneous[0] * invW2, y2 = polygon[2].homogeneous[1] * invW2; + const float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); + + // Discard the triangle if it faces the culled direction + if ((RLSW.cullFace == SW_FRONT) ? 
(sgnArea >= 0) : (sgnArea <= 0)) { *vertexCounter = 0; return; } } + + // Step 3: Clipping and perspective projection + if (sw_triangle_clip(polygon, vertexCounter) && *vertexCounter >= 3) { - if (sw_triangle_clip_w(polygon, vertexCounter) && sw_triangle_clip_xyz(polygon, vertexCounter)) { + // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; + sw_vertex_t *v = &polygon[i]; // Use &polygon[i] instead of polygon + i // Calculation of the reciprocal of W for normalization - // as well as perspective correct attributes - v->homogeneous[3] = 1.0f / v->homogeneous[3]; + // as well as perspective-correct attributes + const float invW = 1.0f / v->homogeneous[3]; + v->homogeneous[3] = invW; // Division of XYZ coordinates by weight - v->homogeneous[0] *= v->homogeneous[3]; - v->homogeneous[1] *= v->homogeneous[3]; - v->homogeneous[2] *= v->homogeneous[3]; + v->homogeneous[0] *= invW; + v->homogeneous[1] *= invW; + v->homogeneous[2] *= invW; - // Division of texture coordinates (perspective correct) - v->texcoord[0] *= v->homogeneous[3]; - v->texcoord[1] *= v->homogeneous[3]; + // Division of texture coordinates (perspective-correct) + v->texcoord[0] *= invW; + v->texcoord[1] *= invW; - // Division of colors (perspective correct) - v->color[0] *= v->homogeneous[3]; - v->color[1] *= v->homogeneous[3]; - v->color[2] *= v->homogeneous[3]; - v->color[3] *= v->homogeneous[3]; - - // Transform to screen space + // Division of colors (perspective-correct) + v->color[0] *= invW; + v->color[1] *= invW; + v->color[2] *= invW; + v->color[3] *= invW; + + // Transformation to screen space sw_project_ndc_to_screen(v->screen, v->homogeneous); } } From fc52b7710ca68a27bcb6c31d8f3a6cad5f1932fc Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 01:28:41 +0100 Subject: [PATCH 021/105] impl polygone modes --- src/external/rlsw.h | 174 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 135 
insertions(+), 39 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 983ec91cf..5fd7a5327 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -103,6 +103,10 @@ //#define GL_QUAD_STRIP 0x0008 //#define GL_POLYGON 0x0009 +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 + //#define GL_CW 0x0900 //#define GL_CCW 0x0901 @@ -187,9 +191,15 @@ typedef enum { SW_POINTS = GL_POINTS, SW_LINES = GL_LINES, SW_TRIANGLES = GL_TRIANGLES, - SW_QUADS = GL_QUADS, + SW_QUADS = GL_QUADS } SWdraw; +typedef enum { + SW_POINT = GL_POINT, + SW_LINE = GL_LINE, + SW_FILL = GL_FILL +} SWpoly; + typedef enum { SW_FRONT = GL_FRONT, SW_BACK = GL_BACK, @@ -270,6 +280,7 @@ void swClearColor(float r, float g, float b, float a); void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); +void swPolygonMode(SWpoly mode); void swCullFace(SWface face); void swPointSize(float size); @@ -456,7 +467,8 @@ typedef struct { sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' - SWdraw drawMode; // Current polygon filling mode (e.g., lines, triangles) + SWdraw drawMode; // Current primitive mode (e.g., lines, triangles) + SWpoly polyMode; // Current polygon rasterization mode (point, line or fill) float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width @@ -2336,6 +2348,66 @@ static inline void sw_point_render(sw_vertex_t* v) } +/* === Polygon Modes Rendering Part === */ + +static inline void sw_poly_point_render(void) +{ + for (int i = 0; i < RLSW.vertexCounter; i++) { + sw_point_render(&RLSW.vertexBuffer[i]); /* one point per buffered vertex, not vertex 0 repeated */ + } +} + +static inline void sw_poly_line_render(void) +{ + const sw_vertex_t* vertices = RLSW.vertexBuffer; + int cm1 = RLSW.vertexCounter - 1; + sw_vertex_t v0, v1; + + for (int i = 0; i < cm1; i++) { + v0 = vertices[i], v1 = vertices[i + 1]; + sw_line_render(&v0, &v1); + } +
+ v0 = vertices[cm1], v1 = vertices[0]; + sw_line_render(&v0, &v1); +} + +static inline void sw_poly_fill_render(void) +{ + switch (RLSW.drawMode) { + case SW_POINTS: + sw_point_render( + &RLSW.vertexBuffer[0] + ); + break; + case SW_LINES: + sw_line_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1] + ); + break; + case SW_TRIANGLES: + sw_triangle_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + break; + case SW_QUADS: + sw_triangle_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + sw_triangle_render( + &RLSW.vertexBuffer[2], + &RLSW.vertexBuffer[3], + &RLSW.vertexBuffer[0] + ); + break; + } +} + /* === Some Validity Check Helper === */ static inline bool sw_is_texture_valid(uint32_t id) @@ -2359,6 +2431,41 @@ static inline bool sw_is_texture_wrap_valid(int wrap) return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); } +static inline bool sw_is_draw_mode_valid(int mode) +{ + bool result = false; + + switch (mode) { + case SW_POINTS: + case SW_LINES: + case SW_TRIANGLES: + case SW_QUADS: + result = true; + break; + default: + break; + } + + return result; +} + +static inline bool sw_is_poly_mode_valid(int mode) +{ + bool result = false; + + switch (mode) { + case SW_POINT: + case SW_LINE: + case SW_FILL: + result = true; + break; + default: + break; + } + + return result; +} + static inline bool sw_is_face_valid(int face) { return (face == SW_FRONT || face == SW_BACK); @@ -2461,6 +2568,7 @@ void swInit(int w, int h) RLSW.srcFactor = SW_SRC_ALPHA; RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; + RLSW.polyMode = SW_FILL; RLSW.cullFace = SW_BACK; static const float defTex[3*2*2] = @@ -2607,6 +2715,15 @@ void swBlendFunc(SWfactor sfactor, SWfactor dfactor) RLSW.dstFactor = dfactor; } +void swPolygonMode(SWpoly mode) +{ + if (!sw_is_poly_mode_valid(mode)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.polyMode = mode; +} + void swCullFace(SWface face) { if 
(!sw_is_face_valid(face)) { @@ -2892,7 +3009,7 @@ void swOrtho(double left, double right, double bottom, double top, double znear, void swBegin(SWdraw mode) { - if (mode < SW_POINTS || mode > SW_QUADS) { + if (!sw_is_draw_mode_valid(mode)) { RLSW.errCode = SW_INVALID_ENUM; return; } @@ -2907,49 +3024,49 @@ void swEnd(void) void swVertex2i(int x, int y) { - float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; + const float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; swVertex4fv(v); } void swVertex2f(float x, float y) { - float v[4] = { x, y, 0.0f, 1.0f }; + const float v[4] = { x, y, 0.0f, 1.0f }; swVertex4fv(v); } void swVertex2fv(const float* v) { - float v4[4] = { v[0], v[1], 0.0f, 1.0f }; + const float v4[4] = { v[0], v[1], 0.0f, 1.0f }; swVertex4fv(v4); } void swVertex3i(int x, int y, int z) { - float v[4] = { (float)x, (float)y, (float)z, 1.0f }; + const float v[4] = { (float)x, (float)y, (float)z, 1.0f }; swVertex4fv(v); } void swVertex3f(float x, float y, float z) { - float v[4] = { x, y, z, 1.0f }; + const float v[4] = { x, y, z, 1.0f }; swVertex4fv(v); } void swVertex3fv(const float* v) { - float v4[4] = { v[0], v[1], v[2], 1.0f }; + const float v4[4] = { v[0], v[1], v[2], 1.0f }; swVertex4fv(v4); } void swVertex4i(int x, int y, int z, int w) { - float v[4] = { (float)x, (float)y, (float)z, (float)w }; + const float v[4] = { (float)x, (float)y, (float)z, (float)w }; swVertex4fv(v); } void swVertex4f(float x, float y, float z, float w) { - float v[4] = { x, y, z, w }; + const float v[4] = { x, y, z, w }; swVertex4fv(v); } @@ -2984,36 +3101,15 @@ void swVertex4fv(const float* v) sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); } - switch (RLSW.drawMode) { - case SW_POINTS: - sw_point_render( - &RLSW.vertexBuffer[0] - ); + switch (RLSW.polyMode) { + case SW_FILL: + sw_poly_fill_render(); break; - case SW_LINES: - sw_line_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1] - ); + case SW_LINE: + sw_poly_line_render(); break; - case SW_TRIANGLES: - 
sw_triangle_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - break; - case SW_QUADS: - sw_triangle_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - sw_triangle_render( - &RLSW.vertexBuffer[2], - &RLSW.vertexBuffer[3], - &RLSW.vertexBuffer[0] - ); + case SW_POINT: + sw_poly_point_render(); break; } From 06e49a048a2901253180ad275926f55eebe06c24 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 01:41:24 +0100 Subject: [PATCH 022/105] add some not planned functions - `glDepthMask` - `glColorMask` --- src/external/rlsw.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5fd7a5327..5103ee55d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -160,6 +160,13 @@ #define GL_UNSIGNED_INT 0x1405 #define GL_FLOAT 0x1406 + +/* === Not Implemented === */ + +#define glDepthMask(x) ((void)(x)) +#define glColorMask(x) ((void)(x)) + + /* === RLSW Enums === */ typedef enum { From fe600a5ea1bcf580cd42fd26949c57d0afaa323c Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 02:04:59 +0100 Subject: [PATCH 023/105] framebuffer resizing + handle init failure --- src/external/rlsw.h | 99 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5103ee55d..dd79de64d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -35,6 +35,10 @@ # define SW_MALLOC(sz) malloc(sz) #endif +#ifndef SW_REALLOC +# define SW_REALLOC(ptr, newSz) realloc(ptr, newSz) +#endif + #ifndef SW_FREE # define SW_FREE(ptr) free(ptr) #endif @@ -273,10 +277,11 @@ typedef enum { /* === Public API === */ -void swInit(int w, int h); +bool swInit(int w, int h); void swClose(void); void* swGetColorBuffer(int* w, int* h); +bool swResizeFramebuffer(int w, int h); void swEnable(SWstate state); void swDisable(SWstate state); @@ -447,7 +452,9 @@ typedef struct 
{ typedef struct { void *color; void *depth; - int width, height; + int width; + int height; + int allocSz; } sw_framebuffer_t; typedef struct { @@ -510,12 +517,12 @@ typedef struct { uint32_t stateFlags; -} sw_data_t; +} sw_context_t; /* === Global Data === */ -static sw_data_t RLSW = { 0 }; +static sw_context_t RLSW = { 0 }; /* === Helper Functions === */ @@ -597,14 +604,47 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve /* === Framebuffer Part === */ -static inline void sw_framebuffer_load(void** color, void** depth, int w, int h) +static inline bool sw_framebuffer_load(int w, int h) { int size = w * h; - *color = SW_MALLOC(SW_COLOR_PIXEL_SIZE * size); - *depth = SW_MALLOC(SW_DEPTH_PIXEL_SIZE * size); + RLSW.framebuffer.color = SW_MALLOC(SW_COLOR_PIXEL_SIZE * size); + if (RLSW.framebuffer.color == NULL) return false; - // TODO: Handle memory allocation failure + RLSW.framebuffer.depth = SW_MALLOC(SW_DEPTH_PIXEL_SIZE * size); + if (RLSW.framebuffer.depth == NULL) return false; + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + RLSW.framebuffer.allocSz = w * h; + + return true; +} + +static inline bool sw_framebuffer_resize(int w, int h) +{ + int newSize = w * h; + + if (newSize <= RLSW.framebuffer.allocSz) { + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + return true; + } + + /* Buffer sizes are in bytes: pixel count times per-pixel size, as in sw_framebuffer_load */ + void* newColor = SW_REALLOC(RLSW.framebuffer.color, SW_COLOR_PIXEL_SIZE * newSize); + if (newColor == NULL) return false; + RLSW.framebuffer.color = newColor; /* commit now: the old pointer may be invalid even if the depth realloc fails */ + + void* newDepth = SW_REALLOC(RLSW.framebuffer.depth, SW_DEPTH_PIXEL_SIZE * newSize); + if (newDepth == NULL) return false; + RLSW.framebuffer.depth = newDepth; + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + RLSW.framebuffer.allocSz = newSize; + + return true; } static inline void* sw_framebuffer_get_color_addr(void* ptr, uint32_t offset) @@ -2529,21 +2569,19 @@ static inline bool sw_is_blend_dst_factor_valid(int blend) /* === Public Implementation === */ -void swInit(int w, int h) +bool 
swInit(int w, int h) { - sw_framebuffer_load( - &RLSW.framebuffer.color, - &RLSW.framebuffer.depth, - w, h - ); - - RLSW.framebuffer.width = w; - RLSW.framebuffer.height = h; + if (!sw_framebuffer_load(w, h)) { + swClose(); return false; + } swViewport(0, 0, w, h); RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + if (RLSW.loadedTextures == NULL) { swClose(); return false; } + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + if (RLSW.freeTextureIds == NULL) { swClose(); return false; } /* check the pointer just allocated */ RLSW.clearColor[0] = 0.0f; RLSW.clearColor[1] = 0.0f; @@ -2598,15 +2636,29 @@ void swInit(int w, int h) RLSW.loadedTextures[0].ty = 0.5f; RLSW.loadedTextureCount = 1; + + return true; } void swClose(void) { - SW_FREE(RLSW.framebuffer.color); - SW_FREE(RLSW.framebuffer.depth); + if (RLSW.framebuffer.color != NULL) { + SW_FREE(RLSW.framebuffer.color); + } - SW_FREE(RLSW.loadedTextures); - SW_FREE(RLSW.freeTextureIds); + if (RLSW.framebuffer.depth != NULL) { + SW_FREE(RLSW.framebuffer.depth); + } + + if (RLSW.loadedTextures != NULL) { + SW_FREE(RLSW.loadedTextures); + } + + if (RLSW.freeTextureIds != NULL) { + SW_FREE(RLSW.freeTextureIds); + } + + RLSW = (sw_context_t) { 0 }; } void* swGetColorBuffer(int* w, int* h) @@ -2617,6 +2669,11 @@ void* swGetColorBuffer(int* w, int* h) return RLSW.framebuffer.color; } +bool swResizeFramebuffer(int w, int h) +{ + return sw_framebuffer_resize(w, h); +} + void swEnable(SWstate state) { switch (state) { From 268a410a130ac6003c52605c79e5af244d4178b2 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 02:19:28 +0100 Subject: [PATCH 024/105] add quick notes about line clipping algorithms used --- src/external/rlsw.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index dd79de64d..7dc6173f7 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1856,6 +1856,7 @@ static inline uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, in return code; } +// Cohen-Sutherland 
algorithm, faster but for 2D only static inline bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) { int xMin = RLSW.vpMin[0]; @@ -1931,6 +1932,7 @@ static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) return 1; } +// Liang-Barsky algorithm variant, more robust but slightly slower static inline bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) { // TODO: Lerp all vertices here, not just homogeneous coordinates From d3b6fa4785b10b6b8fd6a458b5a61e1efcff9af3 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 04:21:27 +0100 Subject: [PATCH 025/105] start to impl scissor test + review line clipping The support for the scissor test has been implemented for clearing as well as for triangle clipping. The implementation for lines and points is still missing. I also removed the 2D clipping of lines that used the Cohen-Sutherland algorithm, opting instead to always use the Liang-Barsky algorithm in all cases. This simplifies the implementation, and the 2D version would have caused issues when interpolating vertices in the future if we want to implement additional features. 
--- src/external/rlsw.h | 437 ++++++++++++++++++++++++++++---------------- 1 file changed, 278 insertions(+), 159 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 7dc6173f7..5f683a3b9 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -28,6 +28,9 @@ #include #include +// TODO: Review the use of viewport dimensions stored with -1 +// It seems there are issues with the NDC -> screen projection +// Also, consider testing and reviewing, if necessary, scissor clipping as well as line clipping /* === RLSW Definition And Macros === */ @@ -78,6 +81,7 @@ /* === OpenGL Definitions === */ +#define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 @@ -174,6 +178,7 @@ /* === RLSW Enums === */ typedef enum { + SW_SCISSOR_TEST = GL_SCISSOR_TEST, SW_TEXTURE_2D = GL_TEXTURE_2D, SW_DEPTH_TEST = GL_DEPTH_TEST, SW_CULL_FACE = GL_CULL_FACE, @@ -287,6 +292,7 @@ void swEnable(SWstate state); void swDisable(SWstate state); void swViewport(int x, int y, int width, int height); +void swScissor(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); void swClear(uint32_t bitmask); @@ -376,10 +382,11 @@ void swBindTexture(uint32_t id); #define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) -#define SW_STATE_TEXTURE_2D (1 << 0) -#define SW_STATE_DEPTH_TEST (1 << 1) -#define SW_STATE_CULL_FACE (1 << 2) -#define SW_STATE_BLEND (1 << 3) +#define SW_STATE_SCISSOR_TEST (1 << 0) +#define SW_STATE_TEXTURE_2D (1 << 1) +#define SW_STATE_DEPTH_TEST (1 << 2) +#define SW_STATE_CULL_FACE (1 << 3) +#define SW_STATE_BLEND (1 << 4) #define SW_CLIP_INSIDE (0x00) // 0000 #define SW_CLIP_LEFT (0x01) // 0001 @@ -471,6 +478,13 @@ typedef struct { int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) + int scPos[2]; // Represents the top-left corner of 
the scissor rect + int scDim[2]; // Represents the dimensions of the scissor rect (minus one) + int scMin[2]; // Represents the minimum renderable point of the scissor rect (top-left) + int scMax[2]; // Represents the maximum renderable point of the scissor rect (bottom-right) + float scHMax[2]; + float scHMin[2]; + struct { float* positions; float* texcoords; @@ -745,26 +759,59 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = (r << 5) | (g << 2) | b; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = (r << 5) | (g << 2) | b; + } + } + } else { + for (int i = 0; i < size; i++) { + p[i] = (r << 5) | (g << 2) | b; + } } #elif (SW_COLOR_BUFFER_BITS == 16) uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; uint16_t* p = (uint16_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = (r << 11) | (g << 5) | b; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = (r << 11) | (g << 5) | b; + } + } + } else { + for (int i = 0; i < size; i++) { + p[i] = (r << 11) | (g << 5) | b; + } } #elif (SW_COLOR_BUFFER_BITS == 24) uint8_t r = (uint8_t)(color[0] * 255); uint8_t g = (uint8_t)(color[1] * 255); uint8_t b = (uint8_t)(color[2] * 255); uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - *p++ = r; - *p++ = g; - *p++ = b; + if 
(RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + int offset = (y * RLSW.framebuffer.width + x) * 3; + p[offset + 0] = r; + p[offset + 1] = g; + p[offset + 2] = b; + } + } + } else { + for (int i = 0; i < size; i++) { + *p++ = r; + *p++ = g; + *p++ = b; + } } #endif } @@ -774,78 +821,140 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) #if (SW_DEPTH_BUFFER_BITS == 8) uint8_t v = value * UINT8_MAX; uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = v; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { /* start at the scissor origin, like the color clear */ + for (int x = RLSW.scPos[0]; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = v; + } + } + } + else { + for (int i = 0; i < size; i++) { + p[i] = v; + } } #elif (SW_DEPTH_BUFFER_BITS == 16) uint16_t v = value * UINT16_MAX; uint16_t* p = (uint16_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = v; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { /* start at the scissor origin, like the color clear */ + for (int x = RLSW.scPos[0]; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = v; + } + } + } + else { + for (int i = 0; i < size; i++) { + p[i] = v; + } } #elif (SW_DEPTH_BUFFER_BITS == 24) uint32_t v = value * UINT32_MAX; uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - *p++ = (v >> 16) & 0xFF; - *p++ = (v >> 8) & 0xFF; - *p++ = v & 0xFF; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { /* start at the scissor origin, like the color clear */ + for (int x = RLSW.scPos[0]; x < w; x++) { + int offset = y * RLSW.framebuffer.width + x; + p[3 * offset + 0] = (v >> 16) & 0xFF; + p[3 * offset + 1] = (v >> 8) & 0xFF; + p[3 * offset + 2] = v & 0xFF; + } 
+ } + } + else { + for (int i = 0; i < size; i++) { + *p++ = (v >> 16) & 0xFF; + *p++ = (v >> 8) & 0xFF; + *p++ = v & 0xFF; + } } #endif } -static inline void sw_framebuffer_fill(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) +static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, float color[4], float depth_value) { #if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; - uint8_t* color_p = (uint8_t*)color_ptr; + uint8_t* cptr = (uint8_t*)colorPtr; #elif (SW_COLOR_BUFFER_BITS == 16) uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; - uint16_t* color_p = (uint16_t*)color_ptr; + uint16_t* cptr = (uint16_t*)colorPtr; #elif (SW_COLOR_BUFFER_BITS == 24) uint8_t r = (uint8_t)(color[0] * 255); uint8_t g = (uint8_t)(color[1] * 255); uint8_t b = (uint8_t)(color[2] * 255); - uint8_t* color_p = (uint8_t*)color_ptr; + uint8_t* cptr = (uint8_t*)colorPtr; #endif #if (SW_DEPTH_BUFFER_BITS == 8) - uint8_t depth_v = depth_value * UINT8_MAX; - uint8_t* depth_p = (uint8_t*)depth_ptr; + uint8_t d = depth_value * UINT8_MAX; + uint8_t* dptr = (uint8_t*)depthPtr; #elif (SW_DEPTH_BUFFER_BITS == 16) - uint16_t depth_v = depth_value * UINT16_MAX; - uint16_t* depth_p = (uint16_t*)depth_ptr; + uint16_t d = depth_value * UINT16_MAX; + uint16_t* dptr = (uint16_t*)depthPtr; #elif (SW_DEPTH_BUFFER_BITS == 24) - uint32_t depth_v = depth_value * UINT32_MAX; - uint8_t* depth_p = (uint8_t*)depth_ptr; + uint32_t d = depth_value * UINT32_MAX; + uint8_t* dptr = (uint8_t*)depthPtr; #endif + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = 
RLSW.scPos[0]; x < w; x++) { + int offset = y * RLSW.framebuffer.width + x; +# if (SW_COLOR_BUFFER_BITS == 8) + cptr[offset] = (r << 5) | (g << 2) | b; +# elif (SW_COLOR_BUFFER_BITS == 16) + cptr[offset] = (r << 11) | (g << 5) | b; +# elif (SW_COLOR_BUFFER_BITS == 24) + cptr[3 * offset + 0] = r; + cptr[3 * offset + 1] = g; + cptr[3 * offset + 2] = b; +# endif +# if (SW_DEPTH_BUFFER_BITS == 8) + dptr[offset] = d; +# elif (SW_DEPTH_BUFFER_BITS == 16) + dptr[offset] = d; +# elif (SW_DEPTH_BUFFER_BITS == 24) + dptr[3 * offset + 0] = (d >> 16) & 0xFF; + dptr[3 * offset + 1] = (d >> 8) & 0xFF; + dptr[3 * offset + 2] = d & 0xFF; +# endif + } + } + return; + } + for (int i = 0; i < size; i++) { - // Remplir le buffer de couleurs -#if (SW_COLOR_BUFFER_BITS == 8) - color_p[i] = (r << 5) | (g << 2) | b; -#elif (SW_COLOR_BUFFER_BITS == 16) - color_p[i] = (r << 11) | (g << 5) | b; -#elif (SW_COLOR_BUFFER_BITS == 24) - *color_p++ = r; - *color_p++ = g; - *color_p++ = b; -#endif - - // Remplir le buffer de profondeur -#if (SW_DEPTH_BUFFER_BITS == 8) - depth_p[i] = depth_v; -#elif (SW_DEPTH_BUFFER_BITS == 16) - depth_p[i] = depth_v; -#elif (SW_DEPTH_BUFFER_BITS == 24) - *depth_p++ = (depth_v >> 16) & 0xFF; - *depth_p++ = (depth_v >> 8) & 0xFF; - *depth_p++ = depth_v & 0xFF; -#endif +# if (SW_COLOR_BUFFER_BITS == 8) + cptr[i] = (r << 5) | (g << 2) | b; +# elif (SW_COLOR_BUFFER_BITS == 16) + cptr[i] = (r << 11) | (g << 5) | b; +# elif (SW_COLOR_BUFFER_BITS == 24) + *cptr++ = r; + *cptr++ = g; + *cptr++ = b; +# endif +# if (SW_DEPTH_BUFFER_BITS == 8) + dptr[i] = d; +# elif (SW_DEPTH_BUFFER_BITS == 16) + dptr[i] = d; +# elif (SW_DEPTH_BUFFER_BITS == 24) + *dptr++ = (d >> 16) & 0xFF; + *dptr++ = (d >> 8) & 0xFF; + *dptr++ = d & 0xFF; +# endif } } @@ -1411,6 +1520,8 @@ static inline int sw_clip_##name( return outputCount; \ } +// Frustum clip functions + #define IS_INSIDE_PLANE_W(h) ((h)[3] >= SW_CLIP_EPSILON) #define IS_INSIDE_PLANE_X_POS(h) ((h)[0] <= (h)[3]) #define 
IS_INSIDE_PLANE_X_NEG(h) (-(h)[0] <= (h)[3]) @@ -1435,6 +1546,25 @@ DEFINE_CLIP_FUNC(y_neg, IS_INSIDE_PLANE_Y_NEG, COMPUTE_T_PLANE_Y_NEG) DEFINE_CLIP_FUNC(z_pos, IS_INSIDE_PLANE_Z_POS, COMPUTE_T_PLANE_Z_POS) DEFINE_CLIP_FUNC(z_neg, IS_INSIDE_PLANE_Z_NEG, COMPUTE_T_PLANE_Z_NEG) +// Scissor clip functions + +#define COMPUTE_T_SCISSOR_X_MIN(hPrev, hCurr) (((RLSW.scHMin[0]) * (hPrev)[3] - (hPrev)[0]) / (((hCurr)[0] - (RLSW.scHMin[0]) * (hCurr)[3]) - ((hPrev)[0] - (RLSW.scHMin[0]) * (hPrev)[3]))) +#define COMPUTE_T_SCISSOR_X_MAX(hPrev, hCurr) (((RLSW.scHMax[0]) * (hPrev)[3] - (hPrev)[0]) / (((hCurr)[0] - (RLSW.scHMax[0]) * (hCurr)[3]) - ((hPrev)[0] - (RLSW.scHMax[0]) * (hPrev)[3]))) +#define COMPUTE_T_SCISSOR_Y_MIN(hPrev, hCurr) (((RLSW.scHMin[1]) * (hPrev)[3] - (hPrev)[1]) / (((hCurr)[1] - (RLSW.scHMin[1]) * (hCurr)[3]) - ((hPrev)[1] - (RLSW.scHMin[1]) * (hPrev)[3]))) +#define COMPUTE_T_SCISSOR_Y_MAX(hPrev, hCurr) (((RLSW.scHMax[1]) * (hPrev)[3] - (hPrev)[1]) / (((hCurr)[1] - (RLSW.scHMax[1]) * (hCurr)[3]) - ((hPrev)[1] - (RLSW.scHMax[1]) * (hPrev)[3]))) + +#define IS_INSIDE_SCISSOR_X_MIN(h) ((h)[0] >= (RLSW.scHMin[0]) * (h)[3]) +#define IS_INSIDE_SCISSOR_X_MAX(h) ((h)[0] <= (RLSW.scHMax[0]) * (h)[3]) +#define IS_INSIDE_SCISSOR_Y_MIN(h) ((h)[1] >= (RLSW.scHMin[1]) * (h)[3]) +#define IS_INSIDE_SCISSOR_Y_MAX(h) ((h)[1] <= (RLSW.scHMax[1]) * (h)[3]) + +DEFINE_CLIP_FUNC(scissor_x_min, IS_INSIDE_SCISSOR_X_MIN, COMPUTE_T_SCISSOR_X_MIN) +DEFINE_CLIP_FUNC(scissor_x_max, IS_INSIDE_SCISSOR_X_MAX, COMPUTE_T_SCISSOR_X_MAX) +DEFINE_CLIP_FUNC(scissor_y_min, IS_INSIDE_SCISSOR_Y_MIN, COMPUTE_T_SCISSOR_Y_MIN) +DEFINE_CLIP_FUNC(scissor_y_max, IS_INSIDE_SCISSOR_Y_MAX, COMPUTE_T_SCISSOR_Y_MAX) + +// Main clip function + static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; @@ -1457,6 +1587,13 @@ static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_V 
CLIP_AGAINST_PLANE(sw_clip_z_pos); CLIP_AGAINST_PLANE(sw_clip_z_neg); + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + CLIP_AGAINST_PLANE(sw_clip_scissor_x_min); + CLIP_AGAINST_PLANE(sw_clip_scissor_x_max); + CLIP_AGAINST_PLANE(sw_clip_scissor_y_min); + CLIP_AGAINST_PLANE(sw_clip_scissor_y_max); + } + *vertexCounter = n; return n > 0; @@ -1846,72 +1983,7 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* /* === Line Rendering Part === */ -static inline uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) -{ - uint8_t code = SW_CLIP_INSIDE; - if (screen[0] < xMin) code |= SW_CLIP_LEFT; - if (screen[0] > xMax) code |= SW_CLIP_RIGHT; - if (screen[1] < yMin) code |= SW_CLIP_TOP; - if (screen[1] > yMax) code |= SW_CLIP_BOTTOM; - return code; -} - -// Cohen-Sutherland algorithm, faster but for 2D only -static inline bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) -{ - int xMin = RLSW.vpMin[0]; - int yMin = RLSW.vpMin[1]; - int xMax = RLSW.vpMax[0]; - int yMax = RLSW.vpMax[1]; - - bool accept = false; - uint8_t code0, code1; - float m = 0; - - if (v1->screen[0] != v2->screen[0]) { - m = (v2->screen[1] - v1->screen[1]) / (v2->screen[0] - v1->screen[0]); - } - - for (;;) { - code0 = sw_line_clip_encode_2d(v1->screen, xMin, yMin, xMax, yMax); - code1 = sw_line_clip_encode_2d(v2->screen, xMin, yMin, xMax, yMax); - - // Accepted if both endpoints lie within rectangle - if ((code0 | code1) == 0) { - accept = true; - break; - } - - // Rejected if both endpoints are outside rectangle, in same region - if (code0 & code1) break; - - if (code0 == SW_CLIP_INSIDE) { - uint8_t ctmp = code0; code0 = code1; code1 = ctmp; - sw_vertex_t vtmp = *v1; *v1 = *v2; *v2 = vtmp; - } - - if (code0 & SW_CLIP_LEFT) { - v1->screen[1] += (RLSW.vpMin[0] - v1->screen[0])*m; - v1->screen[0] = (float)RLSW.vpMin[0]; - } - else if (code0 & SW_CLIP_RIGHT) { - v1->screen[1] += (RLSW.vpMax[0] - v1->screen[0])*m; - v1->screen[0] = 
(float)RLSW.vpMax[0]; - } - else if (code0 & SW_CLIP_BOTTOM) { - if (m) v1->screen[0] += (RLSW.vpMin[1] - v1->screen[1]) / m; - v1->screen[1] = (float)RLSW.vpMin[1]; - } - else if (code0 & SW_CLIP_TOP) { - if (m) v1->screen[0] += (RLSW.vpMax[1] - v1->screen[1]) / m; - v1->screen[1] = (float)RLSW.vpMax[1]; - } - } - - return accept; -} - -static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) +static inline bool sw_line_clip_coord(float q, float p, float* t0, float* t1) { if (fabsf(p) < SW_CLIP_EPSILON) { // Check if the line is entirely outside the window @@ -1922,46 +1994,47 @@ static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) const float r = q / p; if (p < 0) { - if (r > *t2) return 0; - if (r > *t1) *t1 = r; + if (r > *t1) return 0; + if (r > *t0) *t0 = r; } else { - if (r < *t1) return 0; - if (r < *t2) *t2 = r; + if (r < *t0) return 0; + if (r < *t1) *t1 = r; } return 1; } -// Liang-Barsky algorithm variant, more robust but slightly slower -static inline bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) +static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) { - // TODO: Lerp all vertices here, not just homogeneous coordinates + // Uses Liang-Barsky algorithm - float t1 = 0, t2 = 1; + float t0 = 0; + float t1 = 1; - float delta[4]; + float dH[4], dC[4]; for (int i = 0; i < 4; i++) { - delta[i] = v2->homogeneous[i] - v1->homogeneous[i]; + dH[i] = v1->homogeneous[i] - v0->homogeneous[i]; + dC[i] = v1->color[i] - v0->color[i]; } - if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[0], -delta[3] + delta[0], &t1, &t2)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[0], -delta[3] - delta[0], &t1, &t2)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[0], -dH[3] + dH[0], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[0], -dH[3] - dH[0], &t0, &t1)) return false; + if 
(!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[1], -dH[3] + dH[1], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[1], -dH[3] - dH[1], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[2], -dH[3] + dH[2], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[2], -dH[3] - dH[2], &t0, &t1)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[1], -delta[3] + delta[1], &t1, &t2)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[1], -delta[3] - delta[1], &t1, &t2)) return false; - - if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[2], -delta[3] + delta[2], &t1, &t2)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[2], -delta[3] - delta[2], &t1, &t2)) return false; - - if (t2 < 1) { + if (t1 < 1) { for (int i = 0; i < 4; i++) { - v2->homogeneous[i] = v1->homogeneous[i] + t2 * delta[i]; + v1->homogeneous[i] = v0->homogeneous[i] + t1 * dH[i]; + v1->color[i] = v0->color[i] + t1 * dC[i]; } } - if (t1 > 0) { + if (t0 > 0) { for (int i = 0; i < 4; i++) { - v1->homogeneous[i] = v1->homogeneous[i] + t1 * delta[i]; + v0->homogeneous[i] = v0->homogeneous[i] + t0 * dH[i]; + v0->color[i] = v0->color[i] + t0 * dC[i]; } } @@ -1973,29 +2046,22 @@ static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); - if (v0->homogeneous[3] == 1.0f && v1->homogeneous[3] == 1.0f) { - sw_project_ndc_to_screen(v0->screen, v0->homogeneous); - sw_project_ndc_to_screen(v1->screen, v1->homogeneous); - if (!sw_line_clip_2d(v0, v1)) { - return false; - } + if (!sw_line_clip(v0, v1)) { + return false; } - else { - if (!sw_line_clip_3d(v0, v1)) { - return false; - } - // Convert XYZ coordinates to NDC - v0->homogeneous[3] = 1.0f / 
v0->homogeneous[3]; - v1->homogeneous[3] = 1.0f / v1->homogeneous[3]; - for (int i = 0; i < 3; i++) { - v0->homogeneous[i] *= v0->homogeneous[3]; - v1->homogeneous[i] *= v1->homogeneous[3]; - } - // Convert NDC coordinates to screen space - sw_project_ndc_to_screen(v0->screen, v0->homogeneous); - sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + + // Convert homogeneous coordinates to NDC + v0->homogeneous[3] = 1.0f / v0->homogeneous[3]; + v1->homogeneous[3] = 1.0f / v1->homogeneous[3]; + for (int i = 0; i < 3; i++) { + v0->homogeneous[i] *= v0->homogeneous[3]; + v1->homogeneous[i] *= v1->homogeneous[3]; } + // Convert NDC coordinates to screen space + sw_project_ndc_to_screen(v0->screen, v0->homogeneous); + sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + return true; } @@ -2578,6 +2644,7 @@ bool swInit(int w, int h) } swViewport(0, 0, w, h); + swScissor(0, 0, w, h); RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); if (RLSW.loadedTextures == NULL) { swClose(); return false; } @@ -2679,6 +2746,9 @@ bool swResizeFramebuffer(int w, int h) void swEnable(SWstate state) { switch (state) { + case SW_SCISSOR_TEST: + RLSW.stateFlags |= SW_STATE_SCISSOR_TEST; + break; case SW_TEXTURE_2D: RLSW.stateFlags |= SW_STATE_TEXTURE_2D; break; @@ -2700,6 +2770,9 @@ void swEnable(SWstate state) void swDisable(SWstate state) { switch (state) { + case SW_SCISSOR_TEST: + RLSW.stateFlags &= ~SW_STATE_SCISSOR_TEST; + break; case SW_TEXTURE_2D: RLSW.stateFlags &= ~SW_STATE_TEXTURE_2D; break; @@ -2744,6 +2817,52 @@ void swViewport(int x, int y, int width, int height) RLSW.vpMax[1] = (vpMaxY < fbH) ? 
vpMaxY : fbH; } +void swScissor(int x, int y, int width, int height) +{ + if (x < 0) x = 0; + if (y < 0) y = 0; + + if (width < 0) width = 0; + if (height < 0) height = 0; + + if (x >= RLSW.framebuffer.width) { + x = RLSW.framebuffer.width - 1; + } + if (y >= RLSW.framebuffer.height) { + y = RLSW.framebuffer.height - 1; + } + + if (width >= RLSW.framebuffer.width) { + width = RLSW.framebuffer.width - 1; + } + if (height >= RLSW.framebuffer.height) { + height = RLSW.framebuffer.height - 1; + } + + RLSW.scPos[0] = x; + RLSW.scPos[1] = y; + + RLSW.scDim[0] = width - 1; + RLSW.scDim[1] = height - 1; + + RLSW.scMin[0] = (x < 0) ? 0 : x; + RLSW.scMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.scMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.scMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; + + RLSW.scHMin[0] = (2.0f * (float)RLSW.scMin[0] / (float)(RLSW.vpDim[0] + 1)) - 1.0f; + RLSW.scHMax[0] = (2.0f * (float)RLSW.scMax[0] / (float)(RLSW.vpDim[0] + 1)) - 1.0f; + RLSW.scHMax[1] = 1.0f - (2.0f * (float)RLSW.scMin[1] / (float)(RLSW.vpDim[1] + 1)); + RLSW.scHMin[1] = 1.0f - (2.0f * (float)RLSW.scMax[1] / (float)(RLSW.vpDim[1] + 1)); +} + void swClearColor(float r, float g, float b, float a) { RLSW.clearColor[0] = r; From 0fb0c5cd380bd55b0a2826f1403308f40463c0d4 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 12:44:32 +0100 Subject: [PATCH 026/105] review scissor clear --- src/external/rlsw.h | 48 ++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5f683a3b9..c06c7d2d8 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -81,7 +81,7 @@ /* === OpenGL Definitions === */ -#define GL_SCISSOR_TEST 0x0C11 +#define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 @@ -482,8 +482,8 
@@ typedef struct { int scDim[2]; // Represents the dimensions of the scissor rect (minus one) int scMin[2]; // Represents the minimum renderable point of the scissor rect (top-left) int scMax[2]; // Represents the maximum renderable point of the scissor rect (bottom-right) - float scHMax[2]; - float scHMin[2]; + float scHMin[2]; // Represents the minimum renderable point of the scissor rect in clip space + float scHMax[2]; // Represents the maximum renderable point of the scissor rect in clip space struct { float* positions; @@ -760,10 +760,8 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = (r << 5) | (g << 2) | b; } } @@ -778,10 +776,8 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; uint16_t* p = (uint16_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = (r << 11) | (g << 5) | b; } } @@ -796,10 +792,8 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t b = (uint8_t)(color[2] * 255); uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; 
- for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { int offset = (y * RLSW.framebuffer.width + x) * 3; p[offset + 0] = r; p[offset + 1] = g; @@ -822,10 +816,8 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) uint8_t v = value * UINT8_MAX; uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = v; } } @@ -839,10 +831,8 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) uint16_t v = value * UINT16_MAX; uint16_t* p = (uint16_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = v; } } @@ -856,10 +846,8 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) uint32_t v = value * UINT32_MAX; uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { int offset = y * RLSW.framebuffer.width + x; p[3 * offset + 0] = (v >> 16) & 0xFF; p[3 * offset + 1] = (v >> 8) & 0xFF; @@ -908,10 +896,8 @@ static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int 
size, #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { int offset = y * RLSW.framebuffer.width + x; # if (SW_COLOR_BUFFER_BITS == 8) cptr[offset] = (r << 5) | (g << 2) | b; From 3441cf632746bbf80050c981a5e4e44e0126c46d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 12:47:31 +0100 Subject: [PATCH 027/105] review `swScissor` --- src/external/rlsw.h | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c06c7d2d8..04e7d6709 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -601,6 +601,13 @@ static inline float sw_saturate(float x) // return (x < 0.0f) ? 0.0f : ((x > 1.0f) ? 1.0f : x); } +static inline int sw_clampi(int v, int min, int max) +{ + if (v < min) return min; + if (v > max) return max; + return v; +} + static inline float sw_lerp(float a, float b, float t) { return a + t * (b - a); @@ -2805,25 +2812,10 @@ void swViewport(int x, int y, int width, int height) void swScissor(int x, int y, int width, int height) { - if (x < 0) x = 0; - if (y < 0) y = 0; - - if (width < 0) width = 0; - if (height < 0) height = 0; - - if (x >= RLSW.framebuffer.width) { - x = RLSW.framebuffer.width - 1; - } - if (y >= RLSW.framebuffer.height) { - y = RLSW.framebuffer.height - 1; - } - - if (width >= RLSW.framebuffer.width) { - width = RLSW.framebuffer.width - 1; - } - if (height >= RLSW.framebuffer.height) { - height = RLSW.framebuffer.height - 1; - } + sw_clampi(x, 0, RLSW.framebuffer.width); + sw_clampi(y, 0, RLSW.framebuffer.height); + sw_clampi(width, 0, RLSW.framebuffer.width); + sw_clampi(width, 0, RLSW.framebuffer.height); RLSW.scPos[0] = x; RLSW.scPos[1] = y; From 
f0b02b8cad6348066e38dca4c527a185567f84ba Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:18:41 +0100 Subject: [PATCH 028/105] impl line scissor clipping --- src/external/rlsw.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 04e7d6709..d80bbb58d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1999,17 +1999,15 @@ static inline bool sw_line_clip_coord(float q, float p, float* t0, float* t1) static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) { - // Uses Liang-Barsky algorithm - - float t0 = 0; - float t1 = 1; - + float t0 = 0.0f, t1 = 1.0f; float dH[4], dC[4]; + for (int i = 0; i < 4; i++) { dH[i] = v1->homogeneous[i] - v0->homogeneous[i]; dC[i] = v1->color[i] - v0->color[i]; } + // Clipping Liang-Barsky if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[0], -dH[3] + dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[0], -dH[3] - dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[1], -dH[3] + dH[1], &t0, &t1)) return false; @@ -2017,17 +2015,25 @@ static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[2], -dH[3] + dH[2], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[2], -dH[3] - dH[2], &t0, &t1)) return false; - if (t1 < 1) { + // Clipping Scissor + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + if (!sw_line_clip_coord(v0->homogeneous[0] - RLSW.scHMin[0] * v0->homogeneous[3], RLSW.scHMin[0] * dH[3] - dH[0], &t0, &t1)) return false; + if (!sw_line_clip_coord(RLSW.scHMax[0] * v0->homogeneous[3] - v0->homogeneous[0], dH[0] - RLSW.scHMax[0] * dH[3], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[1] - RLSW.scHMin[1] * v0->homogeneous[3], RLSW.scHMin[1] * dH[3] - dH[1], &t0, &t1)) return false; + if 
(!sw_line_clip_coord(RLSW.scHMax[1] * v0->homogeneous[3] - v0->homogeneous[1], dH[1] - RLSW.scHMax[1] * dH[3], &t0, &t1)) return false; + } + + // Interpolation of new coordinates + if (t1 < 1.0f) { for (int i = 0; i < 4; i++) { v1->homogeneous[i] = v0->homogeneous[i] + t1 * dH[i]; v1->color[i] = v0->color[i] + t1 * dC[i]; } } - - if (t0 > 0) { + if (t0 > 0.0f) { for (int i = 0; i < 4; i++) { - v0->homogeneous[i] = v0->homogeneous[i] + t0 * dH[i]; - v0->color[i] = v0->color[i] + t0 * dC[i]; + v0->homogeneous[i] += t0 * dH[i]; + v0->color[i] += t0 * dC[i]; } } From 633e34e4575a4bbb8002f73190cebdddced608ff Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:19:55 +0100 Subject: [PATCH 029/105] round screen coordinate (line rasterization) --- src/external/rlsw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d80bbb58d..29de03ccb 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2067,10 +2067,10 @@ static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) #define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ { \ - int x1 = (int)v0->screen[0]; \ - int y1 = (int)v0->screen[1]; \ - int x2 = (int)v1->screen[0]; \ - int y2 = (int)v1->screen[1]; \ + int x1 = (int)(v0->screen[0] + 0.5f); \ + int y1 = (int)(v0->screen[1] + 0.5f); \ + int x2 = (int)(v1->screen[0] + 0.5f); \ + int y2 = (int)(v1->screen[1] + 0.5f); \ \ float z1 = v0->homogeneous[2]; \ float z2 = v1->homogeneous[2]; \ From 2f9eb5b7276a34fb53f5ee2fb7080bd902f11c85 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:57:12 +0100 Subject: [PATCH 030/105] impl point scissor clipping --- src/external/rlsw.h | 80 +++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 
29de03ccb..fd6a675a4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2315,23 +2315,37 @@ static inline bool sw_point_project_and_clip(sw_vertex_t* v) sw_project_ndc_to_screen(v->screen, v->homogeneous); - return v->screen[0] - RLSW.pointRadius >= RLSW.vpMin[0] - && v->screen[1] - RLSW.pointRadius >= RLSW.vpMin[1] - && v->screen[0] + RLSW.pointRadius <= RLSW.vpMax[0] - && v->screen[1] + RLSW.pointRadius <= RLSW.vpMax[1]; + const int *min, *max; + + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + min = RLSW.scMin; + max = RLSW.scMax; + } else { + min = RLSW.vpMin; + max = RLSW.vpMax; + } + + bool insideX = v->screen[0] - RLSW.pointRadius < max[0] + && v->screen[0] + RLSW.pointRadius > min[0]; + + bool insideY = v->screen[1] - RLSW.pointRadius < max[1] + && v->screen[1] + RLSW.pointRadius > min[1]; + + return insideX && insideY; } #define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \ static inline void FUNC_NAME(int x, int y, float z, float color[4]) \ { \ - if (CHECK_BOUNDS) \ + if (CHECK_BOUNDS == 1) \ { \ - if (x < 0 || x >= RLSW.framebuffer.width) { \ - return; \ - } \ - if (y < 0 || y >= RLSW.framebuffer.height) { \ - return; \ - } \ + if (x < RLSW.vpMin[0] || x > RLSW.vpMax[0]) return; \ + if (y < RLSW.vpMin[1] || y > RLSW.vpMax[1]) return; \ + } \ + else if (CHECK_BOUNDS == SW_SCISSOR_TEST) \ + { \ + if (x < RLSW.scMin[0] || x > RLSW.scMax[0]) return; \ + if (y < RLSW.scMin[1] || y > RLSW.scMax[1]) return; \ } \ \ int offset = y * RLSW.framebuffer.width + x; \ @@ -2408,11 +2422,21 @@ DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK, 1, 0, 1) DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK, 0, 1, 1) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK, 1, 1, 1) +DEFINE_POINT_RASTER(sw_point_raster_CHECK_SCISSOR, 0, 0, SW_SCISSOR_TEST) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK_SCISSOR, 1, 0, SW_SCISSOR_TEST) +DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK_SCISSOR, 0, 1, SW_SCISSOR_TEST) 
+DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR, 1, 1, SW_SCISSOR_TEST) + DEFINE_POINT_THICK_RASTER(sw_point_thick_raster, sw_point_raster_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH, sw_point_raster_DEPTH_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND, sw_point_raster_BLEND_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND, sw_point_raster_DEPTH_BLEND_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_SCISSOR, sw_point_raster_CHECK_SCISSOR) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_SCISSOR, sw_point_raster_DEPTH_CHECK_SCISSOR) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND_SCISSOR, sw_point_raster_BLEND_CHECK_SCISSOR) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND_SCISSOR, sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR) + static inline void sw_point_render(sw_vertex_t* v) { if (!sw_point_project_and_clip(v)) { @@ -2420,17 +2444,33 @@ static inline void sw_point_render(sw_vertex_t* v) } if (RLSW.pointRadius >= 1.0f) { - if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { - sw_point_thick_raster_DEPTH_BLEND(v); - } - else if (SW_STATE_CHECK(SW_STATE_BLEND)) { - sw_point_thick_raster_BLEND(v); - } - else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { - sw_point_thick_raster_DEPTH(v); + if (SW_STATE_CHECK(SW_STATE_SCISSOR_TEST)) { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_thick_raster_DEPTH_BLEND_SCISSOR(v); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_thick_raster_BLEND_SCISSOR(v); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_thick_raster_DEPTH_SCISSOR(v); + } + else { + sw_point_thick_raster_SCISSOR(v); + } } else { - sw_point_thick_raster(v); + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_thick_raster_DEPTH_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_thick_raster_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + 
sw_point_thick_raster_DEPTH(v); + } + else { + sw_point_thick_raster(v); + } } } else { From 26dfe69e5f6f229de04e222cb9331ddef32d0849 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:57:55 +0100 Subject: [PATCH 031/105] remove unused defs --- src/external/rlsw.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index fd6a675a4..d3e927a5e 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -388,12 +388,6 @@ void swBindTexture(uint32_t id); #define SW_STATE_CULL_FACE (1 << 3) #define SW_STATE_BLEND (1 << 4) -#define SW_CLIP_INSIDE (0x00) // 0000 -#define SW_CLIP_LEFT (0x01) // 0001 -#define SW_CLIP_RIGHT (0x02) // 0010 -#define SW_CLIP_BOTTOM (0x04) // 0100 -#define SW_CLIP_TOP (0x08) // 1000 - /* === Internal Structs === */ typedef enum { From 2009490151580d1a77d6ae409e5ba80e05f57563 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 17:16:21 +0100 Subject: [PATCH 032/105] add getter functions --- src/external/rlsw.h | 284 +++++++++++++++++++++++++++++++++----------- 1 file changed, 213 insertions(+), 71 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d3e927a5e..60dd29e6b 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -81,92 +81,125 @@ /* === OpenGL Definitions === */ -#define GL_SCISSOR_TEST 0x0C11 -#define GL_TEXTURE_2D 0x0DE1 -#define GL_DEPTH_TEST 0x0B71 -#define GL_CULL_FACE 0x0B44 -#define GL_BLEND 0x0BE2 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_DEPTH_TEST 0x0B71 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 -#define GL_COLOR_BUFFER_BIT 0x00004000 -#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 -#define GL_MODELVIEW 0x1700 -#define GL_PROJECTION 0x1701 -#define GL_TEXTURE 0x1702 +//#define GL_ATTRIB_STACK_DEPTH 0x0BB0 +//#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 +#define 
GL_COLOR_CLEAR_VALUE 0x0C22 +//#define GL_COLOR_WRITEMASK 0x0C23 +//#define GL_CURRENT_INDEX 0x0B01 +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_NORMAL 0x0B02 +//#define GL_CURRENT_RASTER_COLOR 0x0B04 +//#define GL_CURRENT_RASTER_DISTANCE 0x0B09 +//#define GL_CURRENT_RASTER_INDEX 0x0B05 +//#define GL_CURRENT_RASTER_POSITION 0x0B07 +//#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06 +//#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +//#define GL_INDEX_CLEAR_VALUE 0x0C20 +//#define GL_INDEX_MODE 0x0C30 +//#define GL_INDEX_WRITEMASK 0x0C21 +#define GL_MODELVIEW_MATRIX 0x0BA6 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +//#define GL_NAME_STACK_DEPTH 0x0D70 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +//#define GL_RENDER_MODE 0x0C40 +//#define GL_RGBA_MODE 0x0C31 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_VIEWPORT 0x0BA2 -#define GL_VERTEX_ARRAY 0x8074 -#define GL_NORMAL_ARRAY 0x8075 -#define GL_COLOR_ARRAY 0x8076 -//#define GL_INDEX_ARRAY 0x8077 -#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_DEPTH_BUFFER_BIT 0x00000100 -#define GL_POINTS 0x0000 -#define GL_LINES 0x0001 -//#define GL_LINE_LOOP 0x0002 -//#define GL_LINE_STRIP 0x0003 -#define GL_TRIANGLES 0x0004 -//#define GL_TRIANGLE_STRIP 0x0005 -//#define GL_TRIANGLE_FAN 0x0006 -#define GL_QUADS 0x0007 -//#define GL_QUAD_STRIP 0x0008 -//#define GL_POLYGON 0x0009 +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 -#define GL_POINT 0x1B00 -#define GL_LINE 0x1B01 -#define GL_FILL 0x1B02 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +//#define GL_INDEX_ARRAY 0x8077 +#define GL_TEXTURE_COORD_ARRAY 0x8078 -//#define GL_CW 0x0900 -//#define GL_CCW 0x0901 +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +//#define GL_LINE_LOOP 0x0002 +//#define GL_LINE_STRIP 0x0003 +#define 
GL_TRIANGLES 0x0004 +//#define GL_TRIANGLE_STRIP 0x0005 +//#define GL_TRIANGLE_FAN 0x0006 +#define GL_QUADS 0x0007 +//#define GL_QUAD_STRIP 0x0008 +//#define GL_POLYGON 0x0009 -#define GL_FRONT 0x0404 -#define GL_BACK 0x0405 +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 -#define GL_ZERO 0 -#define GL_ONE 1 -#define GL_SRC_COLOR 0x0300 -#define GL_ONE_MINUS_SRC_COLOR 0x0301 -#define GL_SRC_ALPHA 0x0302 -#define GL_ONE_MINUS_SRC_ALPHA 0x0303 -#define GL_DST_ALPHA 0x0304 -#define GL_ONE_MINUS_DST_ALPHA 0x0305 -#define GL_DST_COLOR 0x0306 -#define GL_ONE_MINUS_DST_COLOR 0x0307 -#define GL_SRC_ALPHA_SATURATE 0x0308 +//#define GL_CW 0x0900 +//#define GL_CCW 0x0901 -#define GL_NEAREST 0x2600 -#define GL_LINEAR 0x2601 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 -#define GL_REPEAT 0x2901 -#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) -#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 -#define GL_TEXTURE_MAG_FILTER 0x2800 -#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 -#define GL_TEXTURE_WRAP_S 0x2802 -#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) +#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) -#define GL_NO_ERROR 0 -#define GL_INVALID_ENUM 0x0500 -#define GL_INVALID_VALUE 0x0501 -#define GL_INVALID_OPERATION 0x0502 -#define GL_STACK_OVERFLOW 0x0503 -#define GL_STACK_UNDERFLOW 0x0504 -#define GL_OUT_OF_MEMORY 0x0505 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 -#define GL_ALPHA 0x1906 -#define GL_LUMINANCE 0x1909 -#define GL_LUMINANCE_ALPHA 0x190A -#define 
GL_RGB 0x1907 -#define GL_RGBA 0x1908 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 -#define GL_BYTE 0x1400 -#define GL_UNSIGNED_BYTE 0x1401 -#define GL_SHORT 0x1402 -#define GL_UNSIGNED_SHORT 0x1403 -#define GL_INT 0x1404 -#define GL_UNSIGNED_INT 0x1405 -#define GL_FLOAT 0x1406 +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 + +#define GL_ALPHA 0x1906 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 + +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 /* === Not Implemented === */ @@ -185,6 +218,24 @@ typedef enum { SW_BLEND = GL_BLEND } SWstate; +typedef enum { + SW_VENDOR = GL_VENDOR, + SW_RENDERER = GL_RENDERER, + SW_VERSION = GL_VERSION, + SW_EXTENSIONS = GL_EXTENSIONS, + SW_COLOR_CLEAR_VALUE = GL_COLOR_CLEAR_VALUE, + SW_CURRENT_COLOR = GL_CURRENT_COLOR, + SW_CURRENT_NORMAL = GL_CURRENT_NORMAL, + SW_CURRENT_TEXTURE_COORDS = GL_CURRENT_TEXTURE_COORDS, + SW_MODELVIEW_MATRIX = GL_MODELVIEW_MATRIX, + SW_MODELVIEW_STACK_DEPTH = GL_MODELVIEW_STACK_DEPTH, + SW_PROJECTION_MATRIX = GL_PROJECTION_MATRIX, + SW_PROJECTION_STACK_DEPTH = GL_PROJECTION_STACK_DEPTH, + SW_TEXTURE_MATRIX = GL_TEXTURE_MATRIX, + SW_TEXTURE_STACK_DEPTH = GL_TEXTURE_STACK_DEPTH, + SW_VIEWPORT = GL_VIEWPORT +} SWget; + typedef enum { SW_COLOR_BUFFER_BIT = GL_COLOR_BUFFER_BIT, SW_DEPTH_BUFFER_BIT = GL_DEPTH_BUFFER_BIT @@ -291,6 +342,9 @@ bool swResizeFramebuffer(int w, int h); void swEnable(SWstate state); void swDisable(SWstate state); +void swGetFloatv(SWget name, float* v); +const char* glGetString(SWget name); + void swViewport(int x, int y, int width, int height); void swScissor(int x, int y, 
int width, int height); @@ -2824,6 +2878,94 @@ void swDisable(SWstate state) } } +void swGetIntegerv(SWget name, int* v) +{ + switch (name) { + case SW_MODELVIEW_STACK_DEPTH: + *v = SW_MODELVIEW_STACK_DEPTH; + break; + case SW_PROJECTION_STACK_DEPTH: + *v = SW_PROJECTION_STACK_DEPTH; + break; + case SW_TEXTURE_STACK_DEPTH: + *v = SW_TEXTURE_STACK_DEPTH; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +void swGetFloatv(SWget name, float* v) +{ + switch (name) { + case SW_COLOR_CLEAR_VALUE: + v[0] = RLSW.clearColor[0]; + v[1] = RLSW.clearColor[1]; + v[2] = RLSW.clearColor[2]; + v[3] = RLSW.clearColor[3]; + break; + case SW_CURRENT_COLOR: + v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[0]; + v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[1]; + v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[2]; + v[3] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[3]; + break; + case SW_CURRENT_NORMAL: + v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[0]; + v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[1]; + v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[2]; + break; + case SW_CURRENT_TEXTURE_COORDS: + v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[0]; + v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[1]; + break; + case SW_MODELVIEW_MATRIX: + for (int i = 0; i < 16; i++) { + v[i] = RLSW.stackModelview[RLSW.stackModelviewCounter - 1][i]; + } + break; + case SW_PROJECTION_MATRIX: + for (int i = 0; i < 16; i++) { + v[i] = RLSW.stackProjection[RLSW.stackProjectionCounter - 1][i]; + } + break; + case SW_TEXTURE_MATRIX: + for (int i = 0; i < 16; i++) { + v[i] = RLSW.stackTexture[RLSW.stackTextureCounter - 1][i]; + } + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +const char* glGetString(SWget name) +{ + const char* result = NULL; + + switch (name) { + case SW_VENDOR: + result = "RLSW Header"; + break; + case SW_RENDERER: + result = "RLSW Software Renderer"; + break; + 
case SW_VERSION: + result = "RLSW 1.0"; + break; + case SW_EXTENSIONS: + result = "None"; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } + + return result; +} + void swViewport(int x, int y, int width, int height) { if (x <= -width || y <= -height) { From 618f283cb2fb5273c1ea8c9df345b342f5278b0c Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 17:59:58 +0100 Subject: [PATCH 033/105] gl binding --- src/external/rlsw.h | 186 ++++++++++++++++---------------------------- 1 file changed, 68 insertions(+), 118 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 60dd29e6b..38f00a0e7 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -202,10 +202,73 @@ #define GL_FLOAT 0x1406 +/* === OpenGL Binding === */ + +#define glEnable(state) swEnable(state) +#define glDisable(state) swDisable(state) +#define glGetFloatv(pname, params) swGetFloatv(pname, params) +#define glGetString(pname) swGetString(pname) +#define glViewport(x, y, w, h) swViewport(x, y, w, h) +#define glScissor(x, y, w, h) swScissor(x, y, w, h) +#define glClearColor(r, g, b, a) swClearColor(r, g, b, a) +#define glClear(bitmask) swClear(bitmask) +#define glBlendFunc(sfactor, dfactor) swBlendFunc(sfactor, dfactor) +#define glPolygonMode(mode) swPolygonMode(mode) +#define glCullFace(face) swCullFace(face) +#define glPointSize(size) swPointSize(size) +#define glLineWidth(width) swLineWidth(width) +#define glMatrixMode(mode) swMatrixMode(mode) +#define glPushMatrix() swPushMatrix() +#define glPopMatrix() swPopMatrix() +#define glLoadIdentity() swLoadIdentity() +#define glTranslatef(x, y, z) swTranslatef(x, y, z) +#define glRotatef(a, x, y, z) swRotatef(a, x, y, z) +#define glScalef(x, y, z) swScalef(x, y, z) +#define glMultMatrixf(v) swMultMatrixf(v) +#define glFrusutm(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) +#define glOrtho(l, r, b, t, n, f) swOrtho(l, r, b, t, n, f) +#define glBegin(mode) swBegin(mode) +#define glEnd() swEnd() +#define 
glVertex2i(x, y) swVertex2i(x, y) +#define glVertex2f(x, y) swVertex2f(x, y) +#define glVertex2fv(v) swVertex2fv(v) +#define glVertex3i(x, y, z) swVertex3i(x, y, z) +#define glVertex3f(x, y, z) swVertex3f(x, y, z) +#define glvertex3fv(v) swVertex3fv(v) +#define glVertex4i(x, y, z, w) swVertex4i(x, y, z, w) +#define glVertex4f(x, y, z, w) swVertex4f(x, y, z, w) +#define glVertex4fv(v) swVertex4fv(v) +#define glColor3ub(r, g, b) swColor3ub(r, g, b) +#define glColor3ubv(v) swColor3ubv(v) +#define glColor3f(r, g, b) swColor3f(r, g, b) +#define glColor3fv(v) swColor3fv(v) +#define glColor4ub(r, g, b, a) swColor4ub(r, g, b, a) +#define glColor4ubv(v) swColor4ubv(v) +#define glColor4f(r, g, b, a) swColor4f(r, g, b, a) +#define glColor4fv(v) swColor4fv(v) +#define glTexCoord2f(u, v) swTexCoord2f(u, v) +#define glTexCoord2fv(v) swTexCoord2fv(v) +#define glNormal3f(x, y, z) swNormal3f(x, y, z) +#define glNormal3fv(v) swNormal3fv(v) +#define glEnableClientState(t) ((void)(t)) +#define glDisableClientState(t) swBindArray(t, 0) +#define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, p) +#define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, p) +#define glNormalPointer(sz, t, s, p) swBindArray(SW_NORMAL_ARRAY, p) +#define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, p) +#define glDrawArrays(m, o, c) swDrawArrays(m, o, c) +#define glGenTextures(c, v) swGenTextures(c, v) +#define glDeleteTextures(c, v) swDeleteTextures(c, v) +#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, p) +#define glTexParameteri(pname, param) swTexParameteri(param, value) +#define glBindTexture(tr, id) swBindTexture(id) + + /* === Not Implemented === */ -#define glDepthMask(x) ((void)(x)) -#define glColorMask(x) ((void)(x)) +#define glDepthMask(X) ((void)(X)) +#define glColorMask(X) ((void)(X)) +#define glPixelStorei(X, Y) ((void)(X);(void)(Y)) /* === RLSW Enums === */ @@ -382,26 +445,17 @@ void swVertex4i(int x, int y, int z, int w); void 
swVertex4f(float x, float y, float z, float w); void swVertex4fv(const float* v); -void swColor1ui(uint32_t color); void swColor3ub(uint8_t r, uint8_t g, uint8_t b); void swColor3ubv(const uint8_t* v); -void swColor3us(uint16_t r, uint16_t g, uint16_t b); -void swColor3usv(const uint16_t* v); -void swColor3ui(uint32_t r, uint32_t g, uint32_t b); -void swColor3uiv(const uint32_t* v); void swColor3f(float r, float g, float b); void swColor3fv(const float* v); void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); void swColor4ubv(const uint8_t* v); -void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); -void swColor4usv(const uint16_t* v); -void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); -void swColor4uiv(const uint32_t* v); void swColor4f(float r, float g, float b, float a); void swColor4fv(const float* v); void swTexCoord2f(float u, float v); -void swTexCoordfv(const float* v); +void swTexCoord2fv(const float* v); void swNormal3f(float x, float y, float z); void swNormal3fv(const float* v); @@ -3466,22 +3520,6 @@ void swVertex4fv(const float* v) } } -void swColor1ui(uint32_t color) -{ - union { - uint32_t v; - uint8_t a[4]; - } c = { .v = color }; - - float cv[4]; - cv[0] = (float)c.a[0] / 255; - cv[1] = (float)c.a[1] / 255; - cv[2] = (float)c.a[2] / 255; - cv[3] = (float)c.a[3] / 255; - - swColor4fv(cv); -} - void swColor3ub(uint8_t r, uint8_t g, uint8_t b) { float cv[4]; @@ -3504,50 +3542,6 @@ void swColor3ubv(const uint8_t* v) swColor4fv(cv); } -void swColor3us(uint16_t r, uint16_t g, uint16_t b) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - -void swColor3usv(const uint16_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - -void 
swColor3ui(uint32_t r, uint32_t g, uint32_t b) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - -void swColor3uiv(const uint32_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - void swColor3f(float r, float g, float b) { float cv[4]; @@ -3592,50 +3586,6 @@ void swColor4ubv(const uint8_t* v) swColor4fv(cv); } -void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = (float)((uint8_t)(a >> 8)) / 255; - - swColor4fv(cv); -} - -void swColor4usv(const uint16_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; - - swColor4fv(cv); -} - -void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = (float)((uint8_t)(a >> 24)) / 255; - - swColor4fv(cv); -} - -void swColor4uiv(const uint32_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; - - swColor4fv(cv); -} - void swColor4f(float r, float g, float b, float a) { float cv[4]; @@ -3663,7 +3613,7 @@ void swTexCoord2f(float u, float v) RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; } -void swTexCoordfv(const float* v) +void swTexCoord2fv(const float* v) { float s = 
RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; @@ -3717,7 +3667,7 @@ void swDrawArrays(SWdraw mode, int offset, int count) for (int i = offset; i < count; i++) { if (RLSW.array.texcoords) { - swTexCoordfv(RLSW.array.texcoords + 2 * i); + swTexCoord2fv(RLSW.array.texcoords + 2 * i); } if (RLSW.array.normals) { swNormal3fv(RLSW.array.normals + 3 * i); From 13585d8684a2d2b133a61b837a1effe9bb907f04 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 18:30:27 +0100 Subject: [PATCH 034/105] add `glHint` and `glShadeModel` macros (not implmented) --- src/external/rlsw.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 38f00a0e7..c2dc4ee10 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -269,7 +269,8 @@ #define glDepthMask(X) ((void)(X)) #define glColorMask(X) ((void)(X)) #define glPixelStorei(X, Y) ((void)(X);(void)(Y)) - +#define glHint(X, Y) ((void)(X);(void)(Y)) +#define glShadeModel(X) ((void)(X)) /* === RLSW Enums === */ From 64c39e9d536d1df292b1c37500e2716ec160a309 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 19:19:15 +0100 Subject: [PATCH 035/105] binding tweaks --- src/external/rlsw.h | 70 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c2dc4ee10..0636d08f2 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -79,8 +79,30 @@ #endif +/* === OpenGL Compatibility Types === */ + +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef signed char GLbyte; +typedef short GLshort; +typedef int GLint; +typedef unsigned char GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef int GLsizei; +typedef float GLfloat; +typedef float GLclampf; 
+typedef double GLdouble; +typedef double GLclampd; + + /* === OpenGL Definitions === */ +#define GL_FALSE 0 +#define GL_TRUE 1 + #define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 @@ -148,9 +170,6 @@ #define GL_LINE 0x1B01 #define GL_FILL 0x1B02 -//#define GL_CW 0x0900 -//#define GL_CCW 0x0901 - #define GL_FRONT 0x0404 #define GL_BACK 0x0405 @@ -202,6 +221,26 @@ #define GL_FLOAT 0x1406 +/* === Not Used Definitions === */ + +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_SMOOTH 0x1D01 +#define GL_NICEST 0x1102 +#define GL_CCW 0x0901 +#define GL_CW 0x0900 +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 + + /* === OpenGL Binding === */ #define glEnable(state) swEnable(state) @@ -213,7 +252,7 @@ #define glClearColor(r, g, b, a) swClearColor(r, g, b, a) #define glClear(bitmask) swClear(bitmask) #define glBlendFunc(sfactor, dfactor) swBlendFunc(sfactor, dfactor) -#define glPolygonMode(mode) swPolygonMode(mode) +#define glPolygonMode(face, mode) swPolygonMode(mode) #define glCullFace(face) swCullFace(face) #define glPointSize(size) swPointSize(size) #define glLineWidth(width) swLineWidth(width) @@ -225,7 +264,7 @@ #define glRotatef(a, x, y, z) swRotatef(a, x, y, z) #define glScalef(x, y, z) swScalef(x, y, z) #define glMultMatrixf(v) swMultMatrixf(v) -#define glFrusutm(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) +#define glFrustum(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) #define glOrtho(l, r, b, t, n, f) swOrtho(l, r, b, t, n, f) #define glBegin(mode) swBegin(mode) #define glEnd() swEnd() @@ -254,23 +293,30 @@ #define glDisableClientState(t) swBindArray(t, 0) #define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, p) #define glTexCoordPointer(sz, t, s, 
p) swBindArray(SW_TEXTURE_COORD_ARRAY, p) -#define glNormalPointer(sz, t, s, p) swBindArray(SW_NORMAL_ARRAY, p) +#define glNormalPointer(t, s, p) swBindArray(SW_NORMAL_ARRAY, p) #define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, p) #define glDrawArrays(m, o, c) swDrawArrays(m, o, c) #define glGenTextures(c, v) swGenTextures(c, v) #define glDeleteTextures(c, v) swDeleteTextures(c, v) #define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, p) -#define glTexParameteri(pname, param) swTexParameteri(param, value) +#define glTexParameteri(tr, pname, param) swTexParameteri(pname, param) #define glBindTexture(tr, id) swBindTexture(id) /* === Not Implemented === */ -#define glDepthMask(X) ((void)(X)) -#define glColorMask(X) ((void)(X)) -#define glPixelStorei(X, Y) ((void)(X);(void)(Y)) -#define glHint(X, Y) ((void)(X);(void)(Y)) -#define glShadeModel(X) ((void)(X)) +#define glClearDepth(X) ((void)(X)) +#define glDepthMask(X) ((void)(X)) +#define glColorMask(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) +#define glPixelStorei(X,Y) ((void)(X),(void)(Y)) +#define glHint(X,Y) ((void)(X),(void)(Y)) +#define glShadeModel(X) ((void)(X)) +#define glFrontFace(X) ((void)(X)) +#define glDepthFunc(X) ((void)(X)) +#define glTexSubImage2D(X,Y,Z,W,A,B,C,D,E) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A),(void)(B),(void)(C),(void)(D),(void)(E)) +#define glGetTexImage(X,Y,Z,W,A) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A)) +#define glDrawElements(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) + /* === RLSW Enums === */ From 02f7d3146b0cfa87d6d089cf8b5d9179e6316b11 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 19:50:27 +0100 Subject: [PATCH 036/105] impl copy framebuffer function + glReadPixels --- src/external/rlsw.h | 294 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 259 insertions(+), 35 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 0636d08f2..70762152d 100644 --- 
a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -243,6 +243,7 @@ typedef double GLclampd; /* === OpenGL Binding === */ +#define glReadPixels(x, y, w, h, f, t, p) swCopyFramebuffer(x, y, w, h, f, t, p) #define glEnable(state) swEnable(state) #define glDisable(state) swDisable(state) #define glGetFloatv(pname, params) swGetFloatv(pname, params) @@ -446,8 +447,8 @@ typedef enum { bool swInit(int w, int h); void swClose(void); -void* swGetColorBuffer(int* w, int* h); bool swResizeFramebuffer(int w, int h); +void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* pixels); void swEnable(SWstate state); void swDisable(SWstate state); @@ -1101,6 +1102,62 @@ static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, } +/* === Half Floating Point === */ + +static inline uint32_t sw_cvt_hf_ui(uint16_t h) +{ + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? (112 << 23) : 0; + + return s | r; +} + +static inline float sw_cvt_hf(sw_half_t y) +{ + union { float f; uint32_t i; } v = { + .i = sw_cvt_hf_ui(y) + }; + return v.f; +} + +static inline uint16_t sw_cvt_fh_ui(uint32_t ui) +{ + int32_t s = (ui >> 16) & 0x8000; + int32_t em = ui & 0x7fffffff; + + // bias exponent and round to nearest; 112 is relative exponent bias (127-15) + int32_t h = (em - (112 << 23) + (1 << 12)) >> 13; + + // underflow: flush to zero; 113 encodes exponent -14 + h = (em < (113 << 23)) ? 0 : h; + + // overflow: infinity; 143 encodes exponent 16 + h = (em >= (143 << 23)) ? 
0x7c00 : h; + + // NaN; note that we convert all types of NaN to qNaN + h = (em > (255 << 23)) ? 0x7e00 : h; + + return (uint16_t)(s | h); +} + +static inline sw_half_t sw_cvt_fh(float i) +{ + union { float f; uint32_t i; } v; + v.f = i; + return sw_cvt_fh_ui(v.i); +} + + /* === Pixel Format Part === */ static inline int sw_get_pixel_format(SWformat format, SWtype type) @@ -1150,32 +1207,6 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) return -1; // Unsupported format } -static inline uint32_t sw_cvt_hf_ui(uint16_t h) -{ - uint32_t s = (uint32_t)(h & 0x8000) << 16; - int32_t em = h & 0x7fff; - - // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) - int32_t r = (em + (112 << 10)) << 13; - - // denormal: flush to zero - r = (em < (1 << 10)) ? 0 : r; - - // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases - // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 - r += (em >= (31 << 10)) ? 
(112 << 23) : 0; - - return s | r; -} - -static inline float sw_cvt_hf(sw_half_t y) -{ - union { float f; uint32_t i; } v = { - .i = sw_cvt_hf_ui(y) - }; - return v.f; -} - static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); @@ -1376,6 +1407,193 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse } } +static inline void sw_set_pixel_grayscale(void* pixels, uint32_t offset, const float* color) +{ + ((uint8_t*)pixels)[offset] = (uint8_t)(color[0] * 255.0f); +} + +static inline void sw_set_pixel_red_16(void* pixels, uint32_t offset, const float* color) +{ + ((sw_half_t*)pixels)[offset] = sw_cvt_fh(color[0]); +} + +static inline void sw_set_pixel_red_32(void* pixels, uint32_t offset, const float* color) +{ + ((float*)pixels)[offset] = color[0]; +} + +static inline void sw_set_pixel_grayscale_alpha(void* pixels, uint32_t offset, const float* color) +{ + uint8_t* pixelData = (uint8_t*)pixels + 2 * offset; + + pixelData[0] = (uint8_t)(color[0] * 255.0f); // Valeur de gris + pixelData[1] = (uint8_t)(color[3] * 255.0f); // Alpha +} + +static inline void sw_set_pixel_rgb_565(void* pixels, uint32_t offset, const float* color) +{ + uint16_t* pixel = (uint16_t*)pixels + offset; + + uint16_t r = (uint16_t)(color[0] * 31) & 0x1F; + uint16_t g = (uint16_t)(color[1] * 63) & 0x3F; + uint16_t b = (uint16_t)(color[2] * 31) & 0x1F; + + *pixel = (r << 11) | (g << 5) | b; +} + +static inline void sw_set_pixel_rgb_888(void* pixels, uint32_t offset, const float* color) +{ + uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + + pixel[0] = (uint8_t)(color[0] * 255.0f); + pixel[1] = (uint8_t)(color[1] * 255.0f); + pixel[2] = (uint8_t)(color[2] * 255.0f); +} + +static inline void sw_set_pixel_rgb_161616(void* pixels, uint32_t offset, const float* color) +{ + sw_half_t* pixel = (sw_half_t*)pixels + 3 * offset; + + pixel[0] = sw_cvt_fh(color[0]); + pixel[1] = 
sw_cvt_fh(color[1]); + pixel[2] = sw_cvt_fh(color[2]); +} + +static inline void sw_set_pixel_rgb_323232(void* pixels, uint32_t offset, const float* color) +{ + float* pixel = (float*)pixels + 3 * offset; + + pixel[0] = color[0]; + pixel[1] = color[1]; + pixel[2] = color[2]; +} + +static inline void sw_set_pixel_rgba_5551(void* pixels, uint32_t offset, const float* color) +{ + uint16_t* pixel = (uint16_t*)pixels + offset; + + uint16_t r = (uint16_t)(color[0] * 31) & 0x1F; + uint16_t g = (uint16_t)(color[1] * 31) & 0x1F; + uint16_t b = (uint16_t)(color[2] * 31) & 0x1F; + uint16_t a = (color[3] > 0.5f) ? 1 : 0; // Alpha 1 bit + + *pixel = (r << 11) | (g << 6) | (b << 1) | a; +} + +static inline void sw_set_pixel_rgba_4444(void* pixels, uint32_t offset, const float* color) +{ + uint16_t* pixel = (uint16_t*)pixels + offset; + + uint16_t r = (uint16_t)(color[0] * 15) & 0x0F; + uint16_t g = (uint16_t)(color[1] * 15) & 0x0F; + uint16_t b = (uint16_t)(color[2] * 15) & 0x0F; + uint16_t a = (uint16_t)(color[3] * 15) & 0x0F; + + *pixel = (r << 12) | (g << 8) | (b << 4) | a; +} + +static inline void sw_set_pixel_rgba_8888(void* pixels, uint32_t offset, const float* color) +{ + uint8_t* pixel = (uint8_t*)pixels + 4 * offset; + + pixel[0] = (uint8_t)(color[0] * 255.0f); + pixel[1] = (uint8_t)(color[1] * 255.0f); + pixel[2] = (uint8_t)(color[2] * 255.0f); + pixel[3] = (uint8_t)(color[3] * 255.0f); +} + +static inline void sw_set_pixel_rgba_16161616(void* pixels, uint32_t offset, const float* color) +{ + sw_half_t* pixel = (sw_half_t*)pixels + 4 * offset; + + pixel[0] = sw_cvt_fh(color[0]); + pixel[1] = sw_cvt_fh(color[1]); + pixel[2] = sw_cvt_fh(color[2]); + pixel[3] = sw_cvt_fh(color[3]); +} + +static inline void sw_set_pixel_rgba_32323232(void* pixels, uint32_t offset, const float* color) +{ + float* pixel = (float*)pixels + 4 * offset; + + pixel[0] = color[0]; + pixel[1] = color[1]; + pixel[2] = color[2]; + pixel[3] = color[3]; +} + +static inline void sw_set_pixel(void* 
pixels, uint32_t offset, sw_pixelformat_e format, const float* color) +{ + switch (format) { + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_set_pixel_grayscale(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_set_pixel_grayscale_alpha(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_set_pixel_rgb_565(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_set_pixel_rgb_888(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_set_pixel_rgba_5551(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_set_pixel_rgba_4444(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_set_pixel_rgba_8888(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_set_pixel_red_32(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_set_pixel_rgb_323232(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_set_pixel_rgba_32323232(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_set_pixel_red_16(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_set_pixel_rgb_161616(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_set_pixel_rgba_16161616(pixels, offset, color); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + break; + + } +} + /* === Texture 
Sampling Part === */ @@ -2918,19 +3136,25 @@ void swClose(void) RLSW = (sw_context_t) { 0 }; } -void* swGetColorBuffer(int* w, int* h) -{ - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; - - return RLSW.framebuffer.color; -} - bool swResizeFramebuffer(int w, int h) { return sw_framebuffer_resize(w, h); } +void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* pixels) +{ + sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + float color[4]; + int offset = y * RLSW.framebuffer.width + x; + sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(RLSW.framebuffer.color, offset)); + sw_set_pixel(pixels, offset, pFormat, color); + } + } +} + void swEnable(SWstate state) { switch (state) { From c7fa9a0fa2a4920d30464f23f1fc604ff38426e7 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 20:00:32 +0100 Subject: [PATCH 037/105] review `swCopyFramebuffer` --- src/external/rlsw.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 70762152d..4d77bdc68 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3145,12 +3145,16 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, { sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); + void* src = RLSW.framebuffer.color; + + int srcW = RLSW.framebuffer.width; + int srcHm1 = RLSW.framebuffer.height - 1; + for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float color[4]; - int offset = y * RLSW.framebuffer.width + x; - sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(RLSW.framebuffer.color, offset)); - sw_set_pixel(pixels, offset, pFormat, color); + sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, (srcHm1 - y) * srcW + x)); + sw_set_pixel(pixels, y * srcW + x, pFormat, color); } } } From 
c9338481a489d54c9e602a9801bf36e99b317618 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 22:44:59 +0100 Subject: [PATCH 038/105] update rlgl.h --- src/rlgl.h | 68 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/src/rlgl.h b/src/rlgl.h index d55ae82f2..a035481cc 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -159,7 +159,7 @@ #endif // Security check in case multiple GRAPHICS_API_OPENGL_* defined -#if defined(GRAPHICS_API_OPENGL_11) +#if defined(GRAPHICS_API_OPENGL_11) || defined(GRAPHICS_API_OPENGL_11_SOFTWARE) #if defined(GRAPHICS_API_OPENGL_21) #undef GRAPHICS_API_OPENGL_21 #endif @@ -174,6 +174,11 @@ #endif #endif +// Software implementation uses OpenGL 1.1 functionality +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + #define GRAPHICS_API_OPENGL_11 +#endif + // OpenGL 2.1 uses most of OpenGL 3.3 Core functionality // WARNING: Specific parts are checked with #if defines #if defined(GRAPHICS_API_OPENGL_21) @@ -767,6 +772,10 @@ RLAPI unsigned int rlLoadFramebuffer(void); // Loa RLAPI void rlFramebufferAttach(unsigned int fboId, unsigned int texId, int attachType, int texType, int mipLevel); // Attach texture/renderbuffer to a framebuffer RLAPI bool rlFramebufferComplete(unsigned int id); // Verify framebuffer is complete RLAPI void rlUnloadFramebuffer(unsigned int id); // Delete framebuffer from GPU +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) +RLAPI void* rlGetFramebuffer(int* width, int* height); +RLAPI void rlResizeFramebuffer(int width, int height); +#endif // Shaders management RLAPI unsigned int rlLoadShaderCode(const char *vsCode, const char *fsCode); // Load shader from code strings @@ -833,24 +842,29 @@ RLAPI void rlLoadDrawQuad(void); // Load and draw a quad #endif #if defined(GRAPHICS_API_OPENGL_11) - #if defined(__APPLE__) - #include // OpenGL 1.1 library for OSX - #include // OpenGL extensions library + #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + #define RLSW_IMPL + 
#include // OpenGL 1.1 software implementation #else - // APIENTRY for OpenGL function pointer declarations is required - #if !defined(APIENTRY) - #if defined(_WIN32) - #define APIENTRY __stdcall - #else - #define APIENTRY + #if defined(__APPLE__) + #include // OpenGL 1.1 library for OSX + #include // OpenGL extensions library + #else + // APIENTRY for OpenGL function pointer declarations is required + #if !defined(APIENTRY) + #if defined(_WIN32) + #define APIENTRY __stdcall + #else + #define APIENTRY + #endif + #endif + // WINGDIAPI definition. Some Windows OpenGL headers need it + #if !defined(WINGDIAPI) && defined(_WIN32) + #define WINGDIAPI __declspec(dllimport) #endif - #endif - // WINGDIAPI definition. Some Windows OpenGL headers need it - #if !defined(WINGDIAPI) && defined(_WIN32) - #define WINGDIAPI __declspec(dllimport) - #endif - #include // OpenGL 1.1 library + #include // OpenGL 1.1 library + #endif #endif #endif @@ -2310,6 +2324,10 @@ void rlglInit(int width, int height) glShadeModel(GL_SMOOTH); // Smooth shading between vertex (vertex colors interpolation) #endif +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + swInit(width, height); +#endif + #if defined(GRAPHICS_API_OPENGL_33) || defined(GRAPHICS_API_OPENGL_ES2) // Store screen size into global variables RLGL.State.framebufferWidth = width; @@ -2336,6 +2354,10 @@ void rlglClose(void) glDeleteTextures(1, &RLGL.State.defaultTextureId); // Unload default texture TRACELOG(RL_LOG_INFO, "TEXTURE: [ID %i] Default texture unloaded successfully", RLGL.State.defaultTextureId); #endif + +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + swClose(); +#endif } // Load OpenGL extensions @@ -3673,6 +3695,20 @@ void *rlReadTexturePixels(unsigned int id, int width, int height, int format) return pixels; } +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + +void* rlGetFramebuffer(int* width, int* height) +{ + return swGetColorBuffer(&width, &height); +} + +void rlResizeFramebuffer(int width, int height) +{ + 
swResizeFramebuffer(width, height); +} + +#endif + // Read screen pixel data (color buffer) unsigned char *rlReadScreenPixels(int width, int height) { From 89d40510c6b130dcab2ce1dc926515f5fe0b6259 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 22:49:24 +0100 Subject: [PATCH 039/105] update rlgl.h --- src/rlgl.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/rlgl.h b/src/rlgl.h index a035481cc..3a7acec5e 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -773,7 +773,7 @@ RLAPI void rlFramebufferAttach(unsigned int fboId, unsigned int texId, int attac RLAPI bool rlFramebufferComplete(unsigned int id); // Verify framebuffer is complete RLAPI void rlUnloadFramebuffer(unsigned int id); // Delete framebuffer from GPU #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) -RLAPI void* rlGetFramebuffer(int* width, int* height); +RLAPI void rlCopyFramebuffer(int x, int y, int w, int h, int format, void* pixels); RLAPI void rlResizeFramebuffer(int width, int height); #endif @@ -3696,17 +3696,16 @@ void *rlReadTexturePixels(unsigned int id, int width, int height, int format) } #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) - -void* rlGetFramebuffer(int* width, int* height) +void rlCopyFramebuffer(int x, int y, int w, int h, int format, void* pixels) { - return swGetColorBuffer(&width, &height); + unsigned int glInternalFormat, glFormat, glType; + rlGetGlTextureFormats(format, &glInternalFormat, &glFormat, &glType); + swCopyFramebuffer(x, y, w, h, glFormat, glType, pixels); } - void rlResizeFramebuffer(int width, int height) { swResizeFramebuffer(width, height); } - #endif // Read screen pixel data (color buffer) @@ -5306,4 +5305,4 @@ static Matrix rlMatrixInvert(Matrix mat) return result; } -#endif // RLGL_IMPLEMENTATION +#endif // RLGL_IMPLEMENTATION \ No newline at end of file From be24b0635684a6536799ab0b3bd9e4917ce0af3c Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:33:12 +0100 Subject: [PATCH 040/105] texture copy 
support --- src/external/rlsw.h | 97 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 14 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 4d77bdc68..dca834349 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -46,6 +46,10 @@ # define SW_FREE(ptr) free(ptr) #endif +#ifndef SW_GL_BINDING_COPY_TEXTURE +# define SW_GL_BINDING_COPY_TEXTURE true +#endif + #ifndef SW_COLOR_BUFFER_BITS # define SW_COLOR_BUFFER_BITS 24 #endif @@ -299,7 +303,7 @@ typedef double GLclampd; #define glDrawArrays(m, o, c) swDrawArrays(m, o, c) #define glGenTextures(c, v) swGenTextures(c, v) #define glDeleteTextures(c, v) swDeleteTextures(c, v) -#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, p) +#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, SW_GL_BINDING_COPY_TEXTURE, p) #define glTexParameteri(tr, pname, param) swTexParameteri(pname, param) #define glBindTexture(tr, id) swBindTexture(id) @@ -514,7 +518,7 @@ void swDrawArrays(SWdraw mode, int offset, int count); void swGenTextures(int count, uint32_t* textures); void swDeleteTextures(int count, uint32_t* textures); -void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data); +void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy, const void* data); void swTexParameteri(int param, int value); void swBindTexture(uint32_t id); @@ -590,7 +594,14 @@ typedef struct { typedef struct { - const void* pixels; + // Dirty hack for copied data + // TODO: Rework copied image handling + + union { + const void* cptr; //< NOTE: Is used for all data reads + void* ptr; //< WARNING: Should only be used to allocate and free data + } pixels; + int width; int height; int format; @@ -604,6 +615,8 @@ typedef struct { float tx; float ty; + bool copy; + } sw_texture_t; typedef struct { @@ -1207,6 +1220,43 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) return -1; // 
Unsupported format } +int sw_get_pixel_bpp(sw_pixelformat_e format) +{ + int bpp = 0; + + switch (format) + { + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: bpp = 8; break; + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: bpp = 16; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: bpp = 32; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: bpp = 24; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32: bpp = 32; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: bpp = 32*3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: bpp = 32*4; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16: bpp = 16; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: bpp = 16*3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: bpp = 16*4; break; + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: bpp = 4; break; + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: bpp = 8; break; + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: bpp = 2; break; + default: break; + } + + return bpp; +} + + static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); @@ -1640,7 +1690,7 @@ static inline void sw_texture_sample_nearest(float* color, const sw_texture_t* t int x, y; sw_texture_map(&x, u, tex->width, tex->sWrap); sw_texture_map(&y, v, tex->height, tex->tWrap); - sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); + sw_get_pixel(color, tex->pixels.cptr, y * tex->width + x, tex->format); } static inline void 
sw_texture_sample_linear(float* color, const sw_texture_t* tex, float u, float v) @@ -1655,10 +1705,10 @@ static inline void sw_texture_sample_linear(float* color, const sw_texture_t* te float fy = v * (tex->height - 1) - y0; float c00[4], c10[4], c01[4], c11[4]; - sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); - sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); - sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); - sw_get_pixel(c11, tex->pixels, y1 * tex->width + x1, tex->format); + sw_get_pixel(c00, tex->pixels.cptr, y0 * tex->width + x0, tex->format); + sw_get_pixel(c10, tex->pixels.cptr, y0 * tex->width + x1, tex->format); + sw_get_pixel(c01, tex->pixels.cptr, y1 * tex->width + x0, tex->format); + sw_get_pixel(c11, tex->pixels.cptr, y1 * tex->width + x1, tex->format); float c0[4], c1[4]; for (int i = 0; i < 4; i++) { @@ -2937,7 +2987,7 @@ static inline bool sw_is_texture_valid(uint32_t id) if (id == 0) valid = false; else if (id >= SW_MAX_TEXTURES) valid = false; - else if (RLSW.loadedTextures[id].pixels == 0) valid = false; + else if (RLSW.loadedTextures[id].pixels.cptr == 0) valid = false; return true; } @@ -3099,7 +3149,7 @@ bool swInit(int w, int h) 1.0f, 1.0f, 1.0f, }; - RLSW.loadedTextures[0].pixels = defTex; + RLSW.loadedTextures[0].pixels.cptr = defTex; RLSW.loadedTextures[0].width = 2; RLSW.loadedTextures[0].height = 2; RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; @@ -3990,12 +4040,15 @@ void swDeleteTextures(int count, uint32_t* textures) RLSW.errCode = SW_INVALID_VALUE; continue; } - RLSW.loadedTextures[textures[i]].pixels = 0; + if (RLSW.loadedTextures[textures[i]].copy) { + SW_FREE(RLSW.loadedTextures[textures[i]].pixels.ptr); + } + RLSW.loadedTextures[textures[i]].pixels.cptr = NULL; RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = textures[i]; } } -void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data) +void swTexImage2D(int width, int 
height, SWformat format, SWtype type, bool copy, const void* data) { uint32_t id = RLSW.currentTexture; @@ -4013,12 +4066,28 @@ void swTexImage2D(int width, int height, SWformat format, SWtype type, const voi sw_texture_t* texture = &RLSW.loadedTextures[id]; - texture->pixels = data; + if (copy) { + int bpp = sw_get_pixel_bpp(pixelFormat); + int size = bpp * width * height; + texture->pixels.ptr = SW_MALLOC(size); + if (texture->pixels.ptr == NULL) { + RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory... + return; + } + for (int i = 0; i < size; i++) { + ((uint8_t*)texture->pixels.ptr)[i] = ((uint8_t*)data)[i]; + } + } + else { + texture->pixels.cptr = data; + } + texture->width = width; texture->height = height; texture->format = pixelFormat; texture->tx = 1.0f / width; texture->ty = 1.0f / height; + texture->copy = copy; } void swTexParameteri(int param, int value) @@ -4080,7 +4149,7 @@ void swBindTexture(uint32_t id) return; } - if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { + if (id > 0 && RLSW.loadedTextures[id].pixels.cptr == NULL) { RLSW.errCode = SW_INVALID_OPERATION; return; } From f902cc3f29c2db45b79c9f6f808967e18828e3d0 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:34:21 +0100 Subject: [PATCH 041/105] fix typo.. 
--- src/external/rlsw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index dca834349..2627ddc3d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -458,7 +458,7 @@ void swEnable(SWstate state); void swDisable(SWstate state); void swGetFloatv(SWget name, float* v); -const char* glGetString(SWget name); +const char* swGetString(SWget name); void swViewport(int x, int y, int width, int height); void swScissor(int x, int y, int width, int height); @@ -3320,7 +3320,7 @@ void swGetFloatv(SWget name, float* v) } } -const char* glGetString(SWget name) +const char* swGetString(SWget name) { const char* result = NULL; From f3de37eff3c5086192a332edce4091038ec06f06 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:35:51 +0100 Subject: [PATCH 042/105] add get error function --- src/external/rlsw.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 2627ddc3d..ed2eed4c9 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -252,6 +252,7 @@ typedef double GLclampd; #define glDisable(state) swDisable(state) #define glGetFloatv(pname, params) swGetFloatv(pname, params) #define glGetString(pname) swGetString(pname) +#define glGetError() swGetError() #define glViewport(x, y, w, h) swViewport(x, y, w, h) #define glScissor(x, y, w, h) swScissor(x, y, w, h) #define glClearColor(r, g, b, a) swClearColor(r, g, b, a) @@ -459,6 +460,7 @@ void swDisable(SWstate state); void swGetFloatv(SWget name, float* v); const char* swGetString(SWget name); +SWerrcode swGetError(void); void swViewport(int x, int y, int width, int height); void swScissor(int x, int y, int width, int height); @@ -3345,6 +3347,13 @@ const char* swGetString(SWget name) return result; } +SWerrcode swGetError(void) +{ + SWerrcode ret = RLSW.errCode; + RLSW.errCode = SW_NO_ERROR; + return ret; +} + void swViewport(int x, int y, int width, int height) { if (x <= -width || y 
<= -height) { From a420e5ab822d0fa02d8fa20f97009220980869a5 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:38:19 +0100 Subject: [PATCH 043/105] def sw alloc macros --- src/rlgl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rlgl.h b/src/rlgl.h index 3a7acec5e..3daff2348 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -844,6 +844,9 @@ RLAPI void rlLoadDrawQuad(void); // Load and draw a quad #if defined(GRAPHICS_API_OPENGL_11) #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) #define RLSW_IMPL + #define SW_MALLOC(sz) RL_MALLOC(sz) + #define SW_REALLOC(ptr, newSz) RL_REALLOC(ptr, newSz) + #define SW_FREE(ptr) RL_FREE(ptr) #include // OpenGL 1.1 software implementation #else #if defined(__APPLE__) From dce242435c0bcda82ab2bed6a9213d8913426529 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 23 Apr 2025 16:06:39 +0200 Subject: [PATCH 044/105] reimpl get color buffer func just in case --- src/external/rlsw.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index ed2eed4c9..b0abf331c 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -454,6 +454,7 @@ void swClose(void); bool swResizeFramebuffer(int w, int h); void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* pixels); +void* swGetColorBuffer(int* w, int* h); void swEnable(SWstate state); void swDisable(SWstate state); @@ -3211,6 +3212,14 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, } } +void* swGetColorBuffer(int* w, int* h) +{ + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; +} + void swEnable(SWstate state) { switch (state) { From a40e8f463c19521af2075be6af6a3487a87dfe7b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 23 Apr 2025 16:12:57 +0200 Subject: [PATCH 045/105] remove normal interpolation --- src/external/rlsw.h | 47 ++++++--------------------------------------- 1 file changed, 6 
insertions(+), 41 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index b0abf331c..e357bec9e 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -124,7 +124,7 @@ typedef double GLclampd; //#define GL_COLOR_WRITEMASK 0x0C23 //#define GL_CURRENT_INDEX 0x0B01 #define GL_CURRENT_COLOR 0x0B00 -#define GL_CURRENT_NORMAL 0x0B02 +//#define GL_CURRENT_NORMAL 0x0B02 //#define GL_CURRENT_RASTER_COLOR 0x0B04 //#define GL_CURRENT_RASTER_DISTANCE 0x0B09 //#define GL_CURRENT_RASTER_INDEX 0x0B05 @@ -154,7 +154,7 @@ typedef double GLclampd; #define GL_TEXTURE 0x1702 #define GL_VERTEX_ARRAY 0x8074 -#define GL_NORMAL_ARRAY 0x8075 +//#define GL_NORMAL_ARRAY 0x8075 #define GL_COLOR_ARRAY 0x8076 //#define GL_INDEX_ARRAY 0x8077 #define GL_TEXTURE_COORD_ARRAY 0x8078 @@ -293,13 +293,11 @@ typedef double GLclampd; #define glColor4fv(v) swColor4fv(v) #define glTexCoord2f(u, v) swTexCoord2f(u, v) #define glTexCoord2fv(v) swTexCoord2fv(v) -#define glNormal3f(x, y, z) swNormal3f(x, y, z) -#define glNormal3fv(v) swNormal3fv(v) + #define glEnableClientState(t) ((void)(t)) #define glDisableClientState(t) swBindArray(t, 0) #define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, p) #define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, p) -#define glNormalPointer(t, s, p) swBindArray(SW_NORMAL_ARRAY, p) #define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, p) #define glDrawArrays(m, o, c) swDrawArrays(m, o, c) #define glGenTextures(c, v) swGenTextures(c, v) @@ -322,6 +320,9 @@ typedef double GLclampd; #define glTexSubImage2D(X,Y,Z,W,A,B,C,D,E) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A),(void)(B),(void)(C),(void)(D),(void)(E)) #define glGetTexImage(X,Y,Z,W,A) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A)) #define glDrawElements(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) +#define glNormal3f(X,Y,Z) ((void)(X),(void)(Y),(void)(Z)) +#define glNormal3fv(X) ((void)(X)) +#define glNormalPointer(X,Y,Z) 
((void)(X),(void)(Y),(void)(Z)) /* === RLSW Enums === */ @@ -341,7 +342,6 @@ typedef enum { SW_EXTENSIONS = GL_EXTENSIONS, SW_COLOR_CLEAR_VALUE = GL_COLOR_CLEAR_VALUE, SW_CURRENT_COLOR = GL_CURRENT_COLOR, - SW_CURRENT_NORMAL = GL_CURRENT_NORMAL, SW_CURRENT_TEXTURE_COORDS = GL_CURRENT_TEXTURE_COORDS, SW_MODELVIEW_MATRIX = GL_MODELVIEW_MATRIX, SW_MODELVIEW_STACK_DEPTH = GL_MODELVIEW_STACK_DEPTH, @@ -366,7 +366,6 @@ typedef enum { typedef enum { SW_VERTEX_ARRAY = GL_VERTEX_ARRAY, SW_TEXTURE_COORD_ARRAY = GL_TEXTURE_COORD_ARRAY, - SW_NORMAL_ARRAY = GL_NORMAL_ARRAY, SW_COLOR_ARRAY = GL_COLOR_ARRAY } SWarray; @@ -512,9 +511,6 @@ void swColor4fv(const float* v); void swTexCoord2f(float u, float v); void swTexCoord2fv(const float* v); -void swNormal3f(float x, float y, float z); -void swNormal3fv(const float* v); - void swBindArray(SWarray type, void *buffer); void swDrawArrays(SWdraw mode, int offset, int count); @@ -586,7 +582,6 @@ typedef uint16_t sw_half_t; typedef struct { float position[4]; // Position coordinates - float normal[3]; // Normal vector float texcoord[2]; // Texture coordinates float color[4]; // Color @@ -654,7 +649,6 @@ typedef struct { struct { float* positions; float* texcoords; - float* normals; uint8_t* colors; } array; @@ -3134,10 +3128,6 @@ bool swInit(int w, int h) RLSW.vertexBuffer[0].texcoord[0] = 0.0f; RLSW.vertexBuffer[0].texcoord[1] = 0.0f; - RLSW.vertexBuffer[0].normal[0] = 0.0f; - RLSW.vertexBuffer[0].normal[1] = 0.0f; - RLSW.vertexBuffer[0].normal[2] = 1.0f; - RLSW.srcFactor = SW_SRC_ALPHA; RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; @@ -3301,11 +3291,6 @@ void swGetFloatv(SWget name, float* v) v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[2]; v[3] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[3]; break; - case SW_CURRENT_NORMAL: - v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[0]; - v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[1]; - v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[2]; - break; case 
SW_CURRENT_TEXTURE_COORDS: v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[0]; v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[1]; @@ -3965,20 +3950,6 @@ void swTexCoord2fv(const float* v) RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; } -void swNormal3f(float x, float y, float z) -{ - RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = x; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = y; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = z; -} - -void swNormal3fv(const float* v) -{ - RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = v[0]; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = v[1]; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = v[2]; -} - void swBindArray(SWarray type, void *buffer) { switch (type) { @@ -3988,9 +3959,6 @@ void swBindArray(SWarray type, void *buffer) case SW_TEXTURE_COORD_ARRAY: RLSW.array.texcoords = buffer; break; - case SW_NORMAL_ARRAY: - RLSW.array.normals = buffer; - break; case SW_COLOR_ARRAY: RLSW.array.colors = buffer; break; @@ -4012,9 +3980,6 @@ void swDrawArrays(SWdraw mode, int offset, int count) if (RLSW.array.texcoords) { swTexCoord2fv(RLSW.array.texcoords + 2 * i); } - if (RLSW.array.normals) { - swNormal3fv(RLSW.array.normals + 3 * i); - } if (RLSW.array.colors) { swColor4ubv(RLSW.array.colors + 4 * i); } From 35371bf2b2c09e4af39112407d31eee82b482965 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 23 Apr 2025 16:50:46 +0200 Subject: [PATCH 046/105] review texture wrap --- src/external/rlsw.h | 58 +++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 42 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e357bec9e..e35db10d0 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -193,8 +193,7 @@ typedef double GLclampd; #define GL_LINEAR 0x2601 #define GL_REPEAT 0x2901 -#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) -#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) +#define GL_CLAMP 0x2900 #define GL_TEXTURE_MAG_FILTER 
0x2800 #define GL_TEXTURE_MIN_FILTER 0x2801 @@ -425,8 +424,7 @@ typedef enum { typedef enum { SW_REPEAT = GL_REPEAT, - SW_CLAMP_TO_EDGE = GL_CLAMP_TO_EDGE, - SW_MIRRORED_REPEAT = GL_MIRRORED_REPEAT + SW_CLAMP = GL_CLAMP, } SWwrap; typedef enum { @@ -1644,40 +1642,14 @@ static inline void sw_set_pixel(void* pixels, uint32_t offset, sw_pixelformat_e /* === Texture Sampling Part === */ -static inline void sw_texture_map_repeat(int* out, float in, int max) -{ - // Upscale to nearest texture coordinates - // NOTE: We use '(int)(x+0.5)' although this is incorrect - // regarding the direction of rounding in case of negative values - // and also less accurate than roundf, but it remains so much more - // efficient that it is preferable for now to opt for this option. - - *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); -} - -static inline void sw_texture_map_clamp_to_edge(int* out, float in, int max) -{ - *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); -} - -static inline void sw_texture_map_mirrored_repeat(int* out, float in, int max) -{ - in = fmodf(fabsf(in), 2); - if (in > 1.0f) in = 1.0f - (in - 1.0f); - *out = (int)(in * (max - 1) + 0.5f); -} - static inline void sw_texture_map(int* out, float in, int max, SWwrap mode) { switch (mode) { case SW_REPEAT: - sw_texture_map_repeat(out, in, max); + *out = (int)((in - floorf(in)) * max + 0.5f); break; - case SW_CLAMP_TO_EDGE: - sw_texture_map_clamp_to_edge(out, in, max); - break; - case SW_MIRRORED_REPEAT: - sw_texture_map_mirrored_repeat(out, in, max); + case SW_CLAMP: + *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); break; } } @@ -1733,16 +1705,18 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa float du2 = xDu * xDu + yDu * yDu; float dv2 = xDv * xDv + yDv * yDv; float L2 = (du2 > dv2) ? du2 : dv2; - - bool useMinFilter = (L2 > 1.0f); - int filter = useMinFilter ? tex->minFilter : tex->magFilter; - - if (filter == SW_NEAREST) { + + SWfilter filter = (L2 > 1.0f) + ? 
tex->minFilter : tex->magFilter; + + switch (filter) { + case SW_NEAREST: sw_texture_sample_nearest(color, tex, u, v); - } - else /* SW_LINEAR */ { + break; + case SW_LINEAR: sw_texture_sample_linear(color, tex, u, v); - } + break; + } } @@ -2996,7 +2970,7 @@ static inline bool sw_is_texture_filter_valid(int filter) static inline bool sw_is_texture_wrap_valid(int wrap) { - return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); + return (wrap == SW_REPEAT || wrap == SW_CLAMP); } static inline bool sw_is_draw_mode_valid(int mode) From fbdd38c92cba000303ccae1ae76a2557e49bfd5e Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 23 Apr 2025 17:12:05 +0200 Subject: [PATCH 047/105] fix ndc projection (viewport/scissor) --- src/external/rlsw.h | 72 ++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 46 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e35db10d0..effbebbc6 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -28,10 +28,6 @@ #include #include -// TODO: Review the use of viewport dimensions stored with -1 -// It seems there are issues with the NDC -> screen projection -// Also, consider testing and reviewing, if necessary, scissor clipping as well as line clipping - /* === RLSW Definition And Macros === */ #ifndef SW_MALLOC @@ -2727,13 +2723,13 @@ static inline void FUNC_NAME(int x, int y, float z, float color[4]) \ { \ if (CHECK_BOUNDS == 1) \ { \ - if (x < RLSW.vpMin[0] || x > RLSW.vpMax[0]) return; \ - if (y < RLSW.vpMin[1] || y > RLSW.vpMax[1]) return; \ + if (x < RLSW.vpMin[0] || x >= RLSW.vpMax[0]) return; \ + if (y < RLSW.vpMin[1] || y >= RLSW.vpMax[1]) return; \ } \ else if (CHECK_BOUNDS == SW_SCISSOR_TEST) \ { \ - if (x < RLSW.scMin[0] || x > RLSW.scMax[0]) return; \ - if (y < RLSW.scMin[1] || y > RLSW.scMax[1]) return; \ + if (x < RLSW.scMin[0] || x >= RLSW.scMax[0]) return; \ + if (y < RLSW.scMin[1] || y >= RLSW.scMax[1]) return; \ } \ \ int offset = y * 
RLSW.framebuffer.width + x; \ @@ -3324,59 +3320,43 @@ SWerrcode swGetError(void) void swViewport(int x, int y, int width, int height) { - if (x <= -width || y <= -height) { - RLSW.errCode = SW_INVALID_OPERATION; + if (width < 0 || height < 0) { + RLSW.errCode = SW_INVALID_VALUE; return; } RLSW.vpPos[0] = x; RLSW.vpPos[1] = y; + RLSW.vpDim[0] = width; + RLSW.vpDim[1] = height; - RLSW.vpDim[0] = width - 1; - RLSW.vpDim[1] = height - 1; - - RLSW.vpMin[0] = (x < 0) ? 0 : x; - RLSW.vpMin[1] = (y < 0) ? 0 : y; - - int fbW = RLSW.framebuffer.width - 1; - int fbH = RLSW.framebuffer.height - 1; - - int vpMaxX = x + width; - int vpMaxY = y + height; - - RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; - RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; + RLSW.vpMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1); + RLSW.vpMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1); + RLSW.vpMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1); + RLSW.vpMax[1] = sw_clampi(y + height, 0, RLSW.framebuffer.height - 1); } void swScissor(int x, int y, int width, int height) { - sw_clampi(x, 0, RLSW.framebuffer.width); - sw_clampi(y, 0, RLSW.framebuffer.height); - sw_clampi(width, 0, RLSW.framebuffer.width); - sw_clampi(width, 0, RLSW.framebuffer.height); + if (width < 0 || height < 0) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } RLSW.scPos[0] = x; RLSW.scPos[1] = y; + RLSW.scDim[0] = width; + RLSW.scDim[1] = height; - RLSW.scDim[0] = width - 1; - RLSW.scDim[1] = height - 1; + RLSW.scMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1); + RLSW.scMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1); + RLSW.scMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1); + RLSW.scMax[1] = sw_clampi(y + height, 0, RLSW.framebuffer.height - 1); - RLSW.scMin[0] = (x < 0) ? 0 : x; - RLSW.scMin[1] = (y < 0) ? 
0 : y; - - int fbW = RLSW.framebuffer.width - 1; - int fbH = RLSW.framebuffer.height - 1; - - int vpMaxX = x + width; - int vpMaxY = y + height; - - RLSW.scMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; - RLSW.scMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; - - RLSW.scHMin[0] = (2.0f * (float)RLSW.scMin[0] / (float)(RLSW.vpDim[0] + 1)) - 1.0f; - RLSW.scHMax[0] = (2.0f * (float)RLSW.scMax[0] / (float)(RLSW.vpDim[0] + 1)) - 1.0f; - RLSW.scHMax[1] = 1.0f - (2.0f * (float)RLSW.scMin[1] / (float)(RLSW.vpDim[1] + 1)); - RLSW.scHMin[1] = 1.0f - (2.0f * (float)RLSW.scMax[1] / (float)(RLSW.vpDim[1] + 1)); + RLSW.scHMin[0] = (2.0f * (float)RLSW.scMin[0] / (float)RLSW.vpDim[0]) - 1.0f; + RLSW.scHMax[0] = (2.0f * (float)RLSW.scMax[0] / (float)RLSW.vpDim[0]) - 1.0f; + RLSW.scHMax[1] = 1.0f - (2.0f * (float)RLSW.scMin[1] / (float)RLSW.vpDim[1]); + RLSW.scHMin[1] = 1.0f - (2.0f * (float)RLSW.scMax[1] / (float)RLSW.vpDim[1]); } void swClearColor(float r, float g, float b, float a) From aff76d9692f54ede766c6d414d7652660e187170 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 23 Apr 2025 17:48:04 +0200 Subject: [PATCH 048/105] impl framebuffer blit function --- src/external/rlsw.h | 52 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index effbebbc6..9b6e975a0 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -447,6 +447,7 @@ void swClose(void); bool swResizeFramebuffer(int w, int h); void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* pixels); +void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void* pixels); void* swGetColorBuffer(int* w, int* h); void swEnable(SWstate state); @@ -3160,14 +3161,57 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* src = RLSW.framebuffer.color; - int srcW = RLSW.framebuffer.width; - int srcHm1 
= RLSW.framebuffer.height - 1; + int wSrc = RLSW.framebuffer.width; + int hSrcM1 = RLSW.framebuffer.height - 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float color[4]; - sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, (srcHm1 - y) * srcW + x)); - sw_set_pixel(pixels, y * srcW + x, pFormat, color); + sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, (hSrcM1 - y) * wSrc + x)); + sw_set_pixel(pixels, y * wSrc + x, pFormat, color); + } + } +} + +void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, + int xSrc, int ySrc, int wSrc, int hSrc, + SWformat format, SWtype type, void* pixels) +{ + sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); + + void* src = RLSW.framebuffer.color; + int fbWidth = RLSW.framebuffer.width; + int fbHeight = RLSW.framebuffer.height; + + // Calculation of scaling factors in 16.16 (fixed-point) + const int xScale = (wSrc << 16) / wDst; + const int yScale = (hSrc << 16) / hDst; + + const int xSrcBase = xSrc << 16; + const int ySrcBase = ySrc << 16; + + for (int y = 0; y < hDst; y++) { + const int ySrcFixed = ySrcBase + y * yScale; + const int ySrcInt = ySrcFixed >> 16; + + if ((unsigned)ySrcInt >= (unsigned)fbHeight) { + continue; + } + + for (int x = 0; x < wDst; x++) { + const int xSrcFixed = xSrcBase + x * xScale; + const int xSrcInt = xSrcFixed >> 16; + + if ((unsigned)xSrcInt >= (unsigned)fbWidth) { + continue; + } + + float color[4]; + const int srcIndex = ySrcInt * fbWidth + xSrcInt; + sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, srcIndex)); + + const int dstIndex = (yDst + y) * wDst + (xDst + x); + sw_set_pixel(pixels, dstIndex, pFormat, color); } } } From 0a6b41d189e6264d9ef3770db6ecd98239868d02 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 4 May 2025 16:35:03 +0200 Subject: [PATCH 049/105] reduce matrix compuations and memory usage --- src/external/rlsw.h | 258 +++++++++++++++++++++++++------------------- 1 file changed, 150 
insertions(+), 108 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 9b6e975a0..656707c44 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -627,7 +627,6 @@ typedef struct { float clearDepth; // Depth value used to clear the screen uint32_t currentTexture; - sw_matrix_t *currentMatrix; int vpPos[2]; // Represents the top-left corner of the viewport int vpDim[2]; // Represents the dimensions of the viewport (minus one) @@ -655,22 +654,16 @@ typedef struct { float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width - sw_matrix_t matProjection; // Projection matrix, user adjustable - sw_matrix_t matTexture; // Texture matrix, user adjustable - sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) - sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) - sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally - sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations uint32_t stackProjectionCounter; // Counter for matrix stack operations uint32_t stackModelviewCounter; // Counter for matrix stack operations uint32_t stackTextureCounter; // Counter for matrix stack operations - SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) - bool modelMatrixUsed; // Flag indicating if the model matrix is used - bool needToUpdateMVP; + sw_matrix_t* currentMatrix; // Pointer to the currently used matrix according to the mode + sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally + bool isDirtyMVP; // Indicates if the MVP matrix should be rebuilt SWfactor srcFactor; SWfactor 
dstFactor; @@ -721,6 +714,7 @@ static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const } } +/* static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) { float tmp[4] = { @@ -734,6 +728,7 @@ static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_ma dst[i] = tmp[i]; } } +*/ static inline float sw_saturate(float x) { @@ -1977,14 +1972,9 @@ static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_V return n > 0; } -static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline void sw_triangle_clip_and_project(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { - // Step 1: MVP projection for all vertices - for (int i = 0; i < *vertexCounter; i++) { - sw_vec4_transform(polygon[i].homogeneous, polygon[i].position, RLSW.matMVP); - } - - // Step 2: Face culling - discard triangles facing away + // Step 1: Face culling - discard triangles facing away if (RLSW.stateFlags & SW_STATE_CULL_FACE) { // NOTE: Face culling is done before clipping to avoid unnecessary computations. 
@@ -2011,7 +2001,7 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP } } - // Step 3: Clipping and perspective projection + // Step 2: Clipping and perspective projection if (sw_triangle_clip(polygon, vertexCounter) && *vertexCounter >= 3) { // Transformation to screen space and normalization @@ -2316,7 +2306,7 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* polygon[1] = *v1; polygon[2] = *v2; - sw_triangle_project_and_clip(polygon, &vertexCounter); + sw_triangle_clip_and_project(polygon, &vertexCounter); if (vertexCounter < 3) { return; @@ -2425,11 +2415,8 @@ static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) return true; } -static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) +static inline bool sw_line_clip_and_project(sw_vertex_t* v0, sw_vertex_t* v1) { - sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); - sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); - if (!sw_line_clip(v0, v1)) { return false; } @@ -2645,7 +2632,7 @@ DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH_BLEND, sw_line_raster_DEPTH_ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) { - if (!sw_line_project_and_clip(v0, v1)) { + if (!sw_line_clip_and_project(v0, v1)) { return; } @@ -2682,10 +2669,8 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) /* === Point Rendering Part === */ -static inline bool sw_point_project_and_clip(sw_vertex_t* v) +static inline bool sw_point_clip_and_project(sw_vertex_t* v) { - sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); - if (v->homogeneous[3] != 1.0f) { for (int_fast8_t i = 0; i < 3; i++) { if (v->homogeneous[i] < -v->homogeneous[3] || v->homogeneous[i] > v->homogeneous[3]) { @@ -2824,7 +2809,7 @@ DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND_SCISSOR, sw_point_ra static inline void sw_point_render(sw_vertex_t* v) { - if (!sw_point_project_and_clip(v)) { + if 
(!sw_point_clip_and_project(v)) { return; } @@ -3083,13 +3068,17 @@ bool swInit(int w, int h) RLSW.clearDepth = 1.0f; RLSW.currentMatrixMode = SW_MODELVIEW; - RLSW.currentMatrix = &RLSW.matView; - RLSW.needToUpdateMVP = true; + RLSW.currentMatrix = &RLSW.stackModelview[0]; - sw_matrix_id(RLSW.matProjection); - sw_matrix_id(RLSW.matTexture); - sw_matrix_id(RLSW.matModel); - sw_matrix_id(RLSW.matView); + sw_matrix_id(RLSW.stackProjection[0]); + sw_matrix_id(RLSW.stackModelview[0]); + sw_matrix_id(RLSW.stackTexture[0]); + sw_matrix_id(RLSW.matMVP); + + RLSW.stackProjectionCounter = 1; + RLSW.stackModelviewCounter = 1; + RLSW.stackTextureCounter = 1; + RLSW.isDirtyMVP = false; RLSW.vertexBuffer[0].color[0] = 1.0f; RLSW.vertexBuffer[0].color[1] = 1.0f; @@ -3472,14 +3461,13 @@ void swMatrixMode(SWmatrix mode) { switch (mode) { case SW_PROJECTION: - RLSW.currentMatrix = &RLSW.matProjection; + RLSW.currentMatrix = &RLSW.stackProjection[RLSW.stackProjectionCounter - 1]; break; case SW_MODELVIEW: - RLSW.currentMatrix = RLSW.modelMatrixUsed - ? 
&RLSW.matModel : &RLSW.matView; + RLSW.currentMatrix = &RLSW.stackModelview[RLSW.stackModelviewCounter - 1]; break; case SW_TEXTURE: - RLSW.currentMatrix = &RLSW.matTexture; + RLSW.currentMatrix = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; break; default: RLSW.errCode = SW_INVALID_ENUM; @@ -3494,41 +3482,57 @@ void swPushMatrix(void) switch (RLSW.currentMatrixMode) { case SW_PROJECTION: - if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; + { + if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + + int iOld = RLSW.stackProjectionCounter - 1; + int iNew = RLSW.stackProjectionCounter++; + + for (int i = 0; i < 16; i++) { + RLSW.stackProjection[iNew][i] = RLSW.stackProjection[iOld][i]; + } + + RLSW.currentMatrix = &RLSW.stackProjection[iNew]; } - for (int i = 0; i < 16; i++) { - RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; - } - RLSW.stackProjectionCounter++; break; case SW_MODELVIEW: - if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - if (RLSW.modelMatrixUsed) { - for (int i = 0; i < 16; i++) { - RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; + { + if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; } - RLSW.stackModelviewCounter++; - } else { - RLSW.currentMatrix = &RLSW.matModel; - RLSW.modelMatrixUsed = true; + + int iOld = RLSW.stackModelviewCounter - 1; + int iNew = RLSW.stackModelviewCounter++; + + for (int i = 0; i < 16; i++) { + RLSW.stackModelview[iNew][i] = RLSW.stackModelview[iOld][i]; + } + + RLSW.currentMatrix = &RLSW.stackModelview[iNew]; } break; case SW_TEXTURE: - if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; + { + if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { + 
RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + + int iOld = RLSW.stackTextureCounter - 1; + int iNew = RLSW.stackTextureCounter++; + + for (int i = 0; i < 16; i++) { + RLSW.stackTexture[iNew][i] = RLSW.stackTexture[iOld][i]; + } + + RLSW.currentMatrix = &RLSW.stackTexture[iNew]; } - for (int i = 0; i < 16; i++) { - RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; - } - RLSW.stackTextureCounter++; break; } @@ -3539,41 +3543,37 @@ void swPopMatrix(void) switch (RLSW.currentMatrixMode) { case SW_PROJECTION: - if (RLSW.stackProjectionCounter <= 0) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - RLSW.stackProjectionCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; + { + if (RLSW.stackProjectionCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + + RLSW.currentMatrix = &RLSW.stackProjection[--RLSW.stackProjectionCounter]; + RLSW.isDirtyMVP = true; //< The MVP is considered to have been changed } break; case SW_MODELVIEW: - if (RLSW.stackModelviewCounter == 0) { - if (!RLSW.modelMatrixUsed) { + { + if (RLSW.stackModelviewCounter <= 0) { RLSW.errCode = SW_STACK_UNDERFLOW; return; } - sw_matrix_id(RLSW.matModel); - RLSW.currentMatrix = &RLSW.matView; - RLSW.modelMatrixUsed = false; - } else { - RLSW.stackModelviewCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matModel[i] = RLSW.stackModelview[RLSW.stackModelviewCounter][i]; - } + + RLSW.currentMatrix = &RLSW.stackModelview[--RLSW.stackModelviewCounter]; + RLSW.isDirtyMVP = true; //< The MVP is considered to have been changed } break; case SW_TEXTURE: - if (RLSW.stackTextureCounter <= 0) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - RLSW.stackTextureCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; + { + if (RLSW.stackTextureCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + + RLSW.currentMatrix = 
&RLSW.stackTexture[--RLSW.stackTextureCounter]; } break; @@ -3584,7 +3584,9 @@ void swLoadIdentity(void) { sw_matrix_id(*RLSW.currentMatrix); - RLSW.needToUpdateMVP = true; + if (RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swTranslatef(float x, float y, float z) @@ -3598,7 +3600,9 @@ void swTranslatef(float x, float y, float z) sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - RLSW.needToUpdateMVP = true; + if (RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swRotatef(float angle, float x, float y, float z) @@ -3642,7 +3646,9 @@ void swRotatef(float angle, float x, float y, float z) sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - RLSW.needToUpdateMVP = true; + if (RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swScalef(float x, float y, float z) @@ -3656,14 +3662,18 @@ void swScalef(float x, float y, float z) sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - RLSW.needToUpdateMVP = true; + if (RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swMultMatrixf(const float* mat) { sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - RLSW.needToUpdateMVP = true; + if (RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swFrustum(double left, double right, double bottom, double top, double znear, double zfar) @@ -3696,7 +3706,9 @@ void swFrustum(double left, double right, double bottom, double top, double znea sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - RLSW.needToUpdateMVP = true; + if (RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swOrtho(double left, double right, double bottom, double top, double znear, double zfar) @@ -3729,15 +3741,33 @@ void swOrtho(double left, double right, double bottom, double top, double znear, sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - RLSW.needToUpdateMVP = true; + if 
(RLSW.currentMatrixMode != SW_TEXTURE) { + RLSW.isDirtyMVP = true; + } } void swBegin(SWdraw mode) { + /* --- Check if the draw mode is valid --- */ + if (!sw_is_draw_mode_valid(mode)) { RLSW.errCode = SW_INVALID_ENUM; return; } + + /* --- Recalculate the MVP if this is needed --- */ + + if (RLSW.isDirtyMVP) { + sw_matrix_mul( + RLSW.matMVP, + RLSW.stackModelview[RLSW.stackModelviewCounter - 1], + RLSW.stackProjection[RLSW.stackProjectionCounter - 1] + ); + RLSW.isDirtyMVP = false; + } + + /* --- Initialize some values --- */ + RLSW.vertexCounter = 0; RLSW.drawMode = mode; } @@ -3797,10 +3827,24 @@ void swVertex4f(float x, float y, float z, float w) void swVertex4fv(const float* v) { + /* --- Copy the position in the current vertex --- */ + + sw_vertex_t* vertex = &RLSW.vertexBuffer[RLSW.vertexCounter++]; + for (int i = 0; i < 4; i++) { - RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; + vertex->position[i] = v[i]; } - RLSW.vertexCounter++; + + /* --- Calculation of homogeneous coordinates --- */ + + const sw_matrix_t* mat = &RLSW.matMVP; + + vertex->homogeneous[0] = (*mat)[0] * v[0] + (*mat)[4] * v[1] + (*mat)[8] * v[2] + (*mat)[12] * v[3]; + vertex->homogeneous[1] = (*mat)[1] * v[0] + (*mat)[5] * v[1] + (*mat)[9] * v[2] + (*mat)[13] * v[3]; + vertex->homogeneous[2] = (*mat)[2] * v[0] + (*mat)[6] * v[1] + (*mat)[10] * v[2] + (*mat)[14] * v[3]; + vertex->homogeneous[3] = (*mat)[3] * v[0] + (*mat)[7] * v[1] + (*mat)[11] * v[2] + (*mat)[15] * v[3]; + + /* --- Obtaining the number of vertices needed for this primitive --- */ int neededVertices = 0; switch (RLSW.drawMode) { @@ -3818,14 +3862,9 @@ void swVertex4fv(const float* v) break; } + /* --- Immediate rendering of the primitive if the required number is reached --- */ + if (RLSW.vertexCounter == neededVertices) { - - if (RLSW.needToUpdateMVP) { - RLSW.needToUpdateMVP = false; - sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); - sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); - } - 
switch (RLSW.polyMode) { case SW_FILL: sw_poly_fill_render(); @@ -3837,7 +3876,6 @@ void swVertex4fv(const float* v) sw_poly_point_render(); break; } - RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; RLSW.vertexCounter = 0; } @@ -3932,8 +3970,10 @@ void swColor4fv(const float* v) void swTexCoord2f(float u, float v) { - float s = RLSW.matTexture[0]*u + RLSW.matTexture[4]*v + RLSW.matTexture[12]; - float t = RLSW.matTexture[1]*u + RLSW.matTexture[5]*v + RLSW.matTexture[13]; + const sw_matrix_t* mat = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; + + float s = (*mat)[0]*u + (*mat)[4]*v + (*mat)[12]; + float t = (*mat)[1]*u + (*mat)[5]*v + (*mat)[13]; RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; @@ -3941,8 +3981,10 @@ void swTexCoord2f(float u, float v) void swTexCoord2fv(const float* v) { - float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; - float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; + const sw_matrix_t* mat = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; + + float s = (*mat)[0]*v[0] + (*mat)[4]*v[1] + (*mat)[12]; + float t = (*mat)[1]*v[0] + (*mat)[5]*v[1] + (*mat)[13]; RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; From 0e58392e8f4125b376cdcd3374825ff81f2f771a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 4 May 2025 16:57:51 +0200 Subject: [PATCH 050/105] swBegin tweaks --- src/external/rlsw.h | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 656707c44..d9540ceb1 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -626,8 +626,6 @@ typedef struct { float clearColor[4]; // Color used to clear the screen float clearDepth; // Depth value used to clear the screen - uint32_t currentTexture; - int vpPos[2]; // Represents the 
top-left corner of the viewport int vpDim[2]; // Represents the dimensions of the viewport (minus one) int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) @@ -640,6 +638,8 @@ typedef struct { float scHMin[2]; // Represents the minimum renderable point of the scissor rect in clip space float scHMax[2]; // Represents the maximum renderable point of the scissor rect in clip space + uint32_t currentTexture; + struct { float* positions; float* texcoords; @@ -651,6 +651,7 @@ typedef struct { SWdraw drawMode; // Current primitive mode (e.g., lines, triangles) SWpoly polyMode; // Current polygon filling mode (e.g., lines, triangles) + int reqVertices; // Number of vertices required for the primitive being drawn float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width @@ -3766,6 +3767,23 @@ void swBegin(SWdraw mode) RLSW.isDirtyMVP = false; } + /* --- Obtaining the number of vertices needed for this primitive --- */ + + switch (mode) { + case SW_POINTS: + RLSW.reqVertices = 1; + break; + case SW_LINES: + RLSW.reqVertices = 2; + break; + case SW_TRIANGLES: + RLSW.reqVertices = 3; + break; + case SW_QUADS: + RLSW.reqVertices = 4; + break; + } + /* --- Initialize some values --- */ RLSW.vertexCounter = 0; @@ -3774,7 +3792,7 @@ void swBegin(SWdraw mode) void swEnd(void) { - RLSW.vertexCounter = 0; + RLSW.drawMode = 0; } void swVertex2i(int x, int y) @@ -3844,27 +3862,9 @@ void swVertex4fv(const float* v) vertex->homogeneous[2] = (*mat)[2] * v[0] + (*mat)[6] * v[1] + (*mat)[10] * v[2] + (*mat)[14] * v[3]; vertex->homogeneous[3] = (*mat)[3] * v[0] + (*mat)[7] * v[1] + (*mat)[11] * v[2] + (*mat)[15] * v[3]; - /* --- Obtaining the number of vertices needed for this primitive --- */ - - int neededVertices = 0; - switch (RLSW.drawMode) { - case SW_POINTS: - neededVertices = 1; - break; - case SW_LINES: - neededVertices = 2; - break; - case SW_TRIANGLES: - neededVertices = 3; - break; - case SW_QUADS: - neededVertices = 4; 
- break; - } - /* --- Immediate rendering of the primitive if the required number is reached --- */ - if (RLSW.vertexCounter == neededVertices) { + if (RLSW.vertexCounter == RLSW.reqVertices) { switch (RLSW.polyMode) { case SW_FILL: sw_poly_fill_render(); @@ -3876,7 +3876,7 @@ void swVertex4fv(const float* v) sw_poly_point_render(); break; } - RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; + RLSW.vertexBuffer[0] = RLSW.vertexBuffer[RLSW.reqVertices - 1]; RLSW.vertexCounter = 0; } else { From c6cf6c953a0c197c7efd87c730d568f2d35ab5a3 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 4 May 2025 17:10:41 +0200 Subject: [PATCH 051/105] preventing a possible division by zero --- src/external/rlsw.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d9540ceb1..bd7246dc7 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2044,13 +2044,20 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, int xEnd = (int)(end->screen[0] + 0.5f); \ int y = (int)(start->screen[1] + 0.5f); \ \ - /* Calculate the initial interpolation parameter and its increment */ \ - float dt = 1.0f / (end->screen[0] - start->screen[0]); \ - float t = (xStart - start->screen[0]) * dt; \ + /* Safely compute the inverse horizontal distance */ \ + float dx = end->screen[0] - start->screen[0]; \ + if (fabsf(dx) < 1e-6f) return; \ \ + /* Calculate the interpolation step along the X axis */ \ + float dt = 1.0f / dx; \ + \ + /* Initialize the interpolation parameter \ + 't' ranges from 0 to 1 across the scanline */ \ + float t = (xStart - start->screen[0]) * dt; \ + \ + /* Calculate the horizontal gradients for UV coordinates */ \ float xDu, xDv; \ if (ENABLE_TEXTURE) { \ - /* Calculate the horizontal gradients for UV coordinates */ \ xDu = (end->texcoord[0] - start->texcoord[0]) * dt; \ xDv = (end->texcoord[1] - start->texcoord[1]) * dt; \ } \ From 
c1621b3d6d03bcf90233c35e92898924d2e9526d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 10 May 2025 22:29:15 +0200 Subject: [PATCH 052/105] remove useless scissor related data --- src/external/rlsw.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index bd7246dc7..a11d446d0 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -519,6 +519,7 @@ void swBindTexture(uint32_t id); #endif // RLSW_H +#define RLSW_IMPL #ifdef RLSW_IMPL #include @@ -631,8 +632,6 @@ typedef struct { int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) - int scPos[2]; // Represents the top-left corner of the scissor rect - int scDim[2]; // Represents the dimensions of the scissor rect (minus one) int scMin[2]; // Represents the minimum renderable point of the scissor rect (top-left) int scMax[2]; // Represents the maximum renderable point of the scissor rect (bottom-right) float scHMin[2]; // Represents the minimum renderable point of the scissor rect in clip space @@ -3384,11 +3383,6 @@ void swScissor(int x, int y, int width, int height) return; } - RLSW.scPos[0] = x; - RLSW.scPos[1] = y; - RLSW.scDim[0] = width; - RLSW.scDim[1] = height; - RLSW.scMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1); RLSW.scMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1); RLSW.scMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1); From a37a8cafd9dcdee1ecc01ec5699659355bf6a6c0 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 10 May 2025 22:59:52 +0200 Subject: [PATCH 053/105] review color blending system --- src/external/rlsw.h | 282 +++++++++++++++++++++++--------------------- 1 file changed, 149 insertions(+), 133 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a11d446d0..78b90c921 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -519,7 +519,6 @@ void 
swBindTexture(uint32_t id); #endif // RLSW_H -#define RLSW_IMPL #ifdef RLSW_IMPL #include @@ -572,6 +571,8 @@ typedef enum { SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp } sw_pixelformat_e; +typedef void (*sw_factor_f)(float *factor, const float *src, const float *dst); + typedef float sw_matrix_t[4*4]; typedef uint16_t sw_half_t; @@ -668,6 +669,9 @@ typedef struct { SWfactor srcFactor; SWfactor dstFactor; + sw_factor_f srcFactorFunc; + sw_factor_f dstFactorFunc; + SWface cullFace; // Faces to cull SWerrcode errCode; // Last error code @@ -1714,141 +1718,78 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa /* === Color Blending Functions === */ -static inline void sw_blend_colors(float dst[4], float src[4]) +static inline void sw_factor_zero(float *factor, const float *src, const float *dst) { - float src_factor[4] = { 0 }; - float dst_factor[4] = { 0 }; - - switch (RLSW.srcFactor) { - case SW_ZERO: - src_factor[0] = src_factor[1] = src_factor[2] = src_factor[3] = 0.0f; - break; - case SW_ONE: - src_factor[0] = src_factor[1] = src_factor[2] = src_factor[3] = 1.0f; - break; - case SW_SRC_COLOR: - src_factor[0] = src[0]; - src_factor[1] = src[1]; - src_factor[2] = src[2]; - src_factor[3] = src[3]; - break; - case SW_ONE_MINUS_SRC_COLOR: - src_factor[0] = 1.0f - src[0]; - src_factor[1] = 1.0f - src[1]; - src_factor[2] = 1.0f - src[2]; - src_factor[3] = 1.0f - src[3]; - break; - case SW_SRC_ALPHA: - src_factor[0] = src[3]; - src_factor[1] = src[3]; - src_factor[2] = src[3]; - src_factor[3] = src[3]; - break; - case SW_ONE_MINUS_SRC_ALPHA: - src_factor[0] = 1.0f - src[3]; - src_factor[1] = 1.0f - src[3]; - src_factor[2] = 1.0f - src[3]; - src_factor[3] = 1.0f - src[3]; - break; - case SW_DST_ALPHA: - src_factor[0] = dst[3]; - src_factor[1] = dst[3]; - src_factor[2] = dst[3]; - src_factor[3] = dst[3]; - break; - case SW_ONE_MINUS_DST_ALPHA: - src_factor[0] = 1.0f - dst[3]; - src_factor[1] = 1.0f - dst[3]; - src_factor[2] = 1.0f - 
dst[3]; - src_factor[3] = 1.0f - dst[3]; - break; - case SW_DST_COLOR: - src_factor[0] = dst[0]; - src_factor[1] = dst[1]; - src_factor[2] = dst[2]; - src_factor[3] = dst[3]; - break; - case SW_ONE_MINUS_DST_COLOR: - src_factor[0] = 1.0f - dst[0]; - src_factor[1] = 1.0f - dst[1]; - src_factor[2] = 1.0f - dst[2]; - src_factor[3] = 1.0f - dst[3]; - break; - case SW_SRC_ALPHA_SATURATE: - src_factor[0] = 1.0f; - src_factor[1] = 1.0f; - src_factor[2] = 1.0f; - src_factor[3] = fminf(src[3], 1.0f); - break; - } - - switch (RLSW.dstFactor) { - case SW_ZERO: - dst_factor[0] = dst_factor[1] = dst_factor[2] = dst_factor[3] = 0.0f; - break; - case SW_ONE: - dst_factor[0] = dst_factor[1] = dst_factor[2] = dst_factor[3] = 1.0f; - break; - case SW_SRC_COLOR: - dst_factor[0] = src[0]; - dst_factor[1] = src[1]; - dst_factor[2] = src[2]; - dst_factor[3] = src[3]; - break; - case SW_ONE_MINUS_SRC_COLOR: - dst_factor[0] = 1.0f - src[0]; - dst_factor[1] = 1.0f - src[1]; - dst_factor[2] = 1.0f - src[2]; - dst_factor[3] = 1.0f - src[3]; - break; - case SW_SRC_ALPHA: - dst_factor[0] = src[3]; - dst_factor[1] = src[3]; - dst_factor[2] = src[3]; - dst_factor[3] = src[3]; - break; - case SW_ONE_MINUS_SRC_ALPHA: - dst_factor[0] = 1.0f - src[3]; - dst_factor[1] = 1.0f - src[3]; - dst_factor[2] = 1.0f - src[3]; - dst_factor[3] = 1.0f - src[3]; - break; - case SW_DST_ALPHA: - dst_factor[0] = dst[3]; - dst_factor[1] = dst[3]; - dst_factor[2] = dst[3]; - dst_factor[3] = dst[3]; - break; - case SW_ONE_MINUS_DST_ALPHA: - dst_factor[0] = 1.0f - dst[3]; - dst_factor[1] = 1.0f - dst[3]; - dst_factor[2] = 1.0f - dst[3]; - dst_factor[3] = 1.0f - dst[3]; - break; - case SW_DST_COLOR: - dst_factor[0] = dst[0]; - dst_factor[1] = dst[1]; - dst_factor[2] = dst[2]; - dst_factor[3] = dst[3]; - break; - case SW_ONE_MINUS_DST_COLOR: - dst_factor[0] = 1.0f - dst[0]; - dst_factor[1] = 1.0f - dst[1]; - dst_factor[2] = 1.0f - dst[2]; - dst_factor[3] = 1.0f - dst[3]; - break; - case SW_SRC_ALPHA_SATURATE: - // NOTE: 
This case is only available for the source. - // Since the factors are validated before assignment, - // we should never reach this point. - break; - } - - for (int i = 0; i < 4; ++i) { - dst[i] = src_factor[i] * src[i] + dst_factor[i] * dst[i]; - } + factor[0] = factor[1] = factor[2] = factor[3] = 0.0f; } +static inline void sw_factor_one(float *factor, const float *src, const float *dst) +{ + factor[0] = factor[1] = factor[2] = factor[3] = 1.0f; +} + +static inline void sw_factor_src_color(float *factor, const float *src, const float *dst) +{ + factor[0] = src[0]; factor[1] = src[1]; factor[2] = src[2]; factor[3] = src[3]; +} + +static inline void sw_factor_one_minus_src_color(float *factor, const float *src, const float *dst) +{ + factor[0] = 1.0f - src[0]; factor[1] = 1.0f - src[1]; + factor[2] = 1.0f - src[2]; factor[3] = 1.0f - src[3]; +} + +static inline void sw_factor_src_alpha(float *factor, const float *src, const float *dst) +{ + factor[0] = factor[1] = factor[2] = factor[3] = src[3]; +} + +static inline void sw_factor_one_minus_src_alpha(float *factor, const float *src, const float *dst) +{ + float inv_alpha = 1.0f - src[3]; + factor[0] = factor[1] = factor[2] = factor[3] = inv_alpha; +} + +static inline void sw_factor_dst_alpha(float *factor, const float *src, const float *dst) +{ + factor[0] = factor[1] = factor[2] = factor[3] = dst[3]; +} + +static inline void sw_factor_one_minus_dst_alpha(float *factor, const float *src, const float *dst) +{ + float inv_alpha = 1.0f - dst[3]; + factor[0] = factor[1] = factor[2] = factor[3] = inv_alpha; +} + +static inline void sw_factor_dst_color(float *factor, const float *src, const float *dst) +{ + factor[0] = dst[0]; factor[1] = dst[1]; factor[2] = dst[2]; factor[3] = dst[3]; +} + +static inline void sw_factor_one_minus_dst_color(float *factor, const float *src, const float *dst) +{ + factor[0] = 1.0f - dst[0]; factor[1] = 1.0f - dst[1]; + factor[2] = 1.0f - dst[2]; factor[3] = 1.0f - dst[3]; +} + +static inline 
void sw_factor_src_alpha_saturate(float *factor, const float *src, const float *dst) +{ + factor[0] = factor[1] = factor[2] = 1.0f; + factor[3] = (src[3] < 1.0f) ? src[3] : 1.0f; +} + +static inline void sw_blend_colors(float dst[4], float src[4]) +{ + float srcFactor[4], dstFactor[4]; + + RLSW.srcFactorFunc(srcFactor, src, dst); + RLSW.dstFactorFunc(dstFactor, src, dst); + + dst[0] = srcFactor[0] * src[0] + dstFactor[0] * dst[0]; + dst[1] = srcFactor[1] * src[1] + dstFactor[1] * dst[1]; + dst[2] = srcFactor[2] * src[2] + dstFactor[2] * dst[2]; + dst[3] = srcFactor[3] * src[3] + dstFactor[3] * dst[3]; +} /* === Projection Helper Functions === */ @@ -3098,6 +3039,9 @@ bool swInit(int w, int h) RLSW.srcFactor = SW_SRC_ALPHA; RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; + RLSW.srcFactorFunc = sw_factor_src_alpha; + RLSW.dstFactorFunc = sw_factor_one_minus_src_alpha; + RLSW.polyMode = SW_FILL; RLSW.cullFace = SW_BACK; @@ -3429,6 +3373,78 @@ void swBlendFunc(SWfactor sfactor, SWfactor dfactor) } RLSW.srcFactor = sfactor; RLSW.dstFactor = dfactor; + + switch (sfactor) { + case SW_ZERO: + RLSW.srcFactorFunc = sw_factor_zero; + break; + case SW_ONE: + RLSW.srcFactorFunc = sw_factor_one; + break; + case SW_SRC_COLOR: + RLSW.srcFactorFunc = sw_factor_src_color; + break; + case SW_ONE_MINUS_SRC_COLOR: + RLSW.srcFactorFunc = sw_factor_one_minus_src_color; + break; + case SW_SRC_ALPHA: + RLSW.srcFactorFunc = sw_factor_src_alpha; + break; + case SW_ONE_MINUS_SRC_ALPHA: + RLSW.srcFactorFunc = sw_factor_one_minus_src_alpha; + break; + case SW_DST_ALPHA: + RLSW.srcFactorFunc = sw_factor_dst_alpha; + break; + case SW_ONE_MINUS_DST_ALPHA: + RLSW.srcFactorFunc = sw_factor_one_minus_dst_alpha; + break; + case SW_DST_COLOR: + RLSW.srcFactorFunc = sw_factor_dst_color; + break; + case SW_ONE_MINUS_DST_COLOR: + RLSW.srcFactorFunc = sw_factor_one_minus_dst_color; + break; + case SW_SRC_ALPHA_SATURATE: + RLSW.srcFactorFunc = sw_factor_src_alpha_saturate; + break; + } + + switch (dfactor) { + 
case SW_ZERO: + RLSW.srcFactorFunc = sw_factor_zero; + break; + case SW_ONE: + RLSW.srcFactorFunc = sw_factor_one; + break; + case SW_SRC_COLOR: + RLSW.srcFactorFunc = sw_factor_src_color; + break; + case SW_ONE_MINUS_SRC_COLOR: + RLSW.srcFactorFunc = sw_factor_one_minus_src_color; + break; + case SW_SRC_ALPHA: + RLSW.srcFactorFunc = sw_factor_src_alpha; + break; + case SW_ONE_MINUS_SRC_ALPHA: + RLSW.srcFactorFunc = sw_factor_one_minus_src_alpha; + break; + case SW_DST_ALPHA: + RLSW.srcFactorFunc = sw_factor_dst_alpha; + break; + case SW_ONE_MINUS_DST_ALPHA: + RLSW.srcFactorFunc = sw_factor_one_minus_dst_alpha; + break; + case SW_DST_COLOR: + RLSW.srcFactorFunc = sw_factor_dst_color; + break; + case SW_ONE_MINUS_DST_COLOR: + RLSW.srcFactorFunc = sw_factor_one_minus_dst_color; + break; + case SW_SRC_ALPHA_SATURATE: + // NOTE: Should never be reached + break; + } } void swPolygonMode(SWpoly mode) From f0d8653d078fa09facc4c456b51332e18a0df1a2 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 10 May 2025 23:21:36 +0200 Subject: [PATCH 054/105] greatly improve float saturation --- src/external/rlsw.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 78b90c921..f14887cae 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -736,23 +736,28 @@ static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_ma static inline float sw_saturate(float x) { - // After several comparisons, this saturation method - // seems to be the most optimized by GCC and Clang, - // and it does not produce any conditional branching. + // Clamps a floating point value between 0.0 and 1.0 - // However, it is possible that a clamp could be - // more efficient on certain platforms. - // Comparisons will need to be made. 
+ // This implementation uses IEEE 754 bit manipulation: + // - Uses the sign bit to detect negative values + // - Directly compares with binary representation of 1.0f to detect values > 1.0 - // SEE: https://godbolt.org/z/5qYznK5zj + // Use union to access the bits of the float as an unsigned int + union { float f; uint32_t u; } v; + v.f = x; - // Saturation from below: max(0, x) - float y = 0.5f * (x + fabsf(x)); + // Check sign bit (bit 31): if set, x is negative, return 0.0f + if (v.u & 0x80000000) return 0.0f; - // Saturation from above: min(1, y) - return y - 0.5f * ((y - 1.0f) + fabsf(y - 1.0f)); + // Extract the unsigned magnitude (exponent + mantissa bits) + uint32_t expMantissa = v.u & 0x7FFFFFFF; - // return (x < 0.0f) ? 0.0f : ((x > 1.0f) ? 1.0f : x); + // If magnitude > binary representation of 1.0f (0x3F800000), return 1.0f + // This efficiently handles all values > 1.0f without additional computation + if (expMantissa > 0x3F800000) return 1.0f; + + // Value is between 0.0f and 1.0f inclusive, return unchanged + return x; } static inline int sw_clampi(int v, int min, int max) @@ -1247,7 +1252,6 @@ int sw_get_pixel_bpp(sw_pixelformat_e format) return bpp; } - static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); From d98f229814d226095b28f4baa7dd9b6d32f69eb9 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 10 May 2025 23:25:13 +0200 Subject: [PATCH 055/105] tweak lerp vertex function --- src/external/rlsw.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index f14887cae..00572a5db 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -775,9 +775,31 @@ static inline float sw_lerp(float a, float b, float t) static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_vertex_t* b, float t) { sw_vertex_t result; - for (int i = 0; i < 
offsetof(sw_vertex_t, screen) / sizeof(float); i++) { - ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); - } + + const float tInv = 1.0f - t; + + // Position interpolation (4 components) + result.position[0] = a->position[0] * tInv + b->position[0] * t; + result.position[1] = a->position[1] * tInv + b->position[1] * t; + result.position[2] = a->position[2] * tInv + b->position[2] * t; + result.position[3] = a->position[3] * tInv + b->position[3] * t; + + // Texture coordinate interpolation (2 components) + result.texcoord[0] = a->texcoord[0] * tInv + b->texcoord[0] * t; + result.texcoord[1] = a->texcoord[1] * tInv + b->texcoord[1] * t; + + // Color interpolation (4 components) + result.color[0] = a->color[0] * tInv + b->color[0] * t; + result.color[1] = a->color[1] * tInv + b->color[1] * t; + result.color[2] = a->color[2] * tInv + b->color[2] * t; + result.color[3] = a->color[3] * tInv + b->color[3] * t; + + // Homogeneous coordinate interpolation (4 components) + result.homogeneous[0] = a->homogeneous[0] * tInv + b->homogeneous[0] * t; + result.homogeneous[1] = a->homogeneous[1] * tInv + b->homogeneous[1] * t; + result.homogeneous[2] = a->homogeneous[2] * tInv + b->homogeneous[2] * t; + result.homogeneous[3] = a->homogeneous[3] * tInv + b->homogeneous[3] * t; + return result; } From 1ae8bb4970e66734c6dcd710f3675ee1700bee17 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 10 May 2025 23:43:39 +0200 Subject: [PATCH 056/105] use opitmized fract function in sw_texture_map --- src/external/rlsw.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 00572a5db..aecebeb3a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -519,6 +519,7 @@ void swBindTexture(uint32_t id); #endif // RLSW_H +#define RLSW_IMPL #ifdef RLSW_IMPL #include @@ -760,6 +761,37 @@ static inline float sw_saturate(float x) return x; } +static inline float sw_fract(float x) +{ 
+ // Computes the positive fractional part of a float. + // Equivalent to fabs(x) - floorf(fabs(x)). + // Uses IEEE 754 bit tricks for efficiency and edge case handling. + + union { float f; uint32_t u; } v; + v.f = x; + + // Get absolute value bits (clear sign bit) + uint32_t abs_bits = v.u & 0x7FFFFFFF; + + // Case 1: |x| < 1.0f -> integer part is 0, return |x| + if (abs_bits < 0x3F800000) { + v.u = abs_bits; // Ensure positive result + return v.f; + } + + // Case 2: |x| ≥ 2^24 -> float is an exact integer, return 0.0f + // Also handles Inf and NaN as 0.0f + if (abs_bits >= 0x4B000000) { + return 0.0f; + } + + // Case 3: 1.0f ≤ |x| < 2^24 -> compute |x| - floor(|x|) + v.u = abs_bits; + float abs_x = v.f; + + return abs_x - floorf(abs_x); +} + static inline int sw_clampi(int v, int min, int max) { if (v < min) return min; @@ -1668,7 +1700,7 @@ static inline void sw_texture_map(int* out, float in, int max, SWwrap mode) { switch (mode) { case SW_REPEAT: - *out = (int)((in - floorf(in)) * max + 0.5f); + *out = (int)(sw_fract(in) * max + 0.5f); break; case SW_CLAMP: *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); From d9c60f2d39eb7d53bed69d2e85460eb3f2a916be Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 11 May 2025 00:12:47 +0200 Subject: [PATCH 057/105] tweak framebuffer functions for better readability --- src/external/rlsw.h | 174 ++++++++++++++++++++++++++++---------------- 1 file changed, 113 insertions(+), 61 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index aecebeb3a..d436d1dd8 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -519,7 +519,6 @@ void swBindTexture(uint32_t id); #endif // RLSW_H -#define RLSW_IMPL #ifdef RLSW_IMPL #include @@ -901,84 +900,35 @@ static inline void sw_framebuffer_inc_depth_addr(void** ptr) *ptr = (void*)(((uint8_t*)*ptr) + SW_DEPTH_PIXEL_SIZE); } +#if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 + static inline void sw_framebuffer_read_color(float dst[4], const void* src) { -#if 
(SW_COLOR_BUFFER_BITS == 8) // RGB - 332 uint8_t pixel = ((uint8_t*)src)[0]; + dst[0] = ((pixel >> 5) & 0x07) * (1.0f / 7.0f); dst[1] = ((pixel >> 2) & 0x07) * (1.0f / 7.0f); dst[2] = (pixel & 0x03) * (1.0f / 3.0f); dst[3] = 1.0f; -#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 - uint16_t pixel = ((uint16_t*)src)[0]; - dst[0] = ((pixel >> 11) & 0x1F) * (1.0f / 31.0f); - dst[1] = ((pixel >> 5) & 0x3F) * (1.0f / 63.0f); - dst[2] = (pixel & 0x1F) * (1.0f / 31.0f); - dst[3] = 1.0f; -#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 - dst[0] = ((uint8_t*)src)[0] * (1.0f / 255.0f); - dst[1] = ((uint8_t*)src)[1] * (1.0f / 255.0f); - dst[2] = ((uint8_t*)src)[2] * (1.0f / 255.0f); - dst[3] = 1.0f; -#endif } static inline void sw_framebuffer_write_color(void* dst, float color[3]) { -#if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + ((uint8_t*)dst)[0] = (r << 5) | (g << 2) | b; - -#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 - uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; - uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; - uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; - ((uint16_t*)dst)[0] = (r << 11) | (g << 5) | b; - -#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 - ((uint8_t*)dst)[0] = (uint8_t)(color[0] * UINT8_MAX); - ((uint8_t*)dst)[1] = (uint8_t)(color[1] * UINT8_MAX); - ((uint8_t*)dst)[2] = (uint8_t)(color[2] * UINT8_MAX); -#endif -} - -static inline float sw_framebuffer_read_depth(const void* src) -{ -#if (SW_DEPTH_BUFFER_BITS == 8) - return (float)((uint8_t*)src)[0] * (1.0f / UINT8_MAX); -#elif (SW_DEPTH_BUFFER_BITS == 16) - return (float)((uint16_t*)src)[0] * (1.0f / UINT16_MAX); -#elif (SW_DEPTH_BUFFER_BITS == 24) - uint32_t depth24 = (((uint8_t*)src)[0] << 16) | - (((uint8_t*)src)[1] << 8) | - ((uint8_t*)src)[2]; - return depth24 / (float)0xFFFFFF; -#endif -} - 
-static inline void sw_framebuffer_write_depth(void* dst, float depth) -{ -#if (SW_DEPTH_BUFFER_BITS == 8) - ((uint8_t*)dst)[0] = (uint8_t)(depth * UINT8_MAX); -#elif (SW_DEPTH_BUFFER_BITS == 16) - ((uint16_t*)dst)[0] = (uint16_t)(depth * UINT16_MAX); -#elif (SW_DEPTH_BUFFER_BITS == 24) - uint32_t depth24 = (uint32_t)(depth * 0xFFFFFF); - ((uint8_t*)dst)[0] = (depth24 >> 16) & 0xFF; - ((uint8_t*)dst)[1] = (depth24 >> 8) & 0xFF; - ((uint8_t*)dst)[2] = depth24 & 0xFF; -#endif } static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) { -#if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + uint8_t* p = (uint8_t*)ptr; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { @@ -990,11 +940,37 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] p[i] = (r << 5) | (g << 2) | b; } } -#elif (SW_COLOR_BUFFER_BITS == 16) +} + +#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 + +static inline void sw_framebuffer_read_color(float dst[4], const void* src) +{ + uint16_t pixel = ((uint16_t*)src)[0]; + + dst[0] = ((pixel >> 11) & 0x1F) * (1.0f / 31.0f); + dst[1] = ((pixel >> 5) & 0x3F) * (1.0f / 63.0f); + dst[2] = (pixel & 0x1F) * (1.0f / 31.0f); + dst[3] = 1.0f; +} + +static inline void sw_framebuffer_write_color(void* dst, float color[3]) +{ uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + + ((uint16_t*)dst)[0] = (r << 11) | (g << 5) | b; +} + +static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) +{ + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = 
(uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + uint16_t* p = (uint16_t*)ptr; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { @@ -1006,11 +982,33 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] p[i] = (r << 11) | (g << 5) | b; } } -#elif (SW_COLOR_BUFFER_BITS == 24) +} + +#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 + +static inline void sw_framebuffer_read_color(float dst[4], const void* src) +{ + dst[0] = ((uint8_t*)src)[0] * (1.0f / 255.0f); + dst[1] = ((uint8_t*)src)[1] * (1.0f / 255.0f); + dst[2] = ((uint8_t*)src)[2] * (1.0f / 255.0f); + dst[3] = 1.0f; +} + +static inline void sw_framebuffer_write_color(void* dst, float color[3]) +{ + ((uint8_t*)dst)[0] = (uint8_t)(color[0] * UINT8_MAX); + ((uint8_t*)dst)[1] = (uint8_t)(color[1] * UINT8_MAX); + ((uint8_t*)dst)[2] = (uint8_t)(color[2] * UINT8_MAX); +} + +static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) +{ uint8_t r = (uint8_t)(color[0] * 255); uint8_t g = (uint8_t)(color[1] * 255); uint8_t b = (uint8_t)(color[2] * 255); + uint8_t* p = (uint8_t*)ptr; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { @@ -1027,14 +1025,27 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] *p++ = b; } } -#endif +} + +#endif // SW_COLOR_BUFFER_BITS + +#if (SW_DEPTH_BUFFER_BITS == 8) + +static inline float sw_framebuffer_read_depth(const void* src) +{ + return (float)((uint8_t*)src)[0] * (1.0f / UINT8_MAX); +} + +static inline void sw_framebuffer_write_depth(void* dst, float depth) +{ + ((uint8_t*)dst)[0] = (uint8_t)(depth * UINT8_MAX); } static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) { -#if (SW_DEPTH_BUFFER_BITS == 8) uint8_t v = value * UINT8_MAX; uint8_t* p = (uint8_t*)ptr; + if 
(RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { @@ -1047,9 +1058,25 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) p[i] = v; } } +} + #elif (SW_DEPTH_BUFFER_BITS == 16) + +static inline float sw_framebuffer_read_depth(const void* src) +{ + return (float)((uint16_t*)src)[0] * (1.0f / UINT16_MAX); +} + +static inline void sw_framebuffer_write_depth(void* dst, float depth) +{ + ((uint16_t*)dst)[0] = (uint16_t)(depth * UINT16_MAX); +} + +static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) +{ uint16_t v = value * UINT16_MAX; uint16_t* p = (uint16_t*)ptr; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { @@ -1062,9 +1089,33 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) p[i] = v; } } +} + #elif (SW_DEPTH_BUFFER_BITS == 24) + +static inline float sw_framebuffer_read_depth(const void* src) +{ + uint32_t depth24 = (((uint8_t*)src)[0] << 16) | + (((uint8_t*)src)[1] << 8) | + ((uint8_t*)src)[2]; + + return depth24 / (float)0xFFFFFF; +} + +static inline void sw_framebuffer_write_depth(void* dst, float depth) +{ + uint32_t depth24 = (uint32_t)(depth * 0xFFFFFF); + + ((uint8_t*)dst)[0] = (depth24 >> 16) & 0xFF; + ((uint8_t*)dst)[1] = (depth24 >> 8) & 0xFF; + ((uint8_t*)dst)[2] = depth24 & 0xFF; +} + +static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) +{ uint32_t v = value * UINT32_MAX; uint8_t* p = (uint8_t*)ptr; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { @@ -1082,9 +1133,10 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) *p++ = v & 0xFF; } } -#endif } +#endif // SW_DEPTH_BUFFER_BITS + static 
inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, float color[4], float depth_value) { #if (SW_COLOR_BUFFER_BITS == 8) From a8f222d52c418c087dd77ef6652e3daf2f0cbc6a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 11 May 2025 02:35:34 +0200 Subject: [PATCH 058/105] optimized copy/blit functions for each dst format --- src/external/rlsw.h | 841 ++++++++++++++++++++++++++++++-------------- 1 file changed, 573 insertions(+), 268 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d436d1dd8..ce7173ea8 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -835,6 +835,62 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve } +/* === Half Floating Point === */ + +static inline uint32_t sw_cvt_hf_ui(uint16_t h) +{ + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? (112 << 23) : 0; + + return s | r; +} + +static inline float sw_cvt_hf(sw_half_t y) +{ + union { float f; uint32_t i; } v = { + .i = sw_cvt_hf_ui(y) + }; + return v.f; +} + +static inline uint16_t sw_cvt_fh_ui(uint32_t ui) +{ + int32_t s = (ui >> 16) & 0x8000; + int32_t em = ui & 0x7fffffff; + + // bias exponent and round to nearest; 112 is relative exponent bias (127-15) + int32_t h = (em - (112 << 23) + (1 << 12)) >> 13; + + // underflow: flush to zero; 113 encodes exponent -14 + h = (em < (113 << 23)) ? 0 : h; + + // overflow: infinity; 143 encodes exponent 16 + h = (em >= (143 << 23)) ? 0x7c00 : h; + + // NaN; note that we convert all types of NaN to qNaN + h = (em > (255 << 23)) ? 
0x7e00 : h; + + return (uint16_t)(s | h); +} + +static inline sw_half_t sw_cvt_fh(float i) +{ + union { float f; uint32_t i; } v; + v.f = i; + return sw_cvt_fh_ui(v.i); +} + + /* === Framebuffer Part === */ static inline bool sw_framebuffer_load(int w, int h) @@ -880,7 +936,7 @@ static inline bool sw_framebuffer_resize(int w, int h) return true; } -static inline void* sw_framebuffer_get_color_addr(void* ptr, uint32_t offset) +static inline void* sw_framebuffer_get_color_addr(const void* ptr, uint32_t offset) { return (uint8_t*)ptr + offset * SW_COLOR_PIXEL_SIZE; } @@ -890,7 +946,12 @@ static inline void sw_framebuffer_inc_color_addr(void** ptr) *ptr = (void*)(((uint8_t*)*ptr) + SW_COLOR_PIXEL_SIZE); } -static inline void* sw_framebuffer_get_depth_addr(void* ptr, uint32_t offset) +static inline void sw_framebuffer_inc_const_color_addr(const void** ptr) +{ + *ptr = (const void*)(((const uint8_t*)*ptr) + SW_COLOR_PIXEL_SIZE); +} + +static inline void* sw_framebuffer_get_depth_addr(const void* ptr, uint32_t offset) { return (uint8_t*)ptr + offset * SW_DEPTH_PIXEL_SIZE; } @@ -912,6 +973,20 @@ static inline void sw_framebuffer_read_color(float dst[4], const void* src) dst[3] = 1.0f; } +static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void* src) +{ + uint8_t pixel = ((const uint8_t*)src)[0]; + + uint8_t r = (pixel >> 5) & 0x07; + uint8_t g = (pixel >> 2) & 0x07; + uint8_t b = pixel & 0x03; + + dst[0] = (r * 255 + 3) / 7; + dst[1] = (g * 255 + 3) / 7; + dst[2] = (b * 255 + 1) / 3; + dst[3] = 255; +} + static inline void sw_framebuffer_write_color(void* dst, float color[3]) { uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -954,6 +1029,20 @@ static inline void sw_framebuffer_read_color(float dst[4], const void* src) dst[3] = 1.0f; } +static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void* src) +{ + uint16_t pixel = ((const uint16_t*)src)[0]; + + uint8_t r = (pixel >> 11) & 0x1F; + uint8_t g = (pixel >> 5) & 0x3F; + uint8_t 
b = pixel & 0x1F; + + dst[0] = (r * 255 + 15) / 31; + dst[1] = (g * 255 + 31) / 63; + dst[2] = (b * 255 + 15) / 31; + dst[3] = 255; +} + static inline void sw_framebuffer_write_color(void* dst, float color[3]) { uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; @@ -994,6 +1083,14 @@ static inline void sw_framebuffer_read_color(float dst[4], const void* src) dst[3] = 1.0f; } +static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void* src) +{ + dst[0] = ((uint8_t*)src)[0]; + dst[1] = ((uint8_t*)src)[1]; + dst[2] = ((uint8_t*)src)[2]; + dst[3] = 255; +} + static inline void sw_framebuffer_write_color(void* dst, float color[3]) { ((uint8_t*)dst)[0] = (uint8_t)(color[0] * UINT8_MAX); @@ -1216,61 +1313,351 @@ static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, } } +#define DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(name, DST_PTR_T) \ +static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T* dst) \ +{ \ + const void* src = RLSW.framebuffer.color; \ + \ + for (int iy = y; iy < h; iy++) { \ + for (int ix = x; ix < w; ix++) { \ + uint8_t color[4]; \ + sw_framebuffer_read_color8(color, src); \ -/* === Half Floating Point === */ +#define DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(name, DST_PTR_T) \ +static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T* dst) \ +{ \ + const void* src = RLSW.framebuffer.color; \ + \ + for (int iy = y; iy < h; iy++) { \ + for (int ix = x; ix < w; ix++) { \ + float color[4]; \ + sw_framebuffer_read_color(color, src); \ -static inline uint32_t sw_cvt_hf_ui(uint16_t h) -{ - uint32_t s = (uint32_t)(h & 0x8000) << 16; - int32_t em = h & 0x7fff; - - // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) - int32_t r = (em + (112 << 10)) << 13; - - // denormal: flush to zero - r = (em < (1 << 10)) ? 
0 : r; - - // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases - // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 - r += (em >= (31 << 10)) ? (112 << 23) : 0; - - return s | r; +#define DEFINE_FRAMEBUFFER_COPY_END() \ + sw_framebuffer_inc_const_color_addr(&src); \ + } \ + } \ } -static inline float sw_cvt_hf(sw_half_t y) +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(GRAYSCALE, uint8_t) { - union { float f; uint32_t i; } v = { - .i = sw_cvt_hf_ui(y) - }; - return v.f; + // NTSC grayscale conversion: Y = 0.299R + 0.587G + 0.114B + uint8_t gray = (uint8_t)((color[0] * 299 + color[1] * 587 + color[2] * 114 + 500) / 1000); + *dst++ = gray; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(GRAYALPHA, uint8_t) +{ + // Convert RGB to grayscale using NTSC formula + uint8_t gray = (uint8_t)((color[0] * 299 + color[1] * 587 + color[2] * 114 + 500) / 1000); + + dst[0] = gray; + dst[1] = color[3]; // alpha + + dst += 2; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R5G6B5, uint16_t) +{ + // Convert 8-bit RGB to 5:6:5 format + uint8_t r5 = (color[0] * 31 + 127) / 255; + uint8_t g6 = (color[1] * 63 + 127) / 255; + uint8_t b5 = (color[2] * 31 + 127) / 255; + + uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5; + + *dst++ = rgb565; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R8G8B8, uint8_t) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + + dst += 3; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R5G5B5A1, uint16_t) +{ + uint8_t r5 = (color[0] * 31 + 127) / 255; + uint8_t g5 = (color[1] * 31 + 127) / 255; + uint8_t b5 = (color[2] * 31 + 127) / 255; + uint8_t a1 = color[3] >= 128 ? 
1 : 0; + + uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1; + + *dst++ = pixel; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R4G4B4A4, uint16_t) +{ + uint8_t r4 = (color[0] * 15 + 127) / 255; + uint8_t g4 = (color[1] * 15 + 127) / 255; + uint8_t b4 = (color[2] * 15 + 127) / 255; + uint8_t a4 = (color[3] * 15 + 127) / 255; + + uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4; + + *dst++ = pixel; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R8G8B8A8, uint8_t) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + dst[3] = color[3]; // A + + dst += 4; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R32, float) +{ + dst[0] = color[0]; + dst++; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R32G32B32, float) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + + dst += 3; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R32G32B32A32, float) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + dst[3] = color[3]; // A + + dst += 4; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R16, sw_half_t) +{ + dst[0] = sw_cvt_fh(color[0]); + dst++; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R16G16B16, sw_half_t) +{ + dst[0] = sw_cvt_fh(color[0]); // R + dst[1] = sw_cvt_fh(color[1]); // G + dst[2] = sw_cvt_fh(color[2]); // B + + dst += 3; +} +DEFINE_FRAMEBUFFER_COPY_END() + +DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R16G16B16A16, sw_half_t) +{ + dst[0] = sw_cvt_fh(color[0]); // R + dst[1] = sw_cvt_fh(color[1]); // G + dst[2] = sw_cvt_fh(color[2]); // B + dst[3] = sw_cvt_fh(color[3]); // A + + dst += 4; +} +DEFINE_FRAMEBUFFER_COPY_END() + +#define DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(name, DST_PTR_T) \ +static inline void sw_framebuffer_blit_to_##name( \ + int xDst, int yDst, int wDst, int hDst, \ + int xSrc, int ySrc, int 
wSrc, int hSrc, \ + DST_PTR_T* dst) \ +{ \ + const uint8_t* srcBase = RLSW.framebuffer.color; \ + int fbWidth = RLSW.framebuffer.width; \ + \ + uint32_t xScale = ((uint32_t)wSrc << 16) / (uint32_t)wDst; \ + uint32_t yScale = ((uint32_t)hSrc << 16) / (uint32_t)hDst; \ + \ + for (int dy = 0; dy < hDst; dy++) { \ + uint32_t yFix = ((uint32_t)ySrc << 16) + dy * yScale; \ + int sy = yFix >> 16; \ + \ + for (int dx = 0; dx < wDst; dx++) { \ + uint32_t xFix = dx * xScale; \ + int sx = xFix >> 16; \ + const void* srcPtr = sw_framebuffer_get_color_addr(srcBase, sy * fbWidth + sx); \ + uint8_t color[4]; \ + sw_framebuffer_read_color8(color, srcPtr); \ + +#define DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(name, DST_PTR_T) \ +static inline void sw_framebuffer_blit_to_##name( \ + int xDst, int yDst, int wDst, int hDst, \ + int xSrc, int ySrc, int wSrc, int hSrc, \ + DST_PTR_T* dst) \ +{ \ + const uint8_t* srcBase = RLSW.framebuffer.color; \ + int fbWidth = RLSW.framebuffer.width; \ + \ + uint32_t xScale = ((uint32_t)wSrc << 16) / (uint32_t)wDst; \ + uint32_t yScale = ((uint32_t)hSrc << 16) / (uint32_t)hDst; \ + \ + for (int dy = 0; dy < hDst; dy++) { \ + uint32_t yFix = ((uint32_t)ySrc << 16) + dy * yScale; \ + int sy = yFix >> 16; \ + \ + for (int dx = 0; dx < wDst; dx++) { \ + uint32_t xFix = dx * xScale; \ + int sx = xFix >> 16; \ + const void* srcPtr = sw_framebuffer_get_color_addr(srcBase, sy * fbWidth + sx); \ + float color[4]; \ + sw_framebuffer_read_color(color, srcPtr); \ + +#define DEFINE_FRAMEBUFFER_BLIT_END() \ + } \ + } \ } -static inline uint16_t sw_cvt_fh_ui(uint32_t ui) +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(GRAYSCALE, uint8_t) { - int32_t s = (ui >> 16) & 0x8000; - int32_t em = ui & 0x7fffffff; - - // bias exponent and round to nearest; 112 is relative exponent bias (127-15) - int32_t h = (em - (112 << 23) + (1 << 12)) >> 13; - - // underflow: flush to zero; 113 encodes exponent -14 - h = (em < (113 << 23)) ? 
0 : h; - - // overflow: infinity; 143 encodes exponent 16 - h = (em >= (143 << 23)) ? 0x7c00 : h; - - // NaN; note that we convert all types of NaN to qNaN - h = (em > (255 << 23)) ? 0x7e00 : h; - - return (uint16_t)(s | h); + uint8_t gray = (uint8_t)((color[0] * 299 + color[1] * 587 + color[2] * 114 + 500) / 1000); + *dst++ = gray; } +DEFINE_FRAMEBUFFER_BLIT_END() -static inline sw_half_t sw_cvt_fh(float i) +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(GRAYALPHA, uint8_t) { - union { float f; uint32_t i; } v; - v.f = i; - return sw_cvt_fh_ui(v.i); + uint8_t gray = (uint8_t)((color[0] * 299 + color[1] * 587 + color[2] * 114 + 500) / 1000); + + dst[0] = gray; + dst[1] = color[3]; // alpha + + dst += 2; } +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R5G6B5, uint16_t) +{ + uint8_t r5 = (color[0] * 31 + 127) / 255; + uint8_t g6 = (color[1] * 63 + 127) / 255; + uint8_t b5 = (color[2] * 31 + 127) / 255; + + uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5; + + *dst++ = rgb565; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R8G8B8, uint8_t) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + + dst += 3; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R5G5B5A1, uint16_t) +{ + uint8_t r5 = (color[0] * 31 + 127) / 255; + uint8_t g5 = (color[1] * 31 + 127) / 255; + uint8_t b5 = (color[2] * 31 + 127) / 255; + uint8_t a1 = color[3] >= 128 ? 
1 : 0; + + uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1; + + *dst++ = pixel; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R4G4B4A4, uint16_t) +{ + uint8_t r4 = (color[0] * 15 + 127) / 255; + uint8_t g4 = (color[1] * 15 + 127) / 255; + uint8_t b4 = (color[2] * 15 + 127) / 255; + uint8_t a4 = (color[3] * 15 + 127) / 255; + + uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4; + + *dst++ = pixel; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R8G8B8A8, uint8_t) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + dst[3] = color[3]; // A + + dst += 4; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R32, uint8_t) +{ + dst[0] = color[0]; + dst++; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R32G32B32, float) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + + dst += 3; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R32G32B32A32, float) +{ + dst[0] = color[0]; // R + dst[1] = color[1]; // G + dst[2] = color[2]; // B + dst[3] = color[3]; // A + + dst += 4; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R16, sw_half_t) +{ + dst[0] = sw_cvt_fh(color[0]); + dst++; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R16G16B16, sw_half_t) +{ + dst[0] = sw_cvt_fh(color[0]); // R + dst[1] = sw_cvt_fh(color[1]); // G + dst[2] = sw_cvt_fh(color[2]); // B + + dst += 3; +} +DEFINE_FRAMEBUFFER_BLIT_END() + +DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R16G16B16A16, sw_half_t) +{ + dst[0] = sw_cvt_fh(color[0]); // R + dst[1] = sw_cvt_fh(color[1]); // G + dst[2] = sw_cvt_fh(color[2]); // B + dst[3] = sw_cvt_fh(color[3]); // A + + dst += 4; +} +DEFINE_FRAMEBUFFER_BLIT_END() /* === Pixel Format Part === */ @@ -1558,193 +1945,6 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse } } -static inline void 
sw_set_pixel_grayscale(void* pixels, uint32_t offset, const float* color) -{ - ((uint8_t*)pixels)[offset] = (uint8_t)(color[0] * 255.0f); -} - -static inline void sw_set_pixel_red_16(void* pixels, uint32_t offset, const float* color) -{ - ((sw_half_t*)pixels)[offset] = sw_cvt_fh(color[0]); -} - -static inline void sw_set_pixel_red_32(void* pixels, uint32_t offset, const float* color) -{ - ((float*)pixels)[offset] = color[0]; -} - -static inline void sw_set_pixel_grayscale_alpha(void* pixels, uint32_t offset, const float* color) -{ - uint8_t* pixelData = (uint8_t*)pixels + 2 * offset; - - pixelData[0] = (uint8_t)(color[0] * 255.0f); // Valeur de gris - pixelData[1] = (uint8_t)(color[3] * 255.0f); // Alpha -} - -static inline void sw_set_pixel_rgb_565(void* pixels, uint32_t offset, const float* color) -{ - uint16_t* pixel = (uint16_t*)pixels + offset; - - uint16_t r = (uint16_t)(color[0] * 31) & 0x1F; - uint16_t g = (uint16_t)(color[1] * 63) & 0x3F; - uint16_t b = (uint16_t)(color[2] * 31) & 0x1F; - - *pixel = (r << 11) | (g << 5) | b; -} - -static inline void sw_set_pixel_rgb_888(void* pixels, uint32_t offset, const float* color) -{ - uint8_t* pixel = (uint8_t*)pixels + 3 * offset; - - pixel[0] = (uint8_t)(color[0] * 255.0f); - pixel[1] = (uint8_t)(color[1] * 255.0f); - pixel[2] = (uint8_t)(color[2] * 255.0f); -} - -static inline void sw_set_pixel_rgb_161616(void* pixels, uint32_t offset, const float* color) -{ - sw_half_t* pixel = (sw_half_t*)pixels + 3 * offset; - - pixel[0] = sw_cvt_fh(color[0]); - pixel[1] = sw_cvt_fh(color[1]); - pixel[2] = sw_cvt_fh(color[2]); -} - -static inline void sw_set_pixel_rgb_323232(void* pixels, uint32_t offset, const float* color) -{ - float* pixel = (float*)pixels + 3 * offset; - - pixel[0] = color[0]; - pixel[1] = color[1]; - pixel[2] = color[2]; -} - -static inline void sw_set_pixel_rgba_5551(void* pixels, uint32_t offset, const float* color) -{ - uint16_t* pixel = (uint16_t*)pixels + offset; - - uint16_t r = (uint16_t)(color[0] 
* 31) & 0x1F; - uint16_t g = (uint16_t)(color[1] * 31) & 0x1F; - uint16_t b = (uint16_t)(color[2] * 31) & 0x1F; - uint16_t a = (color[3] > 0.5f) ? 1 : 0; // Alpha 1 bit - - *pixel = (r << 11) | (g << 6) | (b << 1) | a; -} - -static inline void sw_set_pixel_rgba_4444(void* pixels, uint32_t offset, const float* color) -{ - uint16_t* pixel = (uint16_t*)pixels + offset; - - uint16_t r = (uint16_t)(color[0] * 15) & 0x0F; - uint16_t g = (uint16_t)(color[1] * 15) & 0x0F; - uint16_t b = (uint16_t)(color[2] * 15) & 0x0F; - uint16_t a = (uint16_t)(color[3] * 15) & 0x0F; - - *pixel = (r << 12) | (g << 8) | (b << 4) | a; -} - -static inline void sw_set_pixel_rgba_8888(void* pixels, uint32_t offset, const float* color) -{ - uint8_t* pixel = (uint8_t*)pixels + 4 * offset; - - pixel[0] = (uint8_t)(color[0] * 255.0f); - pixel[1] = (uint8_t)(color[1] * 255.0f); - pixel[2] = (uint8_t)(color[2] * 255.0f); - pixel[3] = (uint8_t)(color[3] * 255.0f); -} - -static inline void sw_set_pixel_rgba_16161616(void* pixels, uint32_t offset, const float* color) -{ - sw_half_t* pixel = (sw_half_t*)pixels + 4 * offset; - - pixel[0] = sw_cvt_fh(color[0]); - pixel[1] = sw_cvt_fh(color[1]); - pixel[2] = sw_cvt_fh(color[2]); - pixel[3] = sw_cvt_fh(color[3]); -} - -static inline void sw_set_pixel_rgba_32323232(void* pixels, uint32_t offset, const float* color) -{ - float* pixel = (float*)pixels + 4 * offset; - - pixel[0] = color[0]; - pixel[1] = color[1]; - pixel[2] = color[2]; - pixel[3] = color[3]; -} - -static inline void sw_set_pixel(void* pixels, uint32_t offset, sw_pixelformat_e format, const float* color) -{ - switch (format) { - - case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: - sw_set_pixel_grayscale(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: - sw_set_pixel_grayscale_alpha(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: - sw_set_pixel_rgb_565(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: - 
sw_set_pixel_rgb_888(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: - sw_set_pixel_rgba_5551(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: - sw_set_pixel_rgba_4444(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: - sw_set_pixel_rgba_8888(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32: - sw_set_pixel_red_32(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: - sw_set_pixel_rgb_323232(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: - sw_set_pixel_rgba_32323232(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16: - sw_set_pixel_red_16(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: - sw_set_pixel_rgb_161616(pixels, offset, color); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: - sw_set_pixel_rgba_16161616(pixels, offset, color); - break; - - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: - break; - - } -} - /* === Texture Sampling Part === */ @@ -3209,60 +3409,165 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, { sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); - void* src = RLSW.framebuffer.color; + if (w <= 0) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } - int wSrc = RLSW.framebuffer.width; - int hSrcM1 = RLSW.framebuffer.height - 1; + if (h <= 0) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + if (w > 
RLSW.framebuffer.width) + w = RLSW.framebuffer.width; + + if (h > RLSW.framebuffer.height) + h = RLSW.framebuffer.height; + + x = sw_clampi(x, 0, w); + y = sw_clampi(y, 0, h); + + switch (pFormat) { + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_framebuffer_copy_to_R5G6B5(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_framebuffer_copy_to_R8G8B8(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_framebuffer_copy_to_R5G5B5A1(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_framebuffer_copy_to_R4G4B4A4(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_framebuffer_copy_to_R8G8B8A8(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_framebuffer_copy_to_R32(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_framebuffer_copy_to_R32G32B32(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_framebuffer_copy_to_R32G32B32A32(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_framebuffer_copy_to_R16(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_framebuffer_copy_to_R16G16B16(x, y, w, h, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_framebuffer_copy_to_R16G16B16A16(x, y, w, h, pixels); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case 
SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + RLSW.errCode = SW_INVALID_ENUM; + break; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - float color[4]; - sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, (hSrcM1 - y) * wSrc + x)); - sw_set_pixel(pixels, y * wSrc + x, pFormat, color); - } } } -void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, +void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void* pixels) { sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); - void* src = RLSW.framebuffer.color; - int fbWidth = RLSW.framebuffer.width; - int fbHeight = RLSW.framebuffer.height; + if (wSrc <= 0) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } - // Calculation of scaling factors in 16.16 (fixed-point) - const int xScale = (wSrc << 16) / wDst; - const int yScale = (hSrc << 16) / hDst; + if (hSrc <= 0) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } - const int xSrcBase = xSrc << 16; - const int ySrcBase = ySrc << 16; + if (wSrc > RLSW.framebuffer.width) + wSrc = RLSW.framebuffer.width; - for (int y = 0; y < hDst; y++) { - const int ySrcFixed = ySrcBase + y * yScale; - const int ySrcInt = ySrcFixed >> 16; + if (hSrc > RLSW.framebuffer.height) + hSrc = RLSW.framebuffer.height; - if ((unsigned)ySrcInt >= (unsigned)fbHeight) { - continue; - } + xSrc = sw_clampi(xSrc, 0, wSrc); + ySrc = sw_clampi(ySrc, 0, hSrc); - for (int x = 0; x < wDst; x++) { - const int xSrcFixed = xSrcBase + x * xScale; - const int xSrcInt = xSrcFixed >> 16; + switch (pFormat) { - if ((unsigned)xSrcInt >= (unsigned)fbWidth) { - continue; - } + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_framebuffer_blit_to_GRAYALPHA(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + 
sw_framebuffer_blit_to_GRAYALPHA(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_framebuffer_blit_to_R5G6B5(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_framebuffer_blit_to_R8G8B8(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_framebuffer_blit_to_R5G5B5A1(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_framebuffer_blit_to_R4G4B4A4(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_framebuffer_blit_to_R8G8B8A8(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_framebuffer_blit_to_R32(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_framebuffer_blit_to_R32G32B32(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_framebuffer_blit_to_R32G32B32A32(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_framebuffer_blit_to_R16(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_framebuffer_blit_to_R16G16B16(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_framebuffer_blit_to_R16G16B16A16(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); + break; - float color[4]; - const int srcIndex = ySrcInt * fbWidth + xSrcInt; - sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, srcIndex)); + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case 
SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + RLSW.errCode = SW_INVALID_ENUM; + break; - const int dstIndex = (yDst + y) * wDst + (xDst + x); - sw_set_pixel(pixels, dstIndex, pFormat, color); - } } } From 55e503171b88fbbc5df1d1b7e99c0ea4d223db65 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 11 May 2025 03:06:57 +0200 Subject: [PATCH 059/105] review framebuffer filling functions --- src/external/rlsw.h | 205 ++++++++++++++++++++++++++++---------------- 1 file changed, 133 insertions(+), 72 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index ce7173ea8..c7bab54ec 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -998,21 +998,25 @@ static inline void sw_framebuffer_write_color(void* dst, float color[3]) static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) { - uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; - uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; - uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + uint8_t r8 = (uint8_t)(color[0] * 7.0f + 0.5f); + uint8_t g8 = (uint8_t)(color[1] * 7.0f + 0.5f); + uint8_t b8 = (uint8_t)(color[2] * 3.0f + 0.5f); + + uint8_t packedColor = ((r8 & 0x07) << 5) | ((g8 & 0x07) << 2) | (b8 & 0x03); uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - p[y * RLSW.framebuffer.width + x] = (r << 5) | (g << 2) | b; + uint8_t* curPtr = p + y * RLSW.framebuffer.width + RLSW.scMin[0]; + for (int xCount = 0; xCount < wScissor; xCount++) { + *curPtr++ = 
packedColor; } } } else { for (int i = 0; i < size; i++) { - p[i] = (r << 5) | (g << 2) | b; + *p++ = packedColor; } } } @@ -1054,21 +1058,25 @@ static inline void sw_framebuffer_write_color(void* dst, float color[3]) static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) { - uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; - uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; - uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + uint16_t r16 = (uint16_t)(color[0] * 31.0f + 0.5f); + uint16_t g_16 = (uint16_t)(color[1] * 63.0f + 0.5f); + uint16_t b_16 = (uint16_t)(color[2] * 31.0f + 0.5f); + + uint16_t packedColor = ((r16 & 0x1F) << 11) | ((g_16 & 0x3F) << 5) | (b_16 & 0x1F); uint16_t* p = (uint16_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - p[y * RLSW.framebuffer.width + x] = (r << 11) | (g << 5) | b; + uint16_t* curPtr = p + y * RLSW.framebuffer.width + RLSW.scMin[0]; + for (int xCount = 0; xCount < wScissor; xCount++) { + *curPtr++ = packedColor; } } } else { for (int i = 0; i < size; i++) { - p[i] = (r << 11) | (g << 5) | b; + *p++ = packedColor; } } } @@ -1100,19 +1108,20 @@ static inline void sw_framebuffer_write_color(void* dst, float color[3]) static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) { - uint8_t r = (uint8_t)(color[0] * 255); - uint8_t g = (uint8_t)(color[1] * 255); - uint8_t b = (uint8_t)(color[2] * 255); + uint8_t r = (uint8_t)(color[0] * 255.0f); + uint8_t g = (uint8_t)(color[1] * 255.0f); + uint8_t b = (uint8_t)(color[2] * 255.0f); uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - int offset = (y * 
RLSW.framebuffer.width + x) * 3; - p[offset + 0] = r; - p[offset + 1] = g; - p[offset + 2] = b; + uint8_t* curPtr = p + 3 * (y * RLSW.framebuffer.width + RLSW.scMin[0]); + for (int xCount = 0; xCount < wScissor; xCount++) { + *curPtr++ = r; + *curPtr++ = g; + *curPtr++ = b; } } } else { @@ -1140,19 +1149,21 @@ static inline void sw_framebuffer_write_depth(void* dst, float depth) static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) { - uint8_t v = value * UINT8_MAX; + uint8_t d8 = (uint8_t)(value * UINT8_MAX); uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - p[y * RLSW.framebuffer.width + x] = v; + uint8_t* curPtr = p + y * RLSW.framebuffer.width + RLSW.scMin[0]; + for (int xCount = 0; xCount < wScissor; xCount++) { + *curPtr++ = d8; } } } else { for (int i = 0; i < size; i++) { - p[i] = v; + *p++ = d8; } } } @@ -1171,19 +1182,21 @@ static inline void sw_framebuffer_write_depth(void* dst, float depth) static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) { - uint16_t v = value * UINT16_MAX; + uint16_t d16 = (uint16_t)(value * UINT16_MAX); uint16_t* p = (uint16_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - p[y * RLSW.framebuffer.width + x] = v; + uint16_t* curPtr = p + y * RLSW.framebuffer.width + RLSW.scMin[0]; + for (int xCount = 0; xCount < wScissor; xCount++) { + *curPtr++ = d16; } } } else { for (int i = 0; i < size; i++) { - p[i] = v; + *p++ = d16; } } } @@ -1210,24 +1223,29 @@ static inline void sw_framebuffer_write_depth(void* dst, float depth) static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) { - uint32_t v = value * 
UINT32_MAX; + uint32_t d32 = (uint32_t)(value * UINT32_MAX); + uint8_t d_byte0 = (uint8_t)((d32 >> 16) & 0xFF); + uint8_t d_byte1 = (uint8_t)((d32 >> 8) & 0xFF); + uint8_t d_byte2 = (uint8_t)(d32 & 0xFF); + uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - int offset = y * RLSW.framebuffer.width + x; - p[3 * offset + 0] = (v >> 16) & 0xFF; - p[3 * offset + 1] = (v >> 8) & 0xFF; - p[3 * offset + 2] = v & 0xFF; + uint8_t* curPtr = p + 3 * (y * RLSW.framebuffer.width + RLSW.scMin[0]); + for (int xCount = 0; xCount < wScissor; xCount++) { + *curPtr++ = d_byte0; + *curPtr++ = d_byte1; + *curPtr++ = d_byte2; } } } else { for (int i = 0; i < size; i++) { - *p++ = (v >> 16) & 0xFF; - *p++ = (v >> 8) & 0xFF; - *p++ = v & 0xFF; + *p++ = d_byte0; + *p++ = d_byte1; + *p++ = d_byte2; } } } @@ -1237,78 +1255,121 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, float color[4], float depth_value) { #if (SW_COLOR_BUFFER_BITS == 8) - uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; - uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; - uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + // Calculate and pack 3:3:2 color + // Scale color components to the max value for each bit depth and round + uint8_t r8 = (uint8_t)(color[0] * 7.0f + 0.5f); + uint8_t g8 = (uint8_t)(color[1] * 7.0f + 0.5f); + uint8_t b8 = (uint8_t)(color[2] * 3.0f + 0.5f); + // Pack the components into a single byte + uint8_t packedColor = ((r8 & 0x07) << 5) | ((g8 & 0x07) << 2) | (b8 & 0x03); uint8_t* cptr = (uint8_t*)colorPtr; #elif (SW_COLOR_BUFFER_BITS == 16) - uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; - uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; - uint8_t b = 
(uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + // Calculate and pack 5:6:5 color + // Scale color components to the max value for each bit depth and round + uint16_t r16 = (uint16_t)(color[0] * 31.0f + 0.5f); + uint16_t r16 = (uint16_t)(color[1] * 63.0f + 0.5f); + uint16_t b16 = (uint16_t)(color[2] * 31.0f + 0.5f); + // Pack the components into a 16-bit value + uint16_t packedColor = ((r16 & 0x1F) << 11) | ((r16 & 0x3F) << 5) | (b16 & 0x1F); uint16_t* cptr = (uint16_t*)colorPtr; #elif (SW_COLOR_BUFFER_BITS == 24) - uint8_t r = (uint8_t)(color[0] * 255); - uint8_t g = (uint8_t)(color[1] * 255); - uint8_t b = (uint8_t)(color[2] * 255); + // Calculate 8:8:8 color components + uint8_t r24 = (uint8_t)(color[0] * 255.0f); + uint8_t g24 = (uint8_t)(color[1] * 255.0f); + uint8_t b24 = (uint8_t)(color[2] * 255.0f); uint8_t* cptr = (uint8_t*)colorPtr; #endif #if (SW_DEPTH_BUFFER_BITS == 8) - uint8_t d = depth_value * UINT8_MAX; + // Calculate 8-bit depth + uint8_t d8 = (uint8_t)(depth_value * UINT8_MAX); uint8_t* dptr = (uint8_t*)depthPtr; #elif (SW_DEPTH_BUFFER_BITS == 16) - uint16_t d = depth_value * UINT16_MAX; + // Calculate 16-bit depth + uint16_t d16 = (uint16_t)(depth_value * UINT16_MAX); uint16_t* dptr = (uint16_t*)depthPtr; #elif (SW_DEPTH_BUFFER_BITS == 24) - uint32_t d = depth_value * UINT32_MAX; + // Calculate 24-bit depth and pre-calculate bytes + uint32_t d32 = (uint32_t)(depth_value * UINT32_MAX); + uint8_t dByte0 = (uint8_t)((d32 >> 16) & 0xFF); + uint8_t dByte1 = (uint8_t)((d32 >> 8) & 0xFF); + uint8_t dByte2 = (uint8_t)(d32 & 0xFF); uint8_t* dptr = (uint8_t*)depthPtr; #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int wScissor = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { - for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { - int offset = y * RLSW.framebuffer.width + x; + int row_start_index = y * RLSW.framebuffer.width + RLSW.scMin[0]; + + // Calculate starting pointers for the current row within the 
scissor rectangle # if (SW_COLOR_BUFFER_BITS == 8) - cptr[offset] = (r << 5) | (g << 2) | b; + uint8_t* curCPtr = cptr + row_start_index; # elif (SW_COLOR_BUFFER_BITS == 16) - cptr[offset] = (r << 11) | (g << 5) | b; + uint16_t* curCPtr = cptr + row_start_index; # elif (SW_COLOR_BUFFER_BITS == 24) - cptr[3 * offset + 0] = r; - cptr[3 * offset + 1] = g; - cptr[3 * offset + 2] = b; + uint8_t* curCPtr = cptr + 3 * row_start_index; # endif + # if (SW_DEPTH_BUFFER_BITS == 8) - dptr[offset] = d; + uint8_t* curDPtr = dptr + row_start_index; # elif (SW_DEPTH_BUFFER_BITS == 16) - dptr[offset] = d; + uint16_t* curDPtr = dptr + row_start_index; # elif (SW_DEPTH_BUFFER_BITS == 24) - dptr[3 * offset + 0] = (d >> 16) & 0xFF; - dptr[3 * offset + 1] = (d >> 8) & 0xFF; - dptr[3 * offset + 2] = d & 0xFF; + uint8_t* curDPtr = dptr + 3 * row_start_index; +# endif + + // Fill the current row within the scissor rectangle + for (int xCount = 0; xCount < wScissor; xCount++) + { + // Write color +# if (SW_COLOR_BUFFER_BITS == 8) + *curCPtr++ = packedColor; +# elif (SW_COLOR_BUFFER_BITS == 16) + *curCPtr++ = packedColor; +# elif (SW_COLOR_BUFFER_BITS == 24) + *curCPtr++ = r24; + *curCPtr++ = g24; + *curCPtr++ = b24; +# endif + + // Write depth +# if (SW_DEPTH_BUFFER_BITS == 8) + *curDPtr++ = d8; +# elif (SW_DEPTH_BUFFER_BITS == 16) + *curDPtr++ = d16; +# elif (SW_DEPTH_BUFFER_BITS == 24) + *curDPtr++ = dByte0; + *curDPtr++ = dByte1; + *curDPtr++ = dByte2; # endif } } return; } - for (int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) + { + // Write color # if (SW_COLOR_BUFFER_BITS == 8) - cptr[i] = (r << 5) | (g << 2) | b; + *cptr++ = packedColor; # elif (SW_COLOR_BUFFER_BITS == 16) - cptr[i] = (r << 11) | (g << 5) | b; + *cptr++ = packedColor; # elif (SW_COLOR_BUFFER_BITS == 24) - *cptr++ = r; - *cptr++ = g; - *cptr++ = b; + *cptr++ = r24; + *cptr++ = g24; + *cptr++ = b24; # endif + + // Write depth # if (SW_DEPTH_BUFFER_BITS == 8) - dptr[i] = d; + *dptr++ = d8; # elif 
(SW_DEPTH_BUFFER_BITS == 16) - dptr[i] = d; + *dptr++ = d16; # elif (SW_DEPTH_BUFFER_BITS == 24) - *dptr++ = (d >> 16) & 0xFF; - *dptr++ = (d >> 8) & 0xFF; - *dptr++ = d & 0xFF; + *dptr++ = dByte0; + *dptr++ = dByte1; + *dptr++ = dByte2; # endif } } From 1c7162271cdc99cb39a3016d6bcddbee2a9ba027 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 15 May 2025 02:03:23 +0200 Subject: [PATCH 060/105] impl specific quad rendering func --- src/external/rlsw.h | 152 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 141 insertions(+), 11 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c7bab54ec..5028139bf 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2171,7 +2171,7 @@ static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) } -/* === Triangle Rendering Part === */ +/* === Polygon Clipping Part === */ #define DEFINE_CLIP_FUNC(name, FUNC_IS_INSIDE, FUNC_COMPUTE_T) \ static inline int sw_clip_##name( \ @@ -2250,7 +2250,7 @@ DEFINE_CLIP_FUNC(scissor_y_max, IS_INSIDE_SCISSOR_Y_MAX, COMPUTE_T_SCISSOR_Y_MAX // Main clip function -static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; int n = *vertexCounter; @@ -2284,6 +2284,9 @@ static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_V return n > 0; } + +/* === Triangle Rendering Part === */ + static inline void sw_triangle_clip_and_project(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { // Step 1: Face culling - discard triangles facing away @@ -2312,13 +2315,13 @@ static inline void sw_triangle_clip_and_project(sw_vertex_t polygon[SW_MAX_CLIPP return; } } - + // Step 2: Clipping and perspective projection - if (sw_triangle_clip(polygon, vertexCounter) && *vertexCounter >= 3) { + if 
(sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 3) { // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = &polygon[i]; // Use &polygon[i] instead of polygon + i + sw_vertex_t *v = &polygon[i]; // Calculation of the reciprocal of W for normalization // as well as perspective-correct attributes @@ -2668,6 +2671,137 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* } +/* === Quad Rendering Part === */ + +static inline void sw_quad_clip_and_project(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + // Step 1: Face culling - discard quads facing away + if (RLSW.stateFlags & SW_STATE_CULL_FACE) { + + // NOTE: We use Green's theorem (signed polygon area) instead of triangulation. + // This is faster but only reliable if the quad is convex and not self-intersecting. + // For face culling purposes, this approximation is acceptable. + + // Preload homogeneous coordinates into local variables + const float* h0 = polygon[0].homogeneous; + const float* h1 = polygon[1].homogeneous; + const float* h2 = polygon[2].homogeneous; + const float* h3 = polygon[3].homogeneous; + + // Compute 1/w once and delay divisions + const float invW0 = 1.0f / h0[3]; + const float invW1 = 1.0f / h1[3]; + const float invW2 = 1.0f / h2[3]; + const float invW3 = 1.0f / h3[3]; + + // Pre-multiply to get x/w and y/w coordinates + const float x0 = h0[0] * invW0, y0 = h0[1] * invW0; + const float x1 = h1[0] * invW1, y1 = h1[1] * invW1; + const float x2 = h2[0] * invW2, y2 = h2[1] * invW2; + const float x3 = h3[0] * invW3, y3 = h3[1] * invW3; + + // Use Green's theorem (signed polygon area) + // area = 0.5 * sum of (xi * yi+1 - xi+1 * yi) + // The factor 0.5 is not needed here, only the sign matters. 
+ const float sgnArea = + (x0 * y1 - x1 * y0) + + (x1 * y2 - x2 * y1) + + (x2 * y3 - x3 * y2) + + (x3 * y0 - x0 * y3); + + // Perform face culling based on area sign + if ((RLSW.cullFace == SW_FRONT) ? (sgnArea >= 0.0f) : (sgnArea <= 0.0f)) { + *vertexCounter = 0; + return; + } + } + + // Step 2: Clipping and perspective projection + if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 4) { + + // Transformation to screen space and normalization + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = &polygon[i]; + + // Calculation of the reciprocal of W for normalization + // as well as perspective-correct attributes + const float invW = 1.0f / v->homogeneous[3]; + v->homogeneous[3] = invW; + + // Division of XYZ coordinates by weight + v->homogeneous[0] *= invW; + v->homogeneous[1] *= invW; + v->homogeneous[2] *= invW; + + // Division of texture coordinates (perspective-correct) + v->texcoord[0] *= invW; + v->texcoord[1] *= invW; + + // Division of colors (perspective-correct) + v->color[0] *= invW; + v->color[1] *= invW; + v->color[2] *= invW; + v->color[3] *= invW; + + // Transformation to screen space + sw_project_ndc_to_screen(v->screen, v->homogeneous); + } + } +} + +static inline void sw_quad_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, const sw_vertex_t* v3) +{ + int vertexCounter = 4; + + sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; + polygon[0] = *v0; + polygon[1] = *v1; + polygon[2] = *v2; + polygon[3] = *v3; + + sw_quad_clip_and_project(polygon, &vertexCounter); + + if (vertexCounter < 4) { + return; + } + +# define TRIANGLE_RASTER(RASTER_FUNC) \ + { \ + for (int i = 0; i < vertexCounter - 2; i++) { \ + RASTER_FUNC( \ + &polygon[0], &polygon[i + 1], &polygon[i + 2], \ + &RLSW.loadedTextures[RLSW.currentTexture] \ + ); \ + } \ + } + + if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND) + } + else if 
(SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH) + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + TRIANGLE_RASTER(sw_triangle_raster_DEPTH) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX) + } + else { + TRIANGLE_RASTER(sw_triangle_raster) + } +} + + /* === Line Rendering Part === */ static inline bool sw_line_clip_coord(float q, float p, float* t0, float* t1) @@ -3237,15 +3371,11 @@ static inline void sw_poly_fill_render(void) ); break; case SW_QUADS: - sw_triangle_render( + sw_quad_render( &RLSW.vertexBuffer[0], &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - sw_triangle_render( &RLSW.vertexBuffer[2], - &RLSW.vertexBuffer[3], - &RLSW.vertexBuffer[0] + &RLSW.vertexBuffer[3] ); break; } From 5e181ede7e4b69fe93b3c0de197c391c5cd4547d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 15 May 2025 02:23:43 +0200 Subject: [PATCH 061/105] use of a single global vertex buffer --- src/external/rlsw.h | 108 +++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 61 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5028139bf..0ef1f997d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -646,7 +646,7 @@ typedef struct { uint8_t* colors; } array; - sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering + sw_vertex_t vertexBuffer[SW_MAX_CLIPPED_POLYGON_VERTICES]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' SWdraw drawMode; 
// Current primitive mode (e.g., lines, triangles) @@ -2287,8 +2287,11 @@ static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VE /* === Triangle Rendering Part === */ -static inline void sw_triangle_clip_and_project(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline void sw_triangle_clip_and_project(void) { + sw_vertex_t* polygon = RLSW.vertexBuffer; + int* vertexCounter = &RLSW.vertexCounter; + // Step 1: Face culling - discard triangles facing away if (RLSW.stateFlags & SW_STATE_CULL_FACE) { @@ -2619,26 +2622,21 @@ DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND, sw_triangle_raster_scanline DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND, sw_triangle_raster_scanline_DEPTH_BLEND, false) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND, sw_triangle_raster_scanline_TEX_DEPTH_BLEND, true) -static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) +static inline void sw_triangle_render(void) { - int vertexCounter = 3; + sw_triangle_clip_and_project(); - sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; - polygon[0] = *v0; - polygon[1] = *v1; - polygon[2] = *v2; - - sw_triangle_clip_and_project(polygon, &vertexCounter); - - if (vertexCounter < 3) { + if (RLSW.vertexCounter < 3) { return; } # define TRIANGLE_RASTER(RASTER_FUNC) \ { \ - for (int i = 0; i < vertexCounter - 2; i++) { \ + for (int i = 0; i < RLSW.vertexCounter - 2; i++) { \ RASTER_FUNC( \ - &polygon[0], &polygon[i + 1], &polygon[i + 2], \ + &RLSW.vertexBuffer[0], \ + &RLSW.vertexBuffer[i + 1], \ + &RLSW.vertexBuffer[i + 2], \ &RLSW.loadedTextures[RLSW.currentTexture] \ ); \ } \ @@ -2668,13 +2666,18 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* else { TRIANGLE_RASTER(sw_triangle_raster) } + +# undef TRIANGLE_RASTER } /* === Quad Rendering Part === */ -static inline void sw_quad_clip_and_project(sw_vertex_t 
polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline void sw_quad_clip_and_project() { + sw_vertex_t* polygon = RLSW.vertexBuffer; + int* vertexCounter = &RLSW.vertexCounter; + // Step 1: Face culling - discard quads facing away if (RLSW.stateFlags & SW_STATE_CULL_FACE) { @@ -2749,27 +2752,21 @@ static inline void sw_quad_clip_and_project(sw_vertex_t polygon[SW_MAX_CLIPPED_P } } -static inline void sw_quad_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, const sw_vertex_t* v3) +static inline void sw_quad_render(void) { - int vertexCounter = 4; + sw_quad_clip_and_project(); - sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; - polygon[0] = *v0; - polygon[1] = *v1; - polygon[2] = *v2; - polygon[3] = *v3; - - sw_quad_clip_and_project(polygon, &vertexCounter); - - if (vertexCounter < 4) { + if (RLSW.vertexCounter < 4) { return; } # define TRIANGLE_RASTER(RASTER_FUNC) \ { \ - for (int i = 0; i < vertexCounter - 2; i++) { \ + for (int i = 0; i < RLSW.vertexCounter - 2; i++) { \ RASTER_FUNC( \ - &polygon[0], &polygon[i + 1], &polygon[i + 2], \ + &RLSW.vertexBuffer[0], \ + &RLSW.vertexBuffer[i + 1], \ + &RLSW.vertexBuffer[i + 2], \ &RLSW.loadedTextures[RLSW.currentTexture] \ ); \ } \ @@ -2799,6 +2796,8 @@ static inline void sw_quad_render(const sw_vertex_t* v0, const sw_vertex_t* v1, else { TRIANGLE_RASTER(sw_triangle_raster) } + +# undef TRIANGLE_RASTER } @@ -3083,38 +3082,38 @@ DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH, sw_line_raster_DEPTH) DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_BLEND, sw_line_raster_BLEND) DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH_BLEND, sw_line_raster_DEPTH_BLEND) -static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) +static inline void sw_line_render(sw_vertex_t* vertices) { - if (!sw_line_clip_and_project(v0, v1)) { + if (!sw_line_clip_and_project(&vertices[0], &vertices[1])) { return; } if (RLSW.lineWidth >= 2.0f) { if 
(SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { - sw_line_thick_raster_DEPTH_BLEND(v0, v1); + sw_line_thick_raster_DEPTH_BLEND(&vertices[0], &vertices[1]); } else if (SW_STATE_CHECK(SW_STATE_BLEND)) { - sw_line_thick_raster_BLEND(v0, v1); + sw_line_thick_raster_BLEND(&vertices[0], &vertices[1]); } else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { - sw_line_thick_raster_DEPTH(v0, v1); + sw_line_thick_raster_DEPTH(&vertices[0], &vertices[1]); } else { - sw_line_thick_raster(v0, v1); + sw_line_thick_raster(&vertices[0], &vertices[1]); } } else { if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { - sw_line_raster_DEPTH_BLEND(v0, v1); + sw_line_raster_DEPTH_BLEND(&vertices[0], &vertices[1]); } else if (SW_STATE_CHECK(SW_STATE_BLEND)) { - sw_line_raster_BLEND(v0, v1); + sw_line_raster_BLEND(&vertices[0], &vertices[1]); } else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { - sw_line_raster_DEPTH(v0, v1); + sw_line_raster_DEPTH(&vertices[0], &vertices[1]); } else { - sw_line_raster(v0, v1); + sw_line_raster(&vertices[0], &vertices[1]); } } } @@ -3338,45 +3337,32 @@ static inline void sw_poly_line_render(void) { const sw_vertex_t* vertices = RLSW.vertexBuffer; int cm1 = RLSW.vertexCounter - 1; - sw_vertex_t v0, v1; for (int i = 0; i < cm1; i++) { - v0 = vertices[i], v1 = vertices[i + 1]; - sw_line_render(&v0, &v1); + sw_line_render((sw_vertex_t[2]){ + vertices[i], vertices[i + 1] + }); } - v0 = vertices[cm1], v1 = vertices[0]; - sw_line_render(&v0, &v1); + sw_line_render((sw_vertex_t[2]){ + vertices[cm1], vertices[0] + }); } static inline void sw_poly_fill_render(void) { switch (RLSW.drawMode) { case SW_POINTS: - sw_point_render( - &RLSW.vertexBuffer[0] - ); + sw_point_render(&RLSW.vertexBuffer[0]); break; case SW_LINES: - sw_line_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1] - ); + sw_line_render(RLSW.vertexBuffer); break; case SW_TRIANGLES: - sw_triangle_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); + 
sw_triangle_render(); break; case SW_QUADS: - sw_quad_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2], - &RLSW.vertexBuffer[3] - ); + sw_quad_render(); break; } } From 110445cfe218d04a314877a2c9469157ec1d8775 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 15 May 2025 02:28:12 +0200 Subject: [PATCH 062/105] fix 'sw_poly_point_render' --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 0ef1f997d..a2ae51498 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3329,7 +3329,7 @@ static inline void sw_point_render(sw_vertex_t* v) static inline void sw_poly_point_render(void) { for (int i = 0; i < RLSW.vertexCounter; i++) { - sw_point_render(RLSW.vertexBuffer); + sw_point_render(&RLSW.vertexBuffer[i]); } } From fd37d4f5280314bb6206ffb47ece8ee91d430d39 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 15 May 2025 02:36:04 +0200 Subject: [PATCH 063/105] added `SW_RESTRICT` and redesigned `sw_lerp_vertex_PNCTH` --- src/external/rlsw.h | 52 ++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a2ae51498..12e522597 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -42,6 +42,14 @@ # define SW_FREE(ptr) free(ptr) #endif +#ifndef SW_RESTRICT +# ifdef _MSC_VER +# define SW_RESTRICT __restrict +# else +# define SW_RESTRICT restrict +# endif +#endif + #ifndef SW_GL_BINDING_COPY_TEXTURE # define SW_GL_BINDING_COPY_TEXTURE true #endif @@ -803,35 +811,31 @@ static inline float sw_lerp(float a, float b, float t) return a + t * (b - a); } -static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_vertex_t* b, float t) +static inline void sw_lerp_vertex_PNTCH(sw_vertex_t* SW_RESTRICT out, const sw_vertex_t* SW_RESTRICT a, const sw_vertex_t* SW_RESTRICT b, float t) { - sw_vertex_t result; - const float tInv = 1.0f - t; // 
Position interpolation (4 components) - result.position[0] = a->position[0] * tInv + b->position[0] * t; - result.position[1] = a->position[1] * tInv + b->position[1] * t; - result.position[2] = a->position[2] * tInv + b->position[2] * t; - result.position[3] = a->position[3] * tInv + b->position[3] * t; + out->position[0] = a->position[0] * tInv + b->position[0] * t; + out->position[1] = a->position[1] * tInv + b->position[1] * t; + out->position[2] = a->position[2] * tInv + b->position[2] * t; + out->position[3] = a->position[3] * tInv + b->position[3] * t; // Texture coordinate interpolation (2 components) - result.texcoord[0] = a->texcoord[0] * tInv + b->texcoord[0] * t; - result.texcoord[1] = a->texcoord[1] * tInv + b->texcoord[1] * t; + out->texcoord[0] = a->texcoord[0] * tInv + b->texcoord[0] * t; + out->texcoord[1] = a->texcoord[1] * tInv + b->texcoord[1] * t; // Color interpolation (4 components) - result.color[0] = a->color[0] * tInv + b->color[0] * t; - result.color[1] = a->color[1] * tInv + b->color[1] * t; - result.color[2] = a->color[2] * tInv + b->color[2] * t; - result.color[3] = a->color[3] * tInv + b->color[3] * t; + out->color[0] = a->color[0] * tInv + b->color[0] * t; + out->color[1] = a->color[1] * tInv + b->color[1] * t; + out->color[2] = a->color[2] * tInv + b->color[2] * t; + out->color[3] = a->color[3] * tInv + b->color[3] * t; // Homogeneous coordinate interpolation (4 components) - result.homogeneous[0] = a->homogeneous[0] * tInv + b->homogeneous[0] * t; - result.homogeneous[1] = a->homogeneous[1] * tInv + b->homogeneous[1] * t; - result.homogeneous[2] = a->homogeneous[2] * tInv + b->homogeneous[2] * t; - result.homogeneous[3] = a->homogeneous[3] * tInv + b->homogeneous[3] * t; - - return result; + out->homogeneous[0] = a->homogeneous[0] * tInv + b->homogeneous[0] * t; + out->homogeneous[1] = a->homogeneous[1] * tInv + b->homogeneous[1] * t; + out->homogeneous[2] = a->homogeneous[2] * tInv + b->homogeneous[2] * t; + out->homogeneous[3] = 
a->homogeneous[3] * tInv + b->homogeneous[3] * t; } @@ -2190,7 +2194,7 @@ static inline int sw_clip_##name( /* If transition between interior/exterior, calculate intersection point */ \ if (prevInside != currInside) { \ float t = FUNC_COMPUTE_T(prev->homogeneous, curr->homogeneous); \ - output[outputCount++] = sw_lerp_vertex_PNTCH(prev, curr, t); \ + sw_lerp_vertex_PNTCH(&output[outputCount++], prev, curr, t); \ } \ \ /* If current vertex inside, add it */ \ @@ -2546,8 +2550,8 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const float t2 = dy * invH10; \ \ /* Vertex interpolation */ \ - start = sw_lerp_vertex_PNTCH(v0, v2, t1); \ - end = sw_lerp_vertex_PNTCH(v0, v1, t2); \ + sw_lerp_vertex_PNTCH(&start, v0, v2, t1); \ + sw_lerp_vertex_PNTCH(&end, v0, v1, t2); \ start.screen[0] = xLeft; \ start.screen[1] = (float)y; \ end.screen[0] = xRight; \ @@ -2582,8 +2586,8 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const float t2 = (float)(y - y1) * invH21; \ \ /* Vertex interpolation */ \ - start = sw_lerp_vertex_PNTCH(v0, v2, t1); \ - end = sw_lerp_vertex_PNTCH(v1, v2, t2); \ + sw_lerp_vertex_PNTCH(&start, v0, v2, t1); \ + sw_lerp_vertex_PNTCH(&end, v1, v2, t2); \ start.screen[0] = xLeft; \ start.screen[1] = (float)y; \ end.screen[0] = xRight; \ From 515e894076d1b8fa453e21e514f07c1a06d28140 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 15 May 2025 02:54:32 +0200 Subject: [PATCH 064/105] tweak the pipeline flow regarding the face culling avoids misprediction, improves vectorization if possible --- src/external/rlsw.h | 152 +++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 71 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 12e522597..d7a777ecd 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2291,39 +2291,41 @@ static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VE /* === Triangle Rendering Part === */ +static 
inline bool sw_triangle_face_culling(void) +{ + // NOTE: Face culling is done before clipping to avoid unnecessary computations. + // However, culling requires NDC coordinates, while clipping must be done + // in homogeneous space to correctly interpolate newly generated vertices. + // This means we need to compute 1/W twice: + // - Once before clipping for face culling. + // - Again after clipping for the new vertices. + + // Preload homogeneous coordinates into local variables + const float* h0 = RLSW.vertexBuffer[0].homogeneous; + const float* h1 = RLSW.vertexBuffer[1].homogeneous; + const float* h2 = RLSW.vertexBuffer[2].homogeneous; + + // Compute 1/w once and delay divisions + const float invW0 = 1.0f / h0[3]; + const float invW1 = 1.0f / h1[3]; + const float invW2 = 1.0f / h2[3]; + + // Compute the signed 2D area (cross product in Z) + const float x0 = h0[0] * invW0, y0 = h0[1] * invW0; + const float x1 = h1[0] * invW1, y1 = h1[1] * invW1; + const float x2 = h2[0] * invW2, y2 = h2[1] * invW2; + const float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); + + // Discard the triangle if it faces the culled direction + return (RLSW.cullFace == SW_FRONT) + ? (sgnArea < 0) : (sgnArea > 0); +} + static inline void sw_triangle_clip_and_project(void) { sw_vertex_t* polygon = RLSW.vertexBuffer; int* vertexCounter = &RLSW.vertexCounter; - // Step 1: Face culling - discard triangles facing away - if (RLSW.stateFlags & SW_STATE_CULL_FACE) { - - // NOTE: Face culling is done before clipping to avoid unnecessary computations. - // However, culling requires NDC coordinates, while clipping must be done - // in homogeneous space to correctly interpolate newly generated vertices. - // This means we need to compute 1/W twice: - // - Once before clipping for face culling. - // - Again after clipping for the new vertices. 
- - const float invW0 = 1.0f / polygon[0].homogeneous[3]; - const float invW1 = 1.0f / polygon[1].homogeneous[3]; - const float invW2 = 1.0f / polygon[2].homogeneous[3]; - - // Compute the signed 2D area (cross product in Z) - const float x0 = polygon[0].homogeneous[0] * invW0, y0 = polygon[0].homogeneous[1] * invW0; - const float x1 = polygon[1].homogeneous[0] * invW1, y1 = polygon[1].homogeneous[1] * invW1; - const float x2 = polygon[2].homogeneous[0] * invW2, y2 = polygon[2].homogeneous[1] * invW2; - const float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); - - // Discard the triangle if it faces the culled direction - if ((RLSW.cullFace == SW_FRONT) ? (sgnArea >= 0) : (sgnArea <= 0)) { - *vertexCounter = 0; - return; - } - } - - // Step 2: Clipping and perspective projection if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 3) { // Transformation to screen space and normalization @@ -2628,6 +2630,12 @@ DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND, sw_triangle_raster_sc static inline void sw_triangle_render(void) { + if (RLSW.stateFlags & SW_STATE_CULL_FACE) { + if (!sw_triangle_face_culling()) { + return; + } + } + sw_triangle_clip_and_project(); if (RLSW.vertexCounter < 3) { @@ -2677,53 +2685,49 @@ static inline void sw_triangle_render(void) /* === Quad Rendering Part === */ -static inline void sw_quad_clip_and_project() +static inline bool sw_quad_face_culling(void) +{ + // NOTE: We use Green's theorem (signed polygon area) instead of triangulation. + // This is faster but only reliable if the quad is convex and not self-intersecting. + // For face culling purposes, this approximation is acceptable. 
+ + // Preload homogeneous coordinates into local variables + const float* h0 = RLSW.vertexBuffer[0].homogeneous; + const float* h1 = RLSW.vertexBuffer[1].homogeneous; + const float* h2 = RLSW.vertexBuffer[2].homogeneous; + const float* h3 = RLSW.vertexBuffer[3].homogeneous; + + // Compute 1/w once and delay divisions + const float invW0 = 1.0f / h0[3]; + const float invW1 = 1.0f / h1[3]; + const float invW2 = 1.0f / h2[3]; + const float invW3 = 1.0f / h3[3]; + + // Pre-multiply to get x/w and y/w coordinates + const float x0 = h0[0] * invW0, y0 = h0[1] * invW0; + const float x1 = h1[0] * invW1, y1 = h1[1] * invW1; + const float x2 = h2[0] * invW2, y2 = h2[1] * invW2; + const float x3 = h3[0] * invW3, y3 = h3[1] * invW3; + + // Use Green's theorem (signed polygon area) + // area = 0.5 * sum of (xi * yi+1 - xi+1 * yi) + // The factor 0.5 is not needed here, only the sign matters. + const float sgnArea = + (x0 * y1 - x1 * y0) + + (x1 * y2 - x2 * y1) + + (x2 * y3 - x3 * y2) + + (x3 * y0 - x0 * y3); + + // Perform face culling based on area sign + return (RLSW.cullFace == SW_FRONT) + ? (sgnArea < 0.0f) : (sgnArea > 0.0f); +} + +static inline void sw_quad_clip_and_project(void) { sw_vertex_t* polygon = RLSW.vertexBuffer; int* vertexCounter = &RLSW.vertexCounter; - // Step 1: Face culling - discard quads facing away - if (RLSW.stateFlags & SW_STATE_CULL_FACE) { - - // NOTE: We use Green's theorem (signed polygon area) instead of triangulation. - // This is faster but only reliable if the quad is convex and not self-intersecting. - // For face culling purposes, this approximation is acceptable. 
- - // Preload homogeneous coordinates into local variables - const float* h0 = polygon[0].homogeneous; - const float* h1 = polygon[1].homogeneous; - const float* h2 = polygon[2].homogeneous; - const float* h3 = polygon[3].homogeneous; - - // Compute 1/w once and delay divisions - const float invW0 = 1.0f / h0[3]; - const float invW1 = 1.0f / h1[3]; - const float invW2 = 1.0f / h2[3]; - const float invW3 = 1.0f / h3[3]; - - // Pre-multiply to get x/w and y/w coordinates - const float x0 = h0[0] * invW0, y0 = h0[1] * invW0; - const float x1 = h1[0] * invW1, y1 = h1[1] * invW1; - const float x2 = h2[0] * invW2, y2 = h2[1] * invW2; - const float x3 = h3[0] * invW3, y3 = h3[1] * invW3; - - // Use Green's theorem (signed polygon area) - // area = 0.5 * sum of (xi * yi+1 - xi+1 * yi) - // The factor 0.5 is not needed here, only the sign matters. - const float sgnArea = - (x0 * y1 - x1 * y0) - + (x1 * y2 - x2 * y1) - + (x2 * y3 - x3 * y2) - + (x3 * y0 - x0 * y3); - - // Perform face culling based on area sign - if ((RLSW.cullFace == SW_FRONT) ? 
(sgnArea >= 0.0f) : (sgnArea <= 0.0f)) { - *vertexCounter = 0; - return; - } - } - - // Step 2: Clipping and perspective projection if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 4) { // Transformation to screen space and normalization @@ -2758,6 +2762,12 @@ static inline void sw_quad_clip_and_project() static inline void sw_quad_render(void) { + if (RLSW.stateFlags & SW_STATE_CULL_FACE) { + if (!sw_quad_face_culling()) { + return; + } + } + sw_quad_clip_and_project(); if (RLSW.vertexCounter < 4) { From e565077833ff9d649adb44434cda440da95201ce Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Fri, 16 May 2025 21:04:15 +0200 Subject: [PATCH 065/105] new rendering path for axis aligned quads --- src/external/rlsw.h | 353 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 332 insertions(+), 21 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d7a777ecd..8e6a422b6 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2059,7 +2059,7 @@ static inline void sw_texture_sample_linear(float* color, const sw_texture_t* te } static inline void sw_texture_sample(float* color, const sw_texture_t* tex, float u, float v, - float xDu, float yDu, float xDv, float yDv) + float duDx, float duDy, float dvDx, float dvDy) { // TODO: It seems there are some incorrect detections depending on the context // This is probably due to the fact that the fractions are obtained @@ -2068,13 +2068,13 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa // Previous method: There is no need to compute the square root // because using the squared value, the comparison remains `L2 > 1.0f * 1.0f` - //float du = sqrtf(xDu * xDu + yDu * yDu); - //float dv = sqrtf(xDv * xDv + yDv * yDv); + //float du = sqrtf(duDx * duDx + duDy * duDy); + //float dv = sqrtf(dvDx * dvDx + dvDy * dvDy); //float L = (du > dv) ? 
du : dv; // Calculate the derivatives for each axis - float du2 = xDu * xDu + yDu * yDu; - float dv2 = xDv * xDv + yDv * yDv; + float du2 = duDx * duDx + duDy * duDy; + float dv2 = dvDx * dvDx + dvDy * dvDy; float L2 = (du2 > dv2) ? du2 : dv2; SWfilter filter = (L2 > 1.0f) @@ -2360,7 +2360,7 @@ static inline void sw_triangle_clip_and_project(void) #define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ - const sw_vertex_t* end, float yDu, float yDv) \ + const sw_vertex_t* end, float duDy, float dvDy) \ { \ /* Convert and center the screen coordinates */ \ int xStart = (int)(start->screen[0] + 0.5f); \ @@ -2379,10 +2379,10 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, float t = (xStart - start->screen[0]) * dt; \ \ /* Calculate the horizontal gradients for UV coordinates */ \ - float xDu, xDv; \ + float duDx, dvDx; \ if (ENABLE_TEXTURE) { \ - xDu = (end->texcoord[0] - start->texcoord[0]) * dt; \ - xDv = (end->texcoord[1] - start->texcoord[1]) * dt; \ + duDx = (end->texcoord[0] - start->texcoord[0]) * dt; \ + dvDx = (end->texcoord[1] - start->texcoord[1]) * dt; \ } \ \ /* Pre-calculate the color differences for interpolation */ \ @@ -2399,8 +2399,8 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, float u, v; \ if (ENABLE_TEXTURE) { \ /* Initialize the interpolated texture coordinates */ \ - u = start->texcoord[0] + t * xDu; \ - v = start->texcoord[1] + t * xDv; \ + u = start->texcoord[0] + t * duDx; \ + v = start->texcoord[1] + t * dvDx; \ } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ @@ -2438,7 +2438,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, float srcColor[4]; \ if (ENABLE_TEXTURE) \ { \ - sw_texture_sample(srcColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + sw_texture_sample(srcColor, 
tex, u * w, v * w, duDx, duDy, dvDx, dvDy); \ srcColor[0] *= (start->color[0] + t * dcol[0]) * w; \ srcColor[1] *= (start->color[1] + t * dcol[1]) * w; \ srcColor[2] *= (start->color[2] + t * dcol[2]) * w; \ @@ -2465,7 +2465,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, if (ENABLE_TEXTURE) \ { \ float color[4]; \ - sw_texture_sample(color, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + sw_texture_sample(color, tex, u * w, v * w, duDx, duDy, dvDx, dvDy); \ color[0] = sw_saturate(color[0] * (start->color[0] + t * dcol[0]) * w); \ color[1] = sw_saturate(color[1] * (start->color[1] + t * dcol[1]) * w); \ color[2] = sw_saturate(color[2] * (start->color[2] + t * dcol[2]) * w); \ @@ -2487,8 +2487,8 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, sw_framebuffer_inc_color_addr(&cptr); \ sw_framebuffer_inc_depth_addr(&dptr); \ if (ENABLE_TEXTURE) { \ - u += xDu; \ - v += xDv; \ + u += duDx; \ + v += dvDx; \ } \ } \ } @@ -2528,10 +2528,10 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const int yBottom = (int)(y2 + 0.5f); \ \ /* Global calculation of vertical texture gradients for the triangle */ \ - float yDu, yDv; \ + float duDy, dvDy; \ if (ENABLE_TEXTURE) { \ - yDu = (v2->texcoord[0] - v0->texcoord[0]) * invH20; \ - yDv = (v2->texcoord[1] - v0->texcoord[1]) * invH20; \ + duDy = (v2->texcoord[0] - v0->texcoord[0]) * invH20; \ + dvDy = (v2->texcoord[1] - v0->texcoord[1]) * invH20; \ } \ \ /* Initializing scanline variables */ \ @@ -2565,7 +2565,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const end = tmp; \ } \ \ - FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ + FUNC_SCANLINE(tex, &start, &end, duDy, dvDy); \ \ /* Incremental update */ \ discardTL: \ @@ -2601,7 +2601,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const end = tmp; \ } \ \ - FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ + FUNC_SCANLINE(tex, 
&start, &end, duDy, dvDy); \ \ /* Incremental update */ \ discardBL: \ @@ -2760,6 +2760,288 @@ static inline void sw_quad_clip_and_project(void) } } +static inline bool sw_quad_is_axis_aligned(void) +{ + int horizontal = 0; + int vertical = 0; + + for (int i = 0; i < 4; i++) { + const float* v0 = RLSW.vertexBuffer[i].position; + const float* v1 = RLSW.vertexBuffer[(i + 1) % 4].position; + + float dx = v1[0] - v0[0]; + float dy = v1[1] - v0[1]; + + if (fabsf(dx) > 1e-6f && fabsf(dy) < 1e-6f) { + horizontal++; + } + else if (fabsf(dy) > 1e-6f && fabsf(dx) < 1e-6f) { + vertical++; + } + else { + // Diagonal edge -> not axis-aligned + return false; + } + } + + return (horizontal == 2 && vertical == 2); +} + +static inline void sw_quad_sort_cw(const sw_vertex_t** output) +{ + // Sort 4 quad vertices into clockwise order with fixed layout: + // + // v0 -- v1 + // | | + // v3 -- v2 + // + // The goal is: + // - v0: top-left (minimum Y, then minimum X) + // - v1: top-right (same Y row as v0, maximum X) + // - v3: bottom-left (maximum Y, minimum X) + // - v2: bottom-right (maximum Y, maximum X) + + const sw_vertex_t* input = RLSW.vertexBuffer; + + // Find indices of vertices with min Y (top) and max Y (bottom) + int minYIndex = 0, maxYIndex = 0; + for (int i = 1; i < 4; i++) { + if (input[i].screen[1] < input[minYIndex].screen[1]) minYIndex = i; + if (input[i].screen[1] > input[maxYIndex].screen[1]) maxYIndex = i; + } + + // Find indices of the remaining two vertices + int others[2], idx = 0; + for (int i = 0; i < 4; i++) { + if (i != minYIndex && i != maxYIndex) { + others[idx++] = i; + } + } + + // Determine left/right among the middle vertices by X coordinate + int leftMidIndex = (input[others[0]].screen[0] < input[others[1]].screen[0]) ? others[0] : others[1]; + int rightMidIndex = (leftMidIndex == others[0]) ? 
others[1] : others[0]; + + // Assign top vertices: v0 = top-left, v1 = top-right + if (input[minYIndex].screen[0] < input[leftMidIndex].screen[0]) { + output[0] = &input[minYIndex]; // v0: top-left + output[1] = &input[leftMidIndex]; // v1: top-right + } else { + output[0] = &input[leftMidIndex]; + output[1] = &input[minYIndex]; + } + + // Assign bottom vertices: v3 = bottom-left, v2 = bottom-right + if (input[maxYIndex].screen[0] < input[rightMidIndex].screen[0]) { + output[3] = &input[maxYIndex]; // v3: bottom-left + output[2] = &input[rightMidIndex]; // v2: bottom-right + } else { + output[3] = &input[rightMidIndex]; + output[2] = &input[maxYIndex]; + } +} + +// REVIEW: Could a perfectly aligned quad, where one of the four points has a different depth, still appear perfectly aligned from a certain point of view? +// Because in that case, we would still need to perform perspective division for textures and colors... +#define DEFINE_QUAD_RASTER_AXIS_ALIGNED(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ +static inline void FUNC_NAME(void) \ +{ \ + const sw_vertex_t* sortedVerts[4]; \ + sw_quad_sort_cw(sortedVerts); \ + \ + const sw_vertex_t* v0 = sortedVerts[0]; \ + const sw_vertex_t* v1 = sortedVerts[1]; \ + const sw_vertex_t* v2 = sortedVerts[2]; \ + const sw_vertex_t* v3 = sortedVerts[3]; \ + \ + /* Bornes écran (axis-aligned) */ \ + \ + int xMin = (int)v0->screen[0]; \ + int yMin = (int)v0->screen[1]; \ + int xMax = (int)v2->screen[0]; \ + int yMax = (int)v2->screen[1]; \ + \ + float width = (float)(xMax - xMin); \ + float height = (float)(yMax - yMin); \ + \ + float invWidth = 1.0f / width; \ + float invHeight = 1.0f / height; \ + \ + /* Pré-calculs des coefficients pour l'interpolation bilinéaire */ \ + \ + float zA, zB, zC, zD; \ + \ + zA = v0->homogeneous[2]; \ + zB = v1->homogeneous[2] - v0->homogeneous[2]; \ + zC = v3->homogeneous[2] - v0->homogeneous[2]; \ + zD = v2->homogeneous[2] - v3->homogeneous[2] - v1->homogeneous[2] + 
v0->homogeneous[2]; \ + \ + float colorA[4]; \ + float colorB[4]; \ + float colorC[4]; \ + float colorD[4]; \ + \ + for (int c = 0; c < 4; c++) { \ + colorA[c] = v0->color[c]; \ + colorB[c] = v1->color[c] - v0->color[c]; \ + colorC[c] = v3->color[c] - v0->color[c]; \ + colorD[c] = v2->color[c] - v3->color[c] - v1->color[c] + v0->color[c]; \ + } \ + \ + float texA[2]; \ + float texB[2]; \ + float texC[2]; \ + float texD[2]; \ + \ + if (ENABLE_TEXTURE) \ + { \ + for (int uv = 0; uv < 2; uv++) { \ + texA[uv] = v0->texcoord[uv]; \ + texB[uv] = v1->texcoord[uv] - v0->texcoord[uv]; \ + texC[uv] = v3->texcoord[uv] - v0->texcoord[uv]; \ + texD[uv] = v2->texcoord[uv] - v3->texcoord[uv] - v1->texcoord[uv] + v0->texcoord[uv]; \ + } \ + } \ + \ + /* Pré-calcul des gradients UV (constants sur tout le quad) */ \ + \ + float duDx, dvDx, duDy, dvDy; \ + \ + if (ENABLE_TEXTURE) \ + { \ + duDx = ((v1->texcoord[0] - v0->texcoord[0]) + (v2->texcoord[0] - v3->texcoord[0])) * 0.5f * invWidth; \ + dvDx = ((v1->texcoord[1] - v0->texcoord[1]) + (v2->texcoord[1] - v3->texcoord[1])) * 0.5f * invWidth; \ + duDy = ((v3->texcoord[0] - v0->texcoord[0]) + (v2->texcoord[0] - v1->texcoord[0])) * 0.5f * invHeight; \ + dvDy = ((v3->texcoord[1] - v0->texcoord[1]) + (v2->texcoord[1] - v1->texcoord[1])) * 0.5f * invHeight; \ + } \ + \ + const sw_texture_t* tex = &RLSW.loadedTextures[RLSW.currentTexture]; \ + void* cDstBase = RLSW.framebuffer.color; \ + void* dDstBase = RLSW.framebuffer.depth; \ + int wDst = RLSW.framebuffer.width; \ + \ + for (int y = yMin; y < yMax; y++) \ + { \ + float ty = (y - yMin) * invHeight; \ + void* cptr = sw_framebuffer_get_color_addr(cDstBase, y * wDst + xMin); \ + void* dptr = sw_framebuffer_get_depth_addr(dDstBase, y * wDst + xMin); \ + \ + /* Calculer les valeurs de départ pour cette ligne (pour x = xMin) */ \ + \ + float z = zA + zC * ty; \ + \ + float srcColor[4]; \ + srcColor[0] = colorA[0] + colorC[0] * ty; \ + srcColor[1] = colorA[1] + colorC[1] * ty; \ + srcColor[2] 
= colorA[2] + colorC[2] * ty; \ + srcColor[3] = colorA[3] + colorC[3] * ty; \ + \ + float u, v; \ + if (ENABLE_TEXTURE) { \ + u = texA[0] + texC[0] * ty; \ + v = texA[1] + texC[1] * ty; \ + } \ + \ + /* Calcul des incréments par pixel sur X (constants pour une ligne) */ \ + \ + float zIncX = (zB + zD * ty) * invWidth; \ + \ + float colorIncX[4]; \ + colorIncX[0] = (colorB[0] + colorD[0] * ty) * invWidth; \ + colorIncX[1] = (colorB[1] + colorD[1] * ty) * invWidth; \ + colorIncX[2] = (colorB[2] + colorD[2] * ty) * invWidth; \ + colorIncX[3] = (colorB[3] + colorD[3] * ty) * invWidth; \ + \ + float uvIncX[2]; \ + if (ENABLE_TEXTURE) { \ + uvIncX[0] = (texB[0] + texD[0] * ty) * invWidth; \ + uvIncX[1] = (texB[1] + texD[1] * ty) * invWidth; \ + } \ + \ + /* Rasterisation de la ligne */ \ + \ + for (int x = xMin; x < xMax; x++) \ + { \ + /* Interpolate Z for depth testing */ \ + \ + if (ENABLE_DEPTH_TEST) \ + { \ + /* TODO: Implement different depth funcs? */ \ + float depth = sw_framebuffer_read_depth(dptr); \ + if (z > depth) goto discard; \ + } \ + \ + sw_framebuffer_write_depth(dptr, z); \ + \ + /* Calcul de la couleur du pixel */ \ + \ + float fragColor[4] = { \ + srcColor[0], \ + srcColor[1], \ + srcColor[2], \ + srcColor[3] \ + }; \ + \ + if (ENABLE_TEXTURE) \ + { \ + float texColor[4]; \ + sw_texture_sample(texColor, tex, u, v, duDx, duDy, dvDx, dvDy); \ + fragColor[0] *= texColor[0]; \ + fragColor[1] *= texColor[1]; \ + fragColor[2] *= texColor[2]; \ + fragColor[3] *= texColor[3]; \ + } \ + \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + sw_framebuffer_read_color(dstColor, cptr); \ + \ + sw_blend_colors(dstColor, fragColor); \ + dstColor[0] = sw_saturate(dstColor[0]); \ + dstColor[1] = sw_saturate(dstColor[1]); \ + dstColor[2] = sw_saturate(dstColor[2]); \ + \ + sw_framebuffer_write_color(cptr, dstColor); \ + } \ + else \ + { \ + sw_framebuffer_write_color(cptr, fragColor); \ + } \ + \ + /* Incrémenter les valeurs pour le pixel suivant */ \ + \ + 
discard: \ + \ + z += zIncX; \ + \ + srcColor[0] += colorIncX[0]; \ + srcColor[1] += colorIncX[1]; \ + srcColor[2] += colorIncX[2]; \ + srcColor[3] += colorIncX[3]; \ + \ + if (ENABLE_TEXTURE) { \ + u += uvIncX[0]; \ + v += uvIncX[1]; \ + } \ + \ + /* Avancer le pointeur couleur sur la ligne */ \ + \ + sw_framebuffer_inc_color_addr(&cptr); \ + sw_framebuffer_inc_depth_addr(&dptr); \ + } \ + } \ +} + +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned, 0, 0, 0) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX, 1, 0, 0) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_DEPTH, 0, 1, 0) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_BLEND, 0, 0, 1) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_DEPTH, 1, 1, 0) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_BLEND, 1, 0, 1) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_DEPTH_BLEND, 0, 1, 1) +DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_DEPTH_BLEND, 1, 1, 1) + static inline void sw_quad_render(void) { if (RLSW.stateFlags & SW_STATE_CULL_FACE) { @@ -2770,7 +3052,36 @@ static inline void sw_quad_render(void) sw_quad_clip_and_project(); - if (RLSW.vertexCounter < 4) { + if (RLSW.vertexCounter < 3) { + return; + } + + if (RLSW.vertexCounter == 4 && sw_quad_is_axis_aligned()) + { + if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_quad_raster_axis_aligned_TEX_DEPTH_BLEND(); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_quad_raster_axis_aligned_DEPTH_BLEND(); + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) { + sw_quad_raster_axis_aligned_TEX_BLEND(); + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) { + sw_quad_raster_axis_aligned_TEX_DEPTH(); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_quad_raster_axis_aligned_BLEND(); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + 
sw_quad_raster_axis_aligned_DEPTH(); + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D)) { + sw_quad_raster_axis_aligned_TEX(); + } + else { + sw_quad_raster_axis_aligned(); + } return; } From 0a3e1f361eff5769311f97dc1d0dbfaec93fa206 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Fri, 16 May 2025 21:26:16 +0200 Subject: [PATCH 066/105] oops, translating some comments --- src/external/rlsw.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 8e6a422b6..e203aad20 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2854,7 +2854,7 @@ static inline void FUNC_NAME(void) const sw_vertex_t* v2 = sortedVerts[2]; \ const sw_vertex_t* v3 = sortedVerts[3]; \ \ - /* Bornes écran (axis-aligned) */ \ + /* Screen bounds (axis-aligned) */ \ \ int xMin = (int)v0->screen[0]; \ int yMin = (int)v0->screen[1]; \ @@ -2867,7 +2867,7 @@ static inline void FUNC_NAME(void) float invWidth = 1.0f / width; \ float invHeight = 1.0f / height; \ \ - /* Pré-calculs des coefficients pour l'interpolation bilinéaire */ \ + /* Precomputed coefficients for bilinear interpolation */ \ \ float zA, zB, zC, zD; \ \ @@ -2903,7 +2903,7 @@ static inline void FUNC_NAME(void) } \ } \ \ - /* Pré-calcul des gradients UV (constants sur tout le quad) */ \ + /* Precomputed UV gradients (constant across the entire quad) */ \ \ float duDx, dvDx, duDy, dvDy; \ \ @@ -2926,7 +2926,7 @@ static inline void FUNC_NAME(void) void* cptr = sw_framebuffer_get_color_addr(cDstBase, y * wDst + xMin); \ void* dptr = sw_framebuffer_get_depth_addr(dDstBase, y * wDst + xMin); \ \ - /* Calculer les valeurs de départ pour cette ligne (pour x = xMin) */ \ + /* Compute starting values for this scanline (for x = xMin) */ \ \ float z = zA + zC * ty; \ \ @@ -2942,7 +2942,7 @@ static inline void FUNC_NAME(void) v = texA[1] + texC[1] * ty; \ } \ \ - /* Calcul des incréments par pixel sur X (constants pour une ligne) */ \ + /* Compute per-pixel increments 
along X (constant for a scanline) */ \ \ float zIncX = (zB + zD * ty) * invWidth; \ \ @@ -2958,7 +2958,7 @@ static inline void FUNC_NAME(void) uvIncX[1] = (texB[1] + texD[1] * ty) * invWidth; \ } \ \ - /* Rasterisation de la ligne */ \ + /* Scanline rasterization */ \ \ for (int x = xMin; x < xMax; x++) \ { \ @@ -2973,7 +2973,7 @@ static inline void FUNC_NAME(void) \ sw_framebuffer_write_depth(dptr, z); \ \ - /* Calcul de la couleur du pixel */ \ + /* Pixel color computation */ \ \ float fragColor[4] = { \ srcColor[0], \ @@ -3009,7 +3009,7 @@ static inline void FUNC_NAME(void) sw_framebuffer_write_color(cptr, fragColor); \ } \ \ - /* Incrémenter les valeurs pour le pixel suivant */ \ + /* Increment values for the next pixel */ \ \ discard: \ \ @@ -3025,7 +3025,7 @@ static inline void FUNC_NAME(void) v += uvIncX[1]; \ } \ \ - /* Avancer le pointeur couleur sur la ligne */ \ + /* Advance the pointers along the line */ \ \ sw_framebuffer_inc_color_addr(&cptr); \ sw_framebuffer_inc_depth_addr(&dptr); \ From 09b1c8ba8c9cb0afa946b1ea2ab8e2a0106fb4e1 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Fri, 16 May 2025 21:42:34 +0200 Subject: [PATCH 067/105] use of `restrict` for blending function parameters --- src/external/rlsw.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e203aad20..a1ce6370f 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -579,7 +579,11 @@ typedef enum { SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp } sw_pixelformat_e; -typedef void (*sw_factor_f)(float *factor, const float *src, const float *dst); +typedef void (*sw_factor_f)( + float* SW_RESTRICT factor, + const float* SW_RESTRICT src, + const float* SW_RESTRICT dst +); typedef float sw_matrix_t[4*4]; typedef uint16_t sw_half_t; @@ -2093,67 +2097,67 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa /* === Color Blending Functions === */ -static inline 
void sw_factor_zero(float *factor, const float *src, const float *dst) +static inline void sw_factor_zero(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = 0.0f; } -static inline void sw_factor_one(float *factor, const float *src, const float *dst) +static inline void sw_factor_one(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = 1.0f; } -static inline void sw_factor_src_color(float *factor, const float *src, const float *dst) +static inline void sw_factor_src_color(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = src[0]; factor[1] = src[1]; factor[2] = src[2]; factor[3] = src[3]; } -static inline void sw_factor_one_minus_src_color(float *factor, const float *src, const float *dst) +static inline void sw_factor_one_minus_src_color(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = 1.0f - src[0]; factor[1] = 1.0f - src[1]; factor[2] = 1.0f - src[2]; factor[3] = 1.0f - src[3]; } -static inline void sw_factor_src_alpha(float *factor, const float *src, const float *dst) +static inline void sw_factor_src_alpha(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = src[3]; } -static inline void sw_factor_one_minus_src_alpha(float *factor, const float *src, const float *dst) +static inline void sw_factor_one_minus_src_alpha(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { float inv_alpha = 1.0f - src[3]; factor[0] = factor[1] = factor[2] = factor[3] = inv_alpha; } -static inline void sw_factor_dst_alpha(float *factor, const float *src, const float *dst) +static inline void sw_factor_dst_alpha(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { 
factor[0] = factor[1] = factor[2] = factor[3] = dst[3]; } -static inline void sw_factor_one_minus_dst_alpha(float *factor, const float *src, const float *dst) +static inline void sw_factor_one_minus_dst_alpha(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { float inv_alpha = 1.0f - dst[3]; factor[0] = factor[1] = factor[2] = factor[3] = inv_alpha; } -static inline void sw_factor_dst_color(float *factor, const float *src, const float *dst) +static inline void sw_factor_dst_color(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = dst[0]; factor[1] = dst[1]; factor[2] = dst[2]; factor[3] = dst[3]; } -static inline void sw_factor_one_minus_dst_color(float *factor, const float *src, const float *dst) +static inline void sw_factor_one_minus_dst_color(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = 1.0f - dst[0]; factor[1] = 1.0f - dst[1]; factor[2] = 1.0f - dst[2]; factor[3] = 1.0f - dst[3]; } -static inline void sw_factor_src_alpha_saturate(float *factor, const float *src, const float *dst) +static inline void sw_factor_src_alpha_saturate(float* SW_RESTRICT factor, const float* SW_RESTRICT src, const float* SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = 1.0f; factor[3] = (src[3] < 1.0f) ? 
src[3] : 1.0f; } -static inline void sw_blend_colors(float dst[4], float src[4]) +static inline void sw_blend_colors(float* SW_RESTRICT dst/*[4]*/, float* SW_RESTRICT src/*[4]*/) { float srcFactor[4], dstFactor[4]; From 1820586d1fae1f268e8b920da5ec03b89433a3b6 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 14:39:08 +0200 Subject: [PATCH 068/105] update rlgl.h --- src/rlgl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rlgl.h b/src/rlgl.h index 1a757d949..4e6a73459 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -149,7 +149,8 @@ #endif // Security check in case no GRAPHICS_API_OPENGL_* defined -#if !defined(GRAPHICS_API_OPENGL_11) && \ +#if !defined(GRAPHICS_API_OPENGL_11_SOFTWARE) && \ + !defined(GRAPHICS_API_OPENGL_11) && \ !defined(GRAPHICS_API_OPENGL_21) && \ !defined(GRAPHICS_API_OPENGL_33) && \ !defined(GRAPHICS_API_OPENGL_43) && \ @@ -3705,6 +3706,7 @@ void rlCopyFramebuffer(int x, int y, int w, int h, int format, void* pixels) rlGetGlTextureFormats(format, &glInternalFormat, &glFormat, &glType); swCopyFramebuffer(x, y, w, h, glFormat, glType, pixels); } + void rlResizeFramebuffer(int width, int height) { swResizeFramebuffer(width, height); From d17116fb08fd34ae0a5ddbb78ed9d6540c59a197 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 14:42:03 +0200 Subject: [PATCH 069/105] adding `GRAPHICS_API_OPENGL_11_SOFTWARE` in `DrawMesh` --- src/rmodels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rmodels.c b/src/rmodels.c index d927f4099..68638c904 100644 --- a/src/rmodels.c +++ b/src/rmodels.c @@ -1421,7 +1421,7 @@ void UpdateMeshBuffer(Mesh mesh, int index, const void *data, int dataSize, int // Draw a 3d mesh with material and transform void DrawMesh(Mesh mesh, Material material, Matrix transform) { -#if defined(GRAPHICS_API_OPENGL_11) +#if defined(GRAPHICS_API_OPENGL_11) || defined(GRAPHICS_API_OPENGL_11_SOFTWARE) #define GL_VERTEX_ARRAY 0x8074 #define GL_NORMAL_ARRAY 0x8075 #define 
GL_COLOR_ARRAY 0x8076 From a03619fec0e9edbee256b4c8692f5ecc725b8be6 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 14:53:16 +0200 Subject: [PATCH 070/105] add `RL_OPENGL_11_SOFTWARE` enum --- src/rlgl.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/rlgl.h b/src/rlgl.h index 4e6a73459..23d598288 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -433,7 +433,8 @@ typedef struct rlRenderBatch { // OpenGL version typedef enum { - RL_OPENGL_11 = 1, // OpenGL 1.1 + RL_OPENGL_11_SOFTWARE = 0, // Software rendering + RL_OPENGL_11, // OpenGL 1.1 RL_OPENGL_21, // OpenGL 2.1 (GLSL 120) RL_OPENGL_33, // OpenGL 3.3 (GLSL 330) RL_OPENGL_43, // OpenGL 4.3 (using GLSL 330) @@ -2659,7 +2660,9 @@ void rlLoadExtensions(void *loader) int rlGetVersion(void) { int glVersion = 0; -#if defined(GRAPHICS_API_OPENGL_11) +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + glVersion = RL_OPENGL_11_SOFTWARE; +#elif defined(GRAPHICS_API_OPENGL_11) glVersion = RL_OPENGL_11; #endif #if defined(GRAPHICS_API_OPENGL_21) From 169f7f4ed9154374bceccfa474913a82cb755e3b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 15:16:32 +0200 Subject: [PATCH 071/105] temp tweak --- src/rlgl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rlgl.h b/src/rlgl.h index 23d598288..9c57f39a8 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -849,7 +849,7 @@ RLAPI void rlLoadDrawQuad(void); // Load and draw a quad #define SW_MALLOC(sz) RL_MALLOC(sz) #define SW_REALLOC(ptr, newSz) RL_REALLOC(ptr, newSz) #define SW_FREE(ptr) RL_FREE(ptr) - #include // OpenGL 1.1 software implementation + #include "./external/rlsw.h" // OpenGL 1.1 software implementation #else #if defined(__APPLE__) #include <OpenGL/gl.h> // OpenGL 1.1 library for OSX From 074b68a809aebdbe2564f95e28b4389e05f9e6db Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 16:07:04 +0200 Subject: [PATCH 072/105] build fixes --- src/external/rlsw.h | 2 +- src/rlgl.h | 2 +- 2 files changed, 2
insertions(+), 2 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a1ce6370f..e36f01845 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -158,7 +158,7 @@ typedef double GLclampd; #define GL_TEXTURE 0x1702 #define GL_VERTEX_ARRAY 0x8074 -//#define GL_NORMAL_ARRAY 0x8075 +#define GL_NORMAL_ARRAY 0x8075 //< WARNING: Not implemented (defined for RLGL) #define GL_COLOR_ARRAY 0x8076 //#define GL_INDEX_ARRAY 0x8077 #define GL_TEXTURE_COORD_ARRAY 0x8078 diff --git a/src/rlgl.h b/src/rlgl.h index 9c57f39a8..a71d4def7 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -849,7 +849,7 @@ RLAPI void rlLoadDrawQuad(void); // Load and draw a quad #define SW_MALLOC(sz) RL_MALLOC(sz) #define SW_REALLOC(ptr, newSz) RL_REALLOC(ptr, newSz) #define SW_FREE(ptr) RL_FREE(ptr) - #include "./external/rlsw.h" // OpenGL 1.1 software implementation + #include "external/rlsw.h" // OpenGL 1.1 software implementation #else #if defined(__APPLE__) #include <OpenGL/gl.h> // OpenGL 1.1 library for OSX From 9d49dbcb8d78425ef1e3c1545d095b85b327f855 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 16:07:25 +0200 Subject: [PATCH 073/105] fix DrawMesh for GL 1.1 --- src/rmodels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rmodels.c b/src/rmodels.c index 68638c904..e930ccd5e 100644 --- a/src/rmodels.c +++ b/src/rmodels.c @@ -1433,7 +1433,7 @@ void DrawMesh(Mesh mesh, Material material, Matrix transform) else rlEnableStatePointer(GL_VERTEX_ARRAY, mesh.vertices); rlEnableStatePointer(GL_TEXTURE_COORD_ARRAY, mesh.texcoords); - if (mesh.normals) rlEnableStatePointer(GL_VERTEX_ARRAY, mesh.animNormalss); + if (mesh.normals) rlEnableStatePointer(GL_VERTEX_ARRAY, mesh.animNormals); else rlEnableStatePointer(GL_NORMAL_ARRAY, mesh.normals); rlEnableStatePointer(GL_COLOR_ARRAY, mesh.colors); From 5b46c6e66f2e3ef57e955209623a6f498d69fa2a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 16:18:13 +0200 Subject: [PATCH 074/105] update swClose ---
src/external/rlsw.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e36f01845..86dbff569 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3879,6 +3879,7 @@ bool swInit(int w, int h) RLSW.loadedTextures[0].tWrap = SW_REPEAT; RLSW.loadedTextures[0].tx = 0.5f; RLSW.loadedTextures[0].ty = 0.5f; + RLSW.loadedTextures[0].copy = false; RLSW.loadedTextureCount = 1; @@ -3887,21 +3888,17 @@ bool swInit(int w, int h) void swClose(void) { - if (RLSW.framebuffer.color != NULL) { - SW_FREE(RLSW.framebuffer.color); + for (int i = 1; i < RLSW.loadedTextureCount; i++) { + sw_texture_t* texture = &RLSW.loadedTextures[i]; + if (sw_is_texture_valid(i) && texture->copy) { + SW_FREE(texture->pixels.ptr); + } } - if (RLSW.framebuffer.depth != NULL) { - SW_FREE(RLSW.framebuffer.depth); - } - - if (RLSW.loadedTextures != NULL) { - SW_FREE(RLSW.loadedTextures); - } - - if (RLSW.freeTextureIds != NULL) { - SW_FREE(RLSW.freeTextureIds); - } + SW_FREE(RLSW.framebuffer.color); + SW_FREE(RLSW.framebuffer.depth); + SW_FREE(RLSW.loadedTextures); + SW_FREE(RLSW.freeTextureIds); RLSW = (sw_context_t) { 0 }; } From 7ec3a5fe6e4f31d53676f8e0334eaf5e79b1d87d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 16:32:14 +0200 Subject: [PATCH 075/105] review texture format + fix copy --- src/external/rlsw.h | 83 ++++++++------------------------------------- 1 file changed, 14 insertions(+), 69 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 86dbff569..49f216692 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -566,17 +566,6 @@ typedef enum { SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) - 
SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) - SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp } sw_pixelformat_e; typedef void (*sw_factor_f)( @@ -1778,36 +1767,25 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) return -1; // Unsupported format } -int sw_get_pixel_bpp(sw_pixelformat_e format) +int sw_get_pixel_bytes(sw_pixelformat_e format) { int bpp = 0; switch (format) { - case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: bpp = 8; break; + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: bpp = 1; break; case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: - case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: bpp = 16; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: bpp = 32; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: bpp = 24; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R32: bpp = 32; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: bpp = 32*3; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: bpp = 32*4; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R16: bpp = 16; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: bpp = 16*3; break; - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: bpp = 16*4; break; - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: bpp = 4; break; - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case 
SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: bpp = 8; break; - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: bpp = 2; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: bpp = 2; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: bpp = 4; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: bpp = 3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32: bpp = 4; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: bpp = 4*3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: bpp = 4*4; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16: bpp = 2; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: bpp = 2*3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: bpp = 2*4; break; default: break; } @@ -1998,19 +1976,6 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse sw_get_pixel_rgba_16161616(color, pixels, offset); break; - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: - break; - } } @@ -3973,17 +3938,7 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, sw_framebuffer_copy_to_R16G16B16A16(x, y, w, h, pixels); break; - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: - case 
SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + default: RLSW.errCode = SW_INVALID_ENUM; break; @@ -4057,17 +4012,7 @@ void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, sw_framebuffer_blit_to_R16G16B16A16(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, pixels); break; - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + default: RLSW.errCode = SW_INVALID_ENUM; break; @@ -5025,7 +4970,7 @@ void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy sw_texture_t* texture = &RLSW.loadedTextures[id]; if (copy) { - int bpp = sw_get_pixel_bpp(pixelFormat); + int bpp = sw_get_pixel_bytes(pixelFormat); int size = bpp * width * height; texture->pixels.ptr = SW_MALLOC(size); if (texture->pixels.ptr == NULL) { From c35a74a13b0a8de6ef0da378d455fa26bbcdaefb Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 16:38:23 +0200 Subject: [PATCH 076/105] set minimum req vertices to 3 (quads) --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 49f216692..a0cedbb05 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2697,7 +2697,7 @@ static inline void sw_quad_clip_and_project(void) sw_vertex_t* polygon = RLSW.vertexBuffer; int* vertexCounter = &RLSW.vertexCounter; - if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 4) { + if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 3) { // Transformation to screen space and normalization for (int i = 
0; i < *vertexCounter; i++) { From ea8ecb2d2151180e98cea45a4afbe274eadfe800 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 17:01:20 +0200 Subject: [PATCH 077/105] check swInit --- src/rlgl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rlgl.h b/src/rlgl.h index a71d4def7..8f6c11fbe 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -2330,7 +2330,11 @@ void rlglInit(int width, int height) #endif #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) - swInit(width, height); + if (!swInit(width, height)) + { + TRACELOG(RL_LOG_ERROR, "RLGL: Software renderer initialization failed!"); + exit(-1); + } #endif #if defined(GRAPHICS_API_OPENGL_33) || defined(GRAPHICS_API_OPENGL_ES2) From 87fae2cf3186f563597bbf6915e978c1feecf74b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 17:06:42 +0200 Subject: [PATCH 078/105] review pixelformat --- src/external/rlsw.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a0cedbb05..0b3fc0671 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -553,7 +553,8 @@ void swBindTexture(uint32_t id); /* === Internal Structs === */ typedef enum { - SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNKNOWN = 0, + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE, // 8 bit per pixel (no alpha) SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp @@ -1731,7 +1732,8 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) case SW_LUMINANCE_ALPHA: channels = 2; break; case SW_RGB: channels = 3; break; case SW_RGBA: channels = 4; break; - default: return -1; // Unknown format + default: + return SW_PIXELFORMAT_UNKNOWN; } // Determine the depth of each channel (type) @@ -1743,7 +1745,8 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) case SW_UNSIGNED_INT: 
bitsPerChannel = 32; break; case SW_INT: bitsPerChannel = 32; break; case SW_FLOAT: bitsPerChannel = 32; break; - default: return -1; // Unknown type + default: + return SW_PIXELFORMAT_UNKNOWN; } // Map the format and type to the correct internal format @@ -1764,7 +1767,7 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32; } - return -1; // Unsupported format + return SW_PIXELFORMAT_UNKNOWN; } int sw_get_pixel_bytes(sw_pixelformat_e format) @@ -4962,7 +4965,7 @@ void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy int pixelFormat = sw_get_pixel_format(format, type); - if (pixelFormat < 0) { + if (pixelFormat <= SW_PIXELFORMAT_UNKNOWN) { RLSW.errCode = SW_INVALID_ENUM; return; } From 6c73c62713d1927dcadaf168df7eabd1ce67dbf9 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 17:21:14 +0200 Subject: [PATCH 079/105] tweaks --- src/external/rlsw.h | 106 ++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 0b3fc0671..63b4c54af 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -250,64 +250,64 @@ typedef double GLclampd; /* === OpenGL Binding === */ -#define glReadPixels(x, y, w, h, f, t, p) swCopyFramebuffer(x, y, w, h, f, t, p) -#define glEnable(state) swEnable(state) -#define glDisable(state) swDisable(state) -#define glGetFloatv(pname, params) swGetFloatv(pname, params) -#define glGetString(pname) swGetString(pname) +#define glReadPixels(x, y, w, h, f, t, p) swCopyFramebuffer((x), (y), (w), (h), (f), (t), (p)) +#define glEnable(state) swEnable((state)) +#define glDisable(state) swDisable((state)) +#define glGetFloatv(pname, params) swGetFloatv((pname), (params)) +#define glGetString(pname) swGetString((pname)) #define glGetError() swGetError() -#define glViewport(x, y, w, h) swViewport(x, y, w, h) -#define glScissor(x, y, w, h) 
swScissor(x, y, w, h) -#define glClearColor(r, g, b, a) swClearColor(r, g, b, a) -#define glClear(bitmask) swClear(bitmask) -#define glBlendFunc(sfactor, dfactor) swBlendFunc(sfactor, dfactor) -#define glPolygonMode(face, mode) swPolygonMode(mode) -#define glCullFace(face) swCullFace(face) -#define glPointSize(size) swPointSize(size) -#define glLineWidth(width) swLineWidth(width) -#define glMatrixMode(mode) swMatrixMode(mode) +#define glViewport(x, y, w, h) swViewport((x), (y), (w), (h)) +#define glScissor(x, y, w, h) swScissor((x), (y), (w), (h)) +#define glClearColor(r, g, b, a) swClearColor((r), (g), (b), (a)) +#define glClear(bitmask) swClear((bitmask)) +#define glBlendFunc(sfactor, dfactor) swBlendFunc((sfactor), (dfactor)) +#define glPolygonMode(face, mode) swPolygonMode((mode)) +#define glCullFace(face) swCullFace((face)) +#define glPointSize(size) swPointSize((size)) +#define glLineWidth(width) swLineWidth((width)) +#define glMatrixMode(mode) swMatrixMode((mode)) #define glPushMatrix() swPushMatrix() #define glPopMatrix() swPopMatrix() #define glLoadIdentity() swLoadIdentity() -#define glTranslatef(x, y, z) swTranslatef(x, y, z) -#define glRotatef(a, x, y, z) swRotatef(a, x, y, z) -#define glScalef(x, y, z) swScalef(x, y, z) -#define glMultMatrixf(v) swMultMatrixf(v) -#define glFrustum(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) -#define glOrtho(l, r, b, t, n, f) swOrtho(l, r, b, t, n, f) -#define glBegin(mode) swBegin(mode) +#define glTranslatef(x, y, z) swTranslatef((x), (y), (z)) +#define glRotatef(a, x, y, z) swRotatef((a), (x), (y), (z)) +#define glScalef(x, y, z) swScalef((x), (y), (z)) +#define glMultMatrixf(v) swMultMatrixf((v)) +#define glFrustum(l, r, b, t, n, f) swFrustum((l), (r), (b), (t), (n), (f)) +#define glOrtho(l, r, b, t, n, f) swOrtho((l), (r), (b), (t), (n), (f)) +#define glBegin(mode) swBegin((mode)) #define glEnd() swEnd() -#define glVertex2i(x, y) swVertex2i(x, y) -#define glVertex2f(x, y) swVertex2f(x, y) -#define glVertex2fv(v) 
swVertex2fv(v) -#define glVertex3i(x, y, z) swVertex3i(x, y, z) -#define glVertex3f(x, y, z) swVertex3f(x, y, z) -#define glvertex3fv(v) swVertex3fv(v) -#define glVertex4i(x, y, z, w) swVertex4i(x, y, z, w) -#define glVertex4f(x, y, z, w) swVertex4f(x, y, z, w) -#define glVertex4fv(v) swVertex4fv(v) -#define glColor3ub(r, g, b) swColor3ub(r, g, b) -#define glColor3ubv(v) swColor3ubv(v) -#define glColor3f(r, g, b) swColor3f(r, g, b) -#define glColor3fv(v) swColor3fv(v) -#define glColor4ub(r, g, b, a) swColor4ub(r, g, b, a) -#define glColor4ubv(v) swColor4ubv(v) -#define glColor4f(r, g, b, a) swColor4f(r, g, b, a) -#define glColor4fv(v) swColor4fv(v) -#define glTexCoord2f(u, v) swTexCoord2f(u, v) -#define glTexCoord2fv(v) swTexCoord2fv(v) +#define glVertex2i(x, y) swVertex2i((x), (y)) +#define glVertex2f(x, y) swVertex2f((x), (y)) +#define glVertex2fv(v) swVertex2fv((v)) +#define glVertex3i(x, y, z) swVertex3i((x), (y), (z)) +#define glVertex3f(x, y, z) swVertex3f((x), (y), (z)) +#define glvertex3fv(v) swVertex3fv((v)) +#define glVertex4i(x, y, z, w) swVertex4i((x), (y), (z), (w)) +#define glVertex4f(x, y, z, w) swVertex4f((x), (y), (z), (w)) +#define glVertex4fv(v) swVertex4fv((v)) +#define glColor3ub(r, g, b) swColor3ub((r), (g), (b)) +#define glColor3ubv(v) swColor3ubv((v)) +#define glColor3f(r, g, b) swColor3f((r), (g), (b)) +#define glColor3fv(v) swColor3fv((v)) +#define glColor4ub(r, g, b, a) swColor4ub((r), (g), (b), (a)) +#define glColor4ubv(v) swColor4ubv((v)) +#define glColor4f(r, g, b, a) swColor4f((r), (g), (b), (a)) +#define glColor4fv(v) swColor4fv((v)) +#define glTexCoord2f(u, v) swTexCoord2f((u), (v)) +#define glTexCoord2fv(v) swTexCoord2fv((v)) #define glEnableClientState(t) ((void)(t)) -#define glDisableClientState(t) swBindArray(t, 0) -#define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, p) -#define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, p) -#define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, p) 
-#define glDrawArrays(m, o, c) swDrawArrays(m, o, c) -#define glGenTextures(c, v) swGenTextures(c, v) -#define glDeleteTextures(c, v) swDeleteTextures(c, v) -#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, SW_GL_BINDING_COPY_TEXTURE, p) -#define glTexParameteri(tr, pname, param) swTexParameteri(pname, param) -#define glBindTexture(tr, id) swBindTexture(id) +#define glDisableClientState(t) swBindArray((t), 0) +#define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, (p)) +#define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, (p)) +#define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, (p)) +#define glDrawArrays(m, o, c) swDrawArrays((m), (o), (c)) +#define glGenTextures(c, v) swGenTextures((c), (v)) +#define glDeleteTextures(c, v) swDeleteTextures((c), (v)) +#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D((w), (h), (f), (t), SW_GL_BINDING_COPY_TEXTURE, (p)) +#define glTexParameteri(tr, pname, param) swTexParameteri((pname), (param)) +#define glBindTexture(tr, id) swBindTexture((id)) /* === Not Implemented === */ @@ -4973,8 +4973,8 @@ void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy sw_texture_t* texture = &RLSW.loadedTextures[id]; if (copy) { - int bpp = sw_get_pixel_bytes(pixelFormat); - int size = bpp * width * height; + int bytes = sw_get_pixel_bytes(pixelFormat); + int size = bytes * width * height; texture->pixels.ptr = SW_MALLOC(size); if (texture->pixels.ptr == NULL) { RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory... 
From 3c25efc4123e553f11b08af8eccd0d0a0c667968 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 17:50:11 +0200 Subject: [PATCH 080/105] fix animNormals (DrawMesh) --- src/rmodels.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rmodels.c b/src/rmodels.c index e930ccd5e..8f7fe2579 100644 --- a/src/rmodels.c +++ b/src/rmodels.c @@ -1432,10 +1432,10 @@ void DrawMesh(Mesh mesh, Material material, Matrix transform) if (mesh.animVertices) rlEnableStatePointer(GL_VERTEX_ARRAY, mesh.animVertices); else rlEnableStatePointer(GL_VERTEX_ARRAY, mesh.vertices); - rlEnableStatePointer(GL_TEXTURE_COORD_ARRAY, mesh.texcoords); - if (mesh.normals) rlEnableStatePointer(GL_VERTEX_ARRAY, mesh.animNormals); + if (mesh.animNormals) rlEnableStatePointer(GL_NORMAL_ARRAY, mesh.animNormals); else rlEnableStatePointer(GL_NORMAL_ARRAY, mesh.normals); + rlEnableStatePointer(GL_TEXTURE_COORD_ARRAY, mesh.texcoords); rlEnableStatePointer(GL_COLOR_ARRAY, mesh.colors); rlPushMatrix(); From 5bab07b3e7e77dfb4f2162f678f85f7e5b064602 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 17:55:00 +0200 Subject: [PATCH 081/105] fallback color/texcoord (swDrawArrays) --- src/external/rlsw.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 63b4c54af..41929c273 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -4898,17 +4898,20 @@ void swDrawArrays(SWdraw mode, int offset, int count) } swBegin(mode); + { + swTexCoord2f(0.0f, 0.0f); + swColor4f(1.0f, 1.0f, 1.0f, 1.0f); - for (int i = offset; i < count; i++) { - if (RLSW.array.texcoords) { - swTexCoord2fv(RLSW.array.texcoords + 2 * i); + for (int i = offset; i < count; i++) { + if (RLSW.array.texcoords) { + swTexCoord2fv(RLSW.array.texcoords + 2 * i); + } + if (RLSW.array.colors) { + swColor4ubv(RLSW.array.colors + 4 * i); + } + swVertex3fv(RLSW.array.positions + 3 * i); } - if (RLSW.array.colors) { - 
swColor4ubv(RLSW.array.colors + 4 * i); - } - swVertex3fv(RLSW.array.positions + 3 * i); } - swEnd(); } @@ -5055,7 +5058,7 @@ void swBindTexture(uint32_t id) return; } - if (id > 0 && RLSW.loadedTextures[id].pixels.cptr == NULL) { + if (RLSW.loadedTextures[id].pixels.cptr == NULL) { RLSW.errCode = SW_INVALID_OPERATION; return; } From f7df285d69db195472c8708862c556dd719153a1 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 18:09:50 +0200 Subject: [PATCH 082/105] review swMultMatrixf --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 41929c273..8f0f0308d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -4553,7 +4553,7 @@ void swScalef(float x, float y, float z) void swMultMatrixf(const float* mat) { - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); if (RLSW.currentMatrixMode != SW_TEXTURE) { RLSW.isDirtyMVP = true; From 3913ffeac2cb7895629d01907622341ec5aec8f4 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 18:17:29 +0200 Subject: [PATCH 083/105] fix texture pool alloc.. 
--- src/external/rlsw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 8f0f0308d..399931343 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3787,10 +3787,10 @@ bool swInit(int w, int h) swViewport(0, 0, w, h); swScissor(0, 0, w, h); - RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES * sizeof(sw_texture_t)); if (RLSW.loadedTextures == NULL) { swClose(); return false; } - RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES * sizeof(uint32_t)); if (RLSW.loadedTextures == NULL) { swClose(); return false; } RLSW.clearColor[0] = 0.0f; From 903f56ca7be0410a7694eee4f5f22358771b4432 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 21:10:37 +0200 Subject: [PATCH 084/105] review triangle scanlines increment all data --- src/external/rlsw.h | 151 ++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 75 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 399931343..a6ab91f28 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -526,7 +526,7 @@ void swBindTexture(uint32_t id); #endif // RLSW_H - +#define RLSW_IMPL #ifdef RLSW_IMPL #include @@ -1979,6 +1979,9 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse sw_get_pixel_rgba_16161616(color, pixels, offset); break; + case SW_PIXELFORMAT_UNKNOWN: + break; + } } @@ -2335,97 +2338,103 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, const sw_vertex_t* end, float duDy, float dvDy) \ { \ /* Convert and center the screen coordinates */ \ + \ int xStart = (int)(start->screen[0] + 0.5f); \ int xEnd = (int)(end->screen[0] + 0.5f); \ int y = (int)(start->screen[1] + 0.5f); \ \ - /* Safely compute the inverse horizontal distance */ \ + /* Compute the inverse horizontal distance along the X axis */ \ + \ float dx = 
end->screen[0] - start->screen[0]; \ if (fabsf(dx) < 1e-6f) return; \ + float dxRcp = 1.0f / dx; \ \ - /* Calculate the interpolation step along the X axis */ \ - float dt = 1.0f / dx; \ + /* Compute the interpolation steps along the X axis */ \ \ - /* Initialize the interpolation parameter \ - 't' ranges from 0 to 1 across the scanline */ \ - float t = (xStart - start->screen[0]) * dt; \ + float dzDx = (end->homogeneous[2] - start->homogeneous[2]) * dxRcp; \ + float dwDx = (end->homogeneous[3] - start->homogeneous[3]) * dxRcp; \ + \ + float dcDx[4]; \ + dcDx[0] = (end->color[0] - start->color[0]) * dxRcp; \ + dcDx[1] = (end->color[1] - start->color[1]) * dxRcp; \ + dcDx[2] = (end->color[2] - start->color[2]) * dxRcp; \ + dcDx[3] = (end->color[3] - start->color[3]) * dxRcp; \ \ - /* Calculate the horizontal gradients for UV coordinates */ \ float duDx, dvDx; \ if (ENABLE_TEXTURE) { \ - duDx = (end->texcoord[0] - start->texcoord[0]) * dt; \ - dvDx = (end->texcoord[1] - start->texcoord[1]) * dt; \ + duDx = (end->texcoord[0] - start->texcoord[0]) * dxRcp; \ + dvDx = (end->texcoord[1] - start->texcoord[1]) * dxRcp; \ } \ \ - /* Pre-calculate the color differences for interpolation */ \ - float dcol[4]; \ - for (int i = 0; i < 4; i++) { \ - dcol[i] = end->color[i] - start->color[i]; \ - } \ + /* Initializing the interpolation starting values */ \ \ - /* Pre-calculate the differences in Z and W \ - (for depth testing and perspective correction) */ \ - float dz = end->homogeneous[2] - start->homogeneous[2]; \ - float dw = end->homogeneous[3] - start->homogeneous[3]; \ + float z = start->homogeneous[2]; \ + float w = start->homogeneous[3]; \ + \ + float color[4]; \ + color[0] = start->color[0]; \ + color[1] = start->color[1]; \ + color[2] = start->color[2]; \ + color[3] = start->color[3]; \ \ float u, v; \ if (ENABLE_TEXTURE) { \ - /* Initialize the interpolated texture coordinates */ \ - u = start->texcoord[0] + t * duDx; \ - v = start->texcoord[1] + t * dvDx; \ + u = 
start->texcoord[0]; \ + v = start->texcoord[1]; \ } \ \ - /* Pre-calculate the starting pointer for the color framebuffer row */ \ + /* Pre-calculate the starting pointers for the framebuffer row */ \ + \ void* cptr = sw_framebuffer_get_color_addr( \ - RLSW.framebuffer.color, y * RLSW.framebuffer.width + xStart \ - ); \ + RLSW.framebuffer.color, y * RLSW.framebuffer.width + xStart); \ \ - /* Pre-calculate the pointer for the depth buffer row */ \ void* dptr = sw_framebuffer_get_depth_addr( \ - RLSW.framebuffer.depth, y * RLSW.framebuffer.width + xStart \ - ); \ + RLSW.framebuffer.depth, y * RLSW.framebuffer.width + xStart); \ \ - /* Scanline rasterization loop */ \ - for (int x = xStart; x < xEnd; x++) { \ - /* Interpolate Z and W for depth testing and perspective correction */ \ - float w = 1.0f / (start->homogeneous[3] + t * dw); \ - float z = start->homogeneous[2] + t * dz; \ + /* Scanline rasterization */ \ + \ + for (int x = xStart; x < xEnd; x++) \ + { \ + /* Test and write depth */ \ \ if (ENABLE_DEPTH_TEST) \ { \ - /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? 
*/ \ float depth = sw_framebuffer_read_depth(dptr); \ if (z > depth) goto discard; \ } \ \ - /* Update the depth buffer */ \ sw_framebuffer_write_depth(dptr, z); \ \ + /* Pixel color computation */ \ + \ + float wRcp = 1.0f / w; \ + \ + float srcColor[4] = { \ + color[0] * wRcp, \ + color[1] * wRcp, \ + color[2] * wRcp, \ + color[3] * wRcp \ + }; \ + \ + if (ENABLE_TEXTURE) \ + { \ + float texColor[4]; \ + float s = u * wRcp; \ + float t = v * wRcp; \ + sw_texture_sample(texColor, tex, s, t, duDx, duDy, dvDx, dvDy); \ + srcColor[0] *= texColor[0]; \ + srcColor[1] *= texColor[1]; \ + srcColor[2] *= texColor[2]; \ + srcColor[3] *= texColor[3]; \ + } \ + \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, cptr); \ \ - float srcColor[4]; \ - if (ENABLE_TEXTURE) \ - { \ - sw_texture_sample(srcColor, tex, u * w, v * w, duDx, duDy, dvDx, dvDy); \ - srcColor[0] *= (start->color[0] + t * dcol[0]) * w; \ - srcColor[1] *= (start->color[1] + t * dcol[1]) * w; \ - srcColor[2] *= (start->color[2] + t * dcol[2]) * w; \ - srcColor[3] *= (start->color[3] + t * dcol[3]) * w; \ - } \ - else \ - { \ - srcColor[0] = (start->color[0] + t * dcol[0]) * w; \ - srcColor[1] = (start->color[1] + t * dcol[1]) * w; \ - srcColor[2] = (start->color[2] + t * dcol[2]) * w; \ - srcColor[3] = (start->color[3] + t * dcol[3]) * w; \ - } \ - \ sw_blend_colors(dstColor, srcColor); \ - \ dstColor[0] = sw_saturate(dstColor[0]); \ dstColor[1] = sw_saturate(dstColor[1]); \ dstColor[2] = sw_saturate(dstColor[2]); \ @@ -2434,34 +2443,26 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ else \ { \ - if (ENABLE_TEXTURE) \ - { \ - float color[4]; \ - sw_texture_sample(color, tex, u * w, v * w, duDx, duDy, dvDx, dvDy); \ - color[0] = sw_saturate(color[0] * (start->color[0] + t * dcol[0]) * w); \ - color[1] = sw_saturate(color[1] * (start->color[1] + t * dcol[1]) * w); \ - color[2] = sw_saturate(color[2] * (start->color[2] + t * dcol[2]) * 
w); \ - sw_framebuffer_write_color(cptr, color); \ - } \ - else \ - { \ - float color[3]; \ - color[0] = sw_saturate((start->color[0] + t * dcol[0]) * w); \ - color[1] = sw_saturate((start->color[1] + t * dcol[1]) * w); \ - color[2] = sw_saturate((start->color[2] + t * dcol[2]) * w); \ - sw_framebuffer_write_color(cptr, color); \ - } \ + sw_framebuffer_write_color(cptr, srcColor); \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ - discard: \ - t += dt; \ - sw_framebuffer_inc_color_addr(&cptr); \ - sw_framebuffer_inc_depth_addr(&dptr); \ + \ + discard: \ + \ + z += dzDx; \ + w += dwDx; \ + color[0] += dcDx[0]; \ + color[1] += dcDx[1]; \ + color[2] += dcDx[2]; \ + color[3] += dcDx[3]; \ if (ENABLE_TEXTURE) { \ u += duDx; \ v += dvDx; \ } \ + \ + sw_framebuffer_inc_color_addr(&cptr); \ + sw_framebuffer_inc_depth_addr(&dptr); \ } \ } @@ -2934,7 +2935,7 @@ static inline void FUNC_NAME(void) \ for (int x = xMin; x < xMax; x++) \ { \ - /* Interpolate Z for depth testing */ \ + /* Test and write depth */ \ \ if (ENABLE_DEPTH_TEST) \ { \ From 88c24f8de7353fbe62d8794762517fa45a069472 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 21:14:55 +0200 Subject: [PATCH 085/105] fix `sw_quad_sort_cw` --- src/external/rlsw.h | 83 ++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a6ab91f28..57cda1b91 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2764,54 +2764,75 @@ static inline void sw_quad_sort_cw(const sw_vertex_t** output) { // Sort 4 quad vertices into clockwise order with fixed layout: // - // v0 -- v1 - // | | - // v3 -- v2 + // v0 -- v1 + // | | + // v3 -- v2 // // The goal is: // - v0: top-left (minimum Y, then minimum X) - // - v1: top-right (same Y row as v0, maximum X) - // - v3: bottom-left (maximum Y, minimum X) + // - v1: top-right (minimum Y row, maximum X) // - v2: bottom-right (maximum Y, maximum X) + // - 
v3: bottom-left (maximum Y, minimum X) const sw_vertex_t* input = RLSW.vertexBuffer; - // Find indices of vertices with min Y (top) and max Y (bottom) - int minYIndex = 0, maxYIndex = 0; + // Separate vertices into top and bottom based on Y-coordinate + const sw_vertex_t* top[2] = {NULL, NULL}; + const sw_vertex_t* bottom[2] = {NULL, NULL}; + int topCount = 0, bottomCount = 0; + + // Find minimum and maximum Y + float minY = input[0].screen[1]; + float maxY = input[0].screen[1]; + for (int i = 1; i < 4; i++) { - if (input[i].screen[1] < input[minYIndex].screen[1]) minYIndex = i; - if (input[i].screen[1] > input[maxYIndex].screen[1]) maxYIndex = i; + if (input[i].screen[1] < minY) minY = input[i].screen[1]; + if (input[i].screen[1] > maxY) maxY = input[i].screen[1]; } - // Find indices of the remaining two vertices - int others[2], idx = 0; + // Separate vertices based on Y-coordinate for (int i = 0; i < 4; i++) { - if (i != minYIndex && i != maxYIndex) { - others[idx++] = i; + if (input[i].screen[1] == minY && topCount < 2) { + top[topCount++] = &input[i]; + } else if (input[i].screen[1] == maxY && bottomCount < 2) { + bottom[bottomCount++] = &input[i]; } } - // Determine left/right among the middle vertices by X coordinate - int leftMidIndex = (input[others[0]].screen[0] < input[others[1]].screen[0]) ? others[0] : others[1]; - int rightMidIndex = (leftMidIndex == others[0]) ? 
others[1] : others[0]; - - // Assign top vertices: v0 = top-left, v1 = top-right - if (input[minYIndex].screen[0] < input[leftMidIndex].screen[0]) { - output[0] = &input[minYIndex]; // v0: top-left - output[1] = &input[leftMidIndex]; // v1: top-right - } else { - output[0] = &input[leftMidIndex]; - output[1] = &input[minYIndex]; + // If we don't have enough top/bottom vertices (e.g., Y values are all different), + // classify vertices as top or bottom based on whether they're closer to minY or maxY + for (int i = 0; i < 4; i++) { + if (topCount < 2 && &input[i] != top[0] && &input[i] != bottom[0] && &input[i] != bottom[1]) { + if (fabs(input[i].screen[1] - minY) <= fabs(input[i].screen[1] - maxY)) { + top[topCount++] = &input[i]; + } + } + if (bottomCount < 2 && &input[i] != top[0] && &input[i] != top[1] && &input[i] != bottom[0]) { + if (fabs(input[i].screen[1] - maxY) < fabs(input[i].screen[1] - minY)) { + bottom[bottomCount++] = &input[i]; + } + } } - // Assign bottom vertices: v3 = bottom-left, v2 = bottom-right - if (input[maxYIndex].screen[0] < input[rightMidIndex].screen[0]) { - output[3] = &input[maxYIndex]; // v3: bottom-left - output[2] = &input[rightMidIndex]; // v2: bottom-right - } else { - output[3] = &input[rightMidIndex]; - output[2] = &input[maxYIndex]; + // Sort top vertices by X (left to right) + if (topCount == 2 && top[0]->screen[0] > top[1]->screen[0]) { + const sw_vertex_t* temp = top[0]; + top[0] = top[1]; + top[1] = temp; } + + // Sort bottom vertices by X (left to right) + if (bottomCount == 2 && bottom[0]->screen[0] > bottom[1]->screen[0]) { + const sw_vertex_t* temp = bottom[0]; + bottom[0] = bottom[1]; + bottom[1] = temp; + } + + // Assign vertices in clockwise order as per the required layout + output[0] = top[0]; // v0: top-left + output[1] = top[topCount-1]; // v1: top-right + output[2] = bottom[bottomCount-1]; // v2: bottom-right + output[3] = bottom[0]; // v3: bottom-left } // REVIEW: Could a perfectly aligned quad, where one of 
the four points has a different depth, still appear perfectly aligned from a certain point of view? From b9cfbebd40cdd0cf36db82bd09e4403d73a2fc40 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 21:20:13 +0200 Subject: [PATCH 086/105] impl sdl platform --- src/platforms/rcore_desktop_sdl.c | 121 +++++++++++++++++------------- 1 file changed, 70 insertions(+), 51 deletions(-) diff --git a/src/platforms/rcore_desktop_sdl.c b/src/platforms/rcore_desktop_sdl.c index 1621dd2a5..ffe1e2d9e 100644 --- a/src/platforms/rcore_desktop_sdl.c +++ b/src/platforms/rcore_desktop_sdl.c @@ -54,11 +54,13 @@ #endif #include "SDL.h" // SDL base library (window/rendered, input, timing... functionality) -#if defined(GRAPHICS_API_OPENGL_ES2) - // It seems it does not need to be included to work - //#include "SDL_opengles2.h" -#else - #include "SDL_opengl.h" // SDL OpenGL functionality (if required, instead of internal renderer) +#if !defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + #if defined(GRAPHICS_API_OPENGL_ES2) + // It seems it does not need to be included to work + //#include "SDL_opengles2.h" + #else + #include "SDL_opengl.h" // SDL OpenGL functionality (if required, instead of internal renderer) + #endif #endif //---------------------------------------------------------------------------------- @@ -1215,7 +1217,14 @@ void DisableCursor(void) // Swap back buffer with front buffer (screen drawing) void SwapScreenBuffer(void) { +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + // NOTE: We use a preprocessor condition here because `rlCopyFramebuffer` is only declared for software rendering + SDL_Surface* surface = SDL_GetWindowSurface(platform.window); + rlCopyFramebuffer(0, 0, CORE.Window.render.width, CORE.Window.render.height, PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, surface->pixels); + SDL_UpdateWindowSurface(platform.window); +#else SDL_GL_SwapWindow(platform.window); +#endif } //---------------------------------------------------------------------------------- @@ -1895,7 
+1904,6 @@ int InitPlatform(void) //---------------------------------------------------------------------------- unsigned int flags = 0; flags |= SDL_WINDOW_SHOWN; - flags |= SDL_WINDOW_OPENGL; flags |= SDL_WINDOW_INPUT_FOCUS; flags |= SDL_WINDOW_MOUSE_FOCUS; flags |= SDL_WINDOW_MOUSE_CAPTURE; // Window has mouse captured @@ -1930,44 +1938,50 @@ int InitPlatform(void) // NOTE: Some OpenGL context attributes must be set before window creation - // Check selection OpenGL version - if (rlGetVersion() == RL_OPENGL_21) + if (rlGetVersion() != RL_OPENGL_11_SOFTWARE) { - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 2); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1); - } - else if (rlGetVersion() == RL_OPENGL_33) - { - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); - } - else if (rlGetVersion() == RL_OPENGL_43) - { - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); -#if defined(RLGL_ENABLE_OPENGL_DEBUG_CONTEXT) - SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); // Enable OpenGL Debug Context -#endif - } - else if (rlGetVersion() == RL_OPENGL_ES_20) // Request OpenGL ES 2.0 context - { - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 2); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0); - } - else if (rlGetVersion() == RL_OPENGL_ES_30) // Request OpenGL ES 3.0 context - { - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0); - } + // Add the flag telling the window to use an OpenGL context + flags |= SDL_WINDOW_OPENGL; - if 
(CORE.Window.flags & FLAG_MSAA_4X_HINT) - { - SDL_GL_SetAttribute(SDL_GL_MULTISAMPLEBUFFERS, 1); - SDL_GL_SetAttribute(SDL_GL_MULTISAMPLESAMPLES, 4); + // Check selection OpenGL version + if (rlGetVersion() == RL_OPENGL_21) + { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 2); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1); + } + else if (rlGetVersion() == RL_OPENGL_33) + { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); + } + else if (rlGetVersion() == RL_OPENGL_43) + { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); + #if defined(RLGL_ENABLE_OPENGL_DEBUG_CONTEXT) + SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); // Enable OpenGL Debug Context + #endif + } + else if (rlGetVersion() == RL_OPENGL_ES_20) // Request OpenGL ES 2.0 context + { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 2); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0); + } + else if (rlGetVersion() == RL_OPENGL_ES_30) // Request OpenGL ES 3.0 context + { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0); + } + + if (CORE.Window.flags & FLAG_MSAA_4X_HINT) + { + SDL_GL_SetAttribute(SDL_GL_MULTISAMPLEBUFFERS, 1); + SDL_GL_SetAttribute(SDL_GL_MULTISAMPLESAMPLES, 4); + } } // Init window @@ -1978,10 +1992,12 @@ int InitPlatform(void) #endif // Init OpenGL context - platform.glContext = SDL_GL_CreateContext(platform.window); + if (rlGetVersion() != RL_OPENGL_11_SOFTWARE) + { + platform.glContext = SDL_GL_CreateContext(platform.window); + } - // Check window 
and glContext have been initialized successfully - if ((platform.window != NULL) && (platform.glContext != NULL)) + if ((platform.window != NULL) && ((rlGetVersion() == RL_OPENGL_11_SOFTWARE) || (platform.glContext != NULL))) { CORE.Window.ready = true; @@ -2002,8 +2018,14 @@ int InitPlatform(void) TRACELOG(LOG_INFO, " > Render size: %i x %i", CORE.Window.render.width, CORE.Window.render.height); TRACELOG(LOG_INFO, " > Viewport offsets: %i, %i", CORE.Window.renderOffset.x, CORE.Window.renderOffset.y); - if (CORE.Window.flags & FLAG_VSYNC_HINT) SDL_GL_SetSwapInterval(1); - else SDL_GL_SetSwapInterval(0); + if (platform.glContext != NULL) + { + SDL_GL_SetSwapInterval((CORE.Window.flags & FLAG_VSYNC_HINT)? 1 : 0); + + // Load OpenGL extensions + // NOTE: GL procedures address loader is required to load extensions + rlLoadExtensions(SDL_GL_GetProcAddress); + } } else { @@ -2011,9 +2033,6 @@ int InitPlatform(void) return -1; } - // Load OpenGL extensions - // NOTE: GL procedures address loader is required to load extensions - rlLoadExtensions(SDL_GL_GetProcAddress); //---------------------------------------------------------------------------- // Initialize input events system @@ -2078,7 +2097,7 @@ int InitPlatform(void) void ClosePlatform(void) { SDL_FreeCursor(platform.cursor); // Free cursor - SDL_GL_DeleteContext(platform.glContext); // Deinitialize OpenGL context + if (platform.glContext != NULL) SDL_GL_DeleteContext(platform.glContext); // Deinitialize OpenGL context SDL_DestroyWindow(platform.window); SDL_Quit(); // Deinitialize SDL internal global state } From 72e753a60abc57dbcd37a66281a4d1ef350ac510 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 17 May 2025 21:21:23 +0200 Subject: [PATCH 087/105] rm def --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 57cda1b91..da2c6d93e 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -526,7 +526,7 @@ void 
swBindTexture(uint32_t id); #endif // RLSW_H -#define RLSW_IMPL + #ifdef RLSW_IMPL #include From 2299b6b8e6d0683e7feb3933505fcbb4a8f47ad2 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 00:24:52 +0200 Subject: [PATCH 088/105] increase max clipped polygon vertices --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index da2c6d93e..02bea76de 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -79,7 +79,7 @@ #endif #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES -# define SW_MAX_CLIPPED_POLYGON_VERTICES 12 +# define SW_MAX_CLIPPED_POLYGON_VERTICES 24 #endif #ifndef SW_CLIP_EPSILON From 7b7ed98873358f1e6658ab3865e5ab0ed3a9daa2 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 02:07:36 +0200 Subject: [PATCH 089/105] improve triangle rasterization along Y axis improved robustness against numerical errors incremental interpolation along Y simplified function, fewer jumps --- src/external/rlsw.h | 183 +++++++++++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 69 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 02bea76de..5342d28a1 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -805,7 +805,11 @@ static inline float sw_lerp(float a, float b, float t) return a + t * (b - a); } -static inline void sw_lerp_vertex_PNTCH(sw_vertex_t* SW_RESTRICT out, const sw_vertex_t* SW_RESTRICT a, const sw_vertex_t* SW_RESTRICT b, float t) +static inline void sw_lerp_vertex_PTCH( + sw_vertex_t* SW_RESTRICT out, + const sw_vertex_t* SW_RESTRICT a, + const sw_vertex_t* SW_RESTRICT b, + float t) { const float tInv = 1.0f - t; @@ -832,6 +836,62 @@ static inline void sw_lerp_vertex_PNTCH(sw_vertex_t* SW_RESTRICT out, const sw_v out->homogeneous[3] = a->homogeneous[3] * tInv + b->homogeneous[3] * t; } +static inline void sw_get_vertex_grad_PTCH( + sw_vertex_t* SW_RESTRICT out, + const sw_vertex_t* SW_RESTRICT a, + const 
sw_vertex_t* SW_RESTRICT b, + float scale) +{ + // Calculate gradients for Position + out->position[0] = (b->position[0] - a->position[0]) * scale; + out->position[1] = (b->position[1] - a->position[1]) * scale; + out->position[2] = (b->position[2] - a->position[2]) * scale; + out->position[3] = (b->position[3] - a->position[3]) * scale; + + // Calculate gradients for Texture coordinates + out->texcoord[0] = (b->texcoord[0] - a->texcoord[0]) * scale; + out->texcoord[1] = (b->texcoord[1] - a->texcoord[1]) * scale; + + // Calculate gradients for Color + out->color[0] = (b->color[0] - a->color[0]) * scale; + out->color[1] = (b->color[1] - a->color[1]) * scale; + out->color[2] = (b->color[2] - a->color[2]) * scale; + out->color[3] = (b->color[3] - a->color[3]) * scale; + + // Calculate gradients for Homogeneous coordinates + out->homogeneous[0] = (b->homogeneous[0] - a->homogeneous[0]) * scale; + out->homogeneous[1] = (b->homogeneous[1] - a->homogeneous[1]) * scale; + out->homogeneous[2] = (b->homogeneous[2] - a->homogeneous[2]) * scale; + out->homogeneous[3] = (b->homogeneous[3] - a->homogeneous[3]) * scale; +} + +static inline void sw_add_vertex_grad_PTCH( + sw_vertex_t* SW_RESTRICT out, + const sw_vertex_t* SW_RESTRICT gradients) +{ + // Add gradients to Position + out->position[0] += gradients->position[0]; + out->position[1] += gradients->position[1]; + out->position[2] += gradients->position[2]; + out->position[3] += gradients->position[3]; + + // Add gradients to Texture coordinates + out->texcoord[0] += gradients->texcoord[0]; + out->texcoord[1] += gradients->texcoord[1]; + + // Add gradients to Color + out->color[0] += gradients->color[0]; + out->color[1] += gradients->color[1]; + out->color[2] += gradients->color[2]; + out->color[3] += gradients->color[3]; + + // Add gradients to Homogeneous coordinates + out->homogeneous[0] += gradients->homogeneous[0]; + out->homogeneous[1] += gradients->homogeneous[1]; + out->homogeneous[2] += gradients->homogeneous[2]; + 
out->homogeneous[3] += gradients->homogeneous[3]; +} + /* === Half Floating Point === */ @@ -2169,7 +2229,7 @@ static inline int sw_clip_##name( /* If transition between interior/exterior, calculate intersection point */ \ if (prevInside != currInside) { \ float t = FUNC_COMPUTE_T(prev->homogeneous, curr->homogeneous); \ - sw_lerp_vertex_PNTCH(&output[outputCount++], prev, curr, t); \ + sw_lerp_vertex_PTCH(&output[outputCount++], prev, curr, t); \ } \ \ /* If current vertex inside, add it */ \ @@ -2341,7 +2401,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ int xStart = (int)(start->screen[0] + 0.5f); \ int xEnd = (int)(end->screen[0] + 0.5f); \ - int y = (int)(start->screen[1] + 0.5f); \ + int y = (int)start->screen[1]; \ \ /* Compute the inverse horizontal distance along the X axis */ \ \ @@ -2471,115 +2531,100 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const const sw_texture_t* tex) \ { \ /* Swap vertices by increasing y */ \ + \ if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } \ if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ \ /* Extracting coordinates from the sorted vertices */ \ + \ float x0 = v0->screen[0], y0 = v0->screen[1]; \ float x1 = v1->screen[0], y1 = v1->screen[1]; \ float x2 = v2->screen[0], y2 = v2->screen[1]; \ \ /* Compute height differences */ \ + \ float h20 = y2 - y0; \ float h10 = y1 - y0; \ float h21 = y2 - y1; \ \ + if (h20 < 1e-6f) { \ + return; \ + } \ + \ /* Precompute the inverse values without additional checks */ \ - float invH20 = (h20 > 1e-6f) ? 1.0f / h20 : 0.0f; \ + \ + float invH20 = 1.0f / h20; \ float invH10 = (h10 > 1e-6f) ? 1.0f / h10 : 0.0f; \ float invH21 = (h21 > 1e-6f) ? 
1.0f / h21 : 0.0f; \ \ /* Pre-calculation of slopes (dx/dy) */ \ + \ float dx02 = (x2 - x0) * invH20; \ float dx01 = (x1 - x0) * invH10; \ float dx12 = (x2 - x1) * invH21; \ \ /* Y bounds (vertical clipping) */ \ + \ int yTop = (int)(y0 + 0.5f); \ int yMiddle = (int)(y1 + 0.5f); \ int yBottom = (int)(y2 + 0.5f); \ \ - /* Global calculation of vertical texture gradients for the triangle */ \ - float duDy, dvDy; \ - if (ENABLE_TEXTURE) { \ - duDy = (v2->texcoord[0] - v0->texcoord[0]) * invH20; \ - dvDy = (v2->texcoord[1] - v0->texcoord[1]) * invH20; \ - } \ + /* Compute gradients for each side of the triangle */ \ + \ + sw_vertex_t vDy20, vDy10, vDy21; \ + sw_get_vertex_grad_PTCH(&vDy20, v2, v0, invH20); \ + sw_get_vertex_grad_PTCH(&vDy10, v1, v0, invH10); \ + sw_get_vertex_grad_PTCH(&vDy21, v2, v1, invH21); \ \ /* Initializing scanline variables */ \ - float xLeft = x0, xRight = x0; \ - sw_vertex_t start, end; \ + \ + sw_vertex_t vLeft = *v0; \ + vLeft.screen[0] = x0; \ + \ + sw_vertex_t vRight = *v0; \ + vRight.screen[0] = x0; \ \ /* Scanline for the upper part of the triangle */ \ - for (int y = yTop; y < yMiddle; y++) { \ \ - /* Discard the lines that are degenerate */ \ - if (fabsf(xRight - xLeft) <= 1e-6f) { \ - goto discardTL; \ + for (int y = yTop; y < yMiddle; y++) \ + { \ + vLeft.screen[1] = vRight.screen[1] = y; \ + \ + if (vLeft.screen[0] < vRight.screen[0]) { \ + FUNC_SCANLINE(tex, &vLeft, &vRight, vDy20.texcoord[0], vDy20.texcoord[1]); \ + } \ + else { \ + FUNC_SCANLINE(tex, &vRight, &vLeft, vDy20.texcoord[0], vDy20.texcoord[1]); \ } \ \ - /* Calculation of interpolation factors */ \ - float dy = (float)y - y0; \ - float t1 = dy * invH20; \ - float t2 = dy * invH10; \ + sw_add_vertex_grad_PTCH(&vLeft, &vDy20); \ + vLeft.screen[0] += dx02; \ \ - /* Vertex interpolation */ \ - sw_lerp_vertex_PNTCH(&start, v0, v2, t1); \ - sw_lerp_vertex_PNTCH(&end, v0, v1, t2); \ - start.screen[0] = xLeft; \ - start.screen[1] = (float)y; \ - end.screen[0] = xRight; \ - 
end.screen[1] = (float)y; \ - \ - if (xLeft > xRight) { \ - sw_vertex_t tmp = start; \ - start = end; \ - end = tmp; \ - } \ - \ - FUNC_SCANLINE(tex, &start, &end, duDy, dvDy); \ - \ - /* Incremental update */ \ - discardTL: \ - xLeft += dx02; \ - xRight += dx01; \ + sw_add_vertex_grad_PTCH(&vRight, &vDy10); \ + vRight.screen[0] += dx01; \ } \ \ /* Scanline for the lower part of the triangle */ \ - xRight = x1; /* Restart the right side from the second vertex */ \ - for (int y = yMiddle; y < yBottom; y++) { \ \ - /* Discard the lines that are degenerate */ \ - if (fabsf(xRight - xLeft) <= 1e-6f) { \ - goto discardBL; \ + vRight = *v1, vRight.screen[0] = x1; \ + \ + for (int y = yMiddle; y < yBottom; y++) \ + { \ + vLeft.screen[1] = vRight.screen[1] = y; \ + \ + if (vLeft.screen[0] < vRight.screen[0]) { \ + FUNC_SCANLINE(tex, &vLeft, &vRight, vDy20.texcoord[0], vDy20.texcoord[1]); \ + } \ + else { \ + FUNC_SCANLINE(tex, &vRight, &vLeft, vDy20.texcoord[0], vDy20.texcoord[1]); \ } \ \ - /* Calculation of interpolation factors */ \ - float dy = (float)y - y0; \ - float t1 = dy * invH20; \ - float t2 = (float)(y - y1) * invH21; \ + sw_add_vertex_grad_PTCH(&vLeft, &vDy20); \ + vLeft.screen[0] += dx02; \ \ - /* Vertex interpolation */ \ - sw_lerp_vertex_PNTCH(&start, v0, v2, t1); \ - sw_lerp_vertex_PNTCH(&end, v1, v2, t2); \ - start.screen[0] = xLeft; \ - start.screen[1] = (float)y; \ - end.screen[0] = xRight; \ - end.screen[1] = (float)y; \ - \ - if (xLeft > xRight) { \ - sw_vertex_t tmp = start; \ - start = end; \ - end = tmp; \ - } \ - \ - FUNC_SCANLINE(tex, &start, &end, duDy, dvDy); \ - \ - /* Incremental update */ \ - discardBL: \ - xLeft += dx02; \ - xRight += dx12; \ + sw_add_vertex_grad_PTCH(&vRight, &vDy21); \ + vRight.screen[0] += dx12; \ } \ } From 8624edf63dd87462e6a6f27f31ead34af3184ded Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 03:08:01 +0200 Subject: [PATCH 090/105] review current vertex data + increase max clipped polygon vertices 
(for extreme cases) --- src/external/rlsw.h | 57 +++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5342d28a1..2c466cb69 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -79,7 +79,7 @@ #endif #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES -# define SW_MAX_CLIPPED_POLYGON_VERTICES 24 +# define SW_MAX_CLIPPED_POLYGON_VERTICES 64 #endif #ifndef SW_CLIP_EPSILON @@ -648,6 +648,11 @@ typedef struct { uint8_t* colors; } array; + struct { + float texcoord[2]; + float color[4]; + } current; + sw_vertex_t vertexBuffer[SW_MAX_CLIPPED_POLYGON_VERTICES]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' @@ -3879,13 +3884,13 @@ bool swInit(int w, int h) RLSW.stackTextureCounter = 1; RLSW.isDirtyMVP = false; - RLSW.vertexBuffer[0].color[0] = 1.0f; - RLSW.vertexBuffer[0].color[1] = 1.0f; - RLSW.vertexBuffer[0].color[2] = 1.0f; - RLSW.vertexBuffer[0].color[3] = 1.0f; + RLSW.current.texcoord[0] = 0.0f; + RLSW.current.texcoord[1] = 0.0f; - RLSW.vertexBuffer[0].texcoord[0] = 0.0f; - RLSW.vertexBuffer[0].texcoord[1] = 0.0f; + RLSW.current.color[0] = 1.0f; + RLSW.current.color[1] = 1.0f; + RLSW.current.color[2] = 1.0f; + RLSW.current.color[3] = 1.0f; RLSW.srcFactor = SW_SRC_ALPHA; RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; @@ -4803,6 +4808,16 @@ void swVertex4fv(const float* v) vertex->position[i] = v[i]; } + /* --- Copy additonal vertex data --- */ + + for (int i = 0; i < 2; i++) { + vertex->texcoord[i] = RLSW.current.texcoord[i]; + } + + for (int i = 0; i < 4; i++) { + vertex->color[i] = RLSW.current.color[i]; + } + /* --- Calculation of homogeneous coordinates --- */ const sw_matrix_t* mat = &RLSW.matMVP; @@ -4826,12 +4841,8 @@ void swVertex4fv(const float* v) sw_poly_point_render(); break; } - RLSW.vertexBuffer[0] = RLSW.vertexBuffer[RLSW.reqVertices - 1]; RLSW.vertexCounter = 0; } 
- else { - RLSW.vertexBuffer[RLSW.vertexCounter] = RLSW.vertexBuffer[RLSW.vertexCounter - 1]; - } } void swColor3ub(uint8_t r, uint8_t g, uint8_t b) @@ -4914,7 +4925,7 @@ void swColor4f(float r, float g, float b, float a) void swColor4fv(const float* v) { for (int i = 0; i < 4; i++) { - RLSW.vertexBuffer[RLSW.vertexCounter].color[i] = v[i]; + RLSW.current.color[i] = v[i]; } } @@ -4922,22 +4933,16 @@ void swTexCoord2f(float u, float v) { const sw_matrix_t* mat = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; - float s = (*mat)[0]*u + (*mat)[4]*v + (*mat)[12]; - float t = (*mat)[1]*u + (*mat)[5]*v + (*mat)[13]; - - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; + RLSW.current.texcoord[0] = (*mat)[0] * u + (*mat)[4] * v + (*mat)[12]; + RLSW.current.texcoord[1] = (*mat)[1] * u + (*mat)[5] * v + (*mat)[13]; } void swTexCoord2fv(const float* v) { const sw_matrix_t* mat = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; - float s = (*mat)[0]*v[0] + (*mat)[4]*v[1] + (*mat)[12]; - float t = (*mat)[1]*v[0] + (*mat)[5]*v[1] + (*mat)[13]; - - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; + RLSW.current.texcoord[0] = (*mat)[0] * v[0] + (*mat)[4] * v[1] + (*mat)[12]; + RLSW.current.texcoord[1] = (*mat)[1] * v[0] + (*mat)[5] * v[1] + (*mat)[13]; } void swBindArray(SWarray type, void *buffer) @@ -4970,12 +4975,8 @@ void swDrawArrays(SWdraw mode, int offset, int count) swColor4f(1.0f, 1.0f, 1.0f, 1.0f); for (int i = offset; i < count; i++) { - if (RLSW.array.texcoords) { - swTexCoord2fv(RLSW.array.texcoords + 2 * i); - } - if (RLSW.array.colors) { - swColor4ubv(RLSW.array.colors + 4 * i); - } + if (RLSW.array.texcoords) swTexCoord2fv(RLSW.array.texcoords + 2 * i); + if (RLSW.array.colors) swColor4ubv(RLSW.array.colors + 4 * i); swVertex3fv(RLSW.array.positions + 3 * i); } } From 0b08e75a2e7350250bb26341f854471ac2cd7eda Mon Sep 17 00:00:00 2001 From: 
Bigfoot71 Date: Sun, 18 May 2025 03:49:11 +0200 Subject: [PATCH 091/105] fix and improve polygon clipping Sets the vertex count to zero when the polygon is invalid Stops clipping when the vertex count drops below 3 --- src/external/rlsw.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 2c466cb69..33895ae4a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2302,7 +2302,10 @@ static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VE #define CLIP_AGAINST_PLANE(FUNC_CLIP) \ { \ n = FUNC_CLIP(tmp, polygon, n); \ - if (n == 0) return false; \ + if (n < 3) { \ + *vertexCounter = 0; \ + return false; \ + } \ for (int i = 0; i < n; i++) { \ polygon[i] = tmp[i]; \ } \ @@ -2325,7 +2328,7 @@ static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VE *vertexCounter = n; - return n > 0; + return (n >= 3); } @@ -2366,7 +2369,7 @@ static inline void sw_triangle_clip_and_project(void) sw_vertex_t* polygon = RLSW.vertexBuffer; int* vertexCounter = &RLSW.vertexCounter; - if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 3) { + if (sw_polygon_clip(polygon, vertexCounter)) { // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { @@ -2751,7 +2754,7 @@ static inline void sw_quad_clip_and_project(void) sw_vertex_t* polygon = RLSW.vertexBuffer; int* vertexCounter = &RLSW.vertexCounter; - if (sw_polygon_clip(polygon, vertexCounter) && *vertexCounter >= 3) { + if (sw_polygon_clip(polygon, vertexCounter)) { // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { From 6a3818b34f40bc341d4d6be3c40f08cad0f3a042 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 03:49:53 +0200 Subject: [PATCH 092/105] fix gradient calculation --- src/external/rlsw.h | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git 
a/src/external/rlsw.h b/src/external/rlsw.h index 33895ae4a..a0ddaea7b 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2552,25 +2552,25 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const \ /* Compute height differences */ \ \ - float h20 = y2 - y0; \ - float h10 = y1 - y0; \ - float h21 = y2 - y1; \ + float h02 = y2 - y0; \ + float h01 = y1 - y0; \ + float h12 = y2 - y1; \ \ - if (h20 < 1e-6f) { \ + if (h02 < 1e-6f) { \ return; \ } \ \ /* Precompute the inverse values without additional checks */ \ \ - float invH20 = 1.0f / h20; \ - float invH10 = (h10 > 1e-6f) ? 1.0f / h10 : 0.0f; \ - float invH21 = (h21 > 1e-6f) ? 1.0f / h21 : 0.0f; \ + float invH02 = 1.0f / h02; \ + float invH01 = (h01 > 1e-6f) ? 1.0f / h01 : 0.0f; \ + float invH12 = (h12 > 1e-6f) ? 1.0f / h12 : 0.0f; \ \ - /* Pre-calculation of slopes (dx/dy) */ \ + /* Pre-calculation of slopes */ \ \ - float dx02 = (x2 - x0) * invH20; \ - float dx01 = (x1 - x0) * invH10; \ - float dx12 = (x2 - x1) * invH21; \ + float dx02 = (x2 - x0) * invH02; \ + float dx01 = (x1 - x0) * invH01; \ + float dx12 = (x2 - x1) * invH12; \ \ /* Y bounds (vertical clipping) */ \ \ @@ -2580,10 +2580,10 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const \ /* Compute gradients for each side of the triangle */ \ \ - sw_vertex_t vDy20, vDy10, vDy21; \ - sw_get_vertex_grad_PTCH(&vDy20, v2, v0, invH20); \ - sw_get_vertex_grad_PTCH(&vDy10, v1, v0, invH10); \ - sw_get_vertex_grad_PTCH(&vDy21, v2, v1, invH21); \ + sw_vertex_t vDy02, vDy01, vDy12; \ + sw_get_vertex_grad_PTCH(&vDy02, v0, v2, invH02); \ + sw_get_vertex_grad_PTCH(&vDy01, v0, v1, invH01); \ + sw_get_vertex_grad_PTCH(&vDy12, v1, v2, invH12); \ \ /* Initializing scanline variables */ \ \ @@ -2600,16 +2600,16 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const vLeft.screen[1] = vRight.screen[1] = y; \ \ if (vLeft.screen[0] < vRight.screen[0]) { \ - FUNC_SCANLINE(tex, 
&vLeft, &vRight, vDy20.texcoord[0], vDy20.texcoord[1]); \ + FUNC_SCANLINE(tex, &vLeft, &vRight, vDy02.texcoord[0], vDy02.texcoord[1]); \ } \ else { \ - FUNC_SCANLINE(tex, &vRight, &vLeft, vDy20.texcoord[0], vDy20.texcoord[1]); \ + FUNC_SCANLINE(tex, &vRight, &vLeft, vDy02.texcoord[0], vDy02.texcoord[1]); \ } \ \ - sw_add_vertex_grad_PTCH(&vLeft, &vDy20); \ + sw_add_vertex_grad_PTCH(&vLeft, &vDy02); \ vLeft.screen[0] += dx02; \ \ - sw_add_vertex_grad_PTCH(&vRight, &vDy10); \ + sw_add_vertex_grad_PTCH(&vRight, &vDy01); \ vRight.screen[0] += dx01; \ } \ \ @@ -2622,16 +2622,16 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const vLeft.screen[1] = vRight.screen[1] = y; \ \ if (vLeft.screen[0] < vRight.screen[0]) { \ - FUNC_SCANLINE(tex, &vLeft, &vRight, vDy20.texcoord[0], vDy20.texcoord[1]); \ + FUNC_SCANLINE(tex, &vLeft, &vRight, vDy02.texcoord[0], vDy02.texcoord[1]); \ } \ else { \ - FUNC_SCANLINE(tex, &vRight, &vLeft, vDy20.texcoord[0], vDy20.texcoord[1]); \ + FUNC_SCANLINE(tex, &vRight, &vLeft, vDy02.texcoord[0], vDy02.texcoord[1]); \ } \ \ - sw_add_vertex_grad_PTCH(&vLeft, &vDy20); \ + sw_add_vertex_grad_PTCH(&vLeft, &vDy02); \ vLeft.screen[0] += dx02; \ \ - sw_add_vertex_grad_PTCH(&vRight, &vDy21); \ + sw_add_vertex_grad_PTCH(&vRight, &vDy12); \ vRight.screen[0] += dx12; \ } \ } From 07e891ad24c3328c892c47abd48edd4815a77a8a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 04:01:49 +0200 Subject: [PATCH 093/105] cache texture size minus one + comments --- src/external/rlsw.h | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index a0ddaea7b..fb47575cd 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -599,20 +599,20 @@ typedef struct { void* ptr; //< WARNING: Should only be used to allocate and free data } pixels; - int width; - int height; - int format; + int width, height; //< Dimensions of the texture + int 
wM1, hM1; //< Dimensions minus one + int format; //< Pixel format (internal representation) - SWfilter minFilter; - SWfilter magFilter; + SWfilter minFilter; //< Minification filter + SWfilter magFilter; //< Magnification filter - SWwrap sWrap; - SWwrap tWrap; + SWwrap sWrap; //< texcoord.x wrap mode + SWwrap tWrap; //< texcoord.y wrap mode - float tx; - float ty; + float tx; //< Texel width + float ty; //< Texel height - bool copy; + bool copy; //< Flag indicating whether memory has been allocated } sw_texture_t; @@ -2068,18 +2068,18 @@ static inline void sw_texture_map(int* out, float in, int max, SWwrap mode) static inline void sw_texture_sample_nearest(float* color, const sw_texture_t* tex, float u, float v) { int x, y; - sw_texture_map(&x, u, tex->width, tex->sWrap); - sw_texture_map(&y, v, tex->height, tex->tWrap); + sw_texture_map(&x, u, tex->wM1, tex->sWrap); + sw_texture_map(&y, v, tex->hM1, tex->tWrap); sw_get_pixel(color, tex->pixels.cptr, y * tex->width + x, tex->format); } static inline void sw_texture_sample_linear(float* color, const sw_texture_t* tex, float u, float v) { int x0, y0, x1, y1; - sw_texture_map(&x0, u, tex->width, tex->sWrap); - sw_texture_map(&y0, v, tex->height, tex->tWrap); - sw_texture_map(&x1, u + tex->tx, tex->width, tex->sWrap); - sw_texture_map(&y1, v + tex->ty, tex->height, tex->tWrap); + sw_texture_map(&x0, u, tex->wM1, tex->sWrap); + sw_texture_map(&y0, v, tex->hM1, tex->tWrap); + sw_texture_map(&x1, u + tex->tx, tex->wM1, tex->sWrap); + sw_texture_map(&y1, v + tex->ty, tex->hM1, tex->tWrap); float fx = u * (tex->width - 1) - x0; float fy = v * (tex->height - 1) - y0; @@ -3915,6 +3915,8 @@ bool swInit(int w, int h) RLSW.loadedTextures[0].pixels.cptr = defTex; RLSW.loadedTextures[0].width = 2; RLSW.loadedTextures[0].height = 2; + RLSW.loadedTextures[0].wM1 = 1; + RLSW.loadedTextures[0].hM1 = 1; RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; RLSW.loadedTextures[0].minFilter = SW_NEAREST; 
RLSW.loadedTextures[0].magFilter = SW_NEAREST; @@ -5064,6 +5066,8 @@ void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy texture->width = width; texture->height = height; + texture->wM1 = width - 1; + texture->hM1 = height - 1; texture->format = pixelFormat; texture->tx = 1.0f / width; texture->ty = 1.0f / height; From bb5442e8c0d0ee9ef1325d666bbe0655ec2382c4 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 04:13:30 +0200 Subject: [PATCH 094/105] tweaks --- src/external/rlsw.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index fb47575cd..7aabf71b4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -79,7 +79,13 @@ #endif #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES -# define SW_MAX_CLIPPED_POLYGON_VERTICES 64 +// Under normal circumstances, clipping a polygon can add at most one vertex per clipping plane. +// Considering the largest polygon involved is a quadrilateral (4 vertices), +// and that clipping occurs against both the frustum (6 planes) +// and the scissors (4 planes), +// the maximum number of vertices after clipping is: +// 4 (original vertices) + 6 (frustum planes) + 4 (scissors planes) = 14 +# define SW_MAX_CLIPPED_POLYGON_VERTICES 14 #endif #ifndef SW_CLIP_EPSILON @@ -2101,11 +2107,6 @@ static inline void sw_texture_sample_linear(float* color, const sw_texture_t* te static inline void sw_texture_sample(float* color, const sw_texture_t* tex, float u, float v, float duDx, float duDy, float dvDx, float dvDy) { - // TODO: It seems there are some incorrect detections depending on the context - // This is probably due to the fact that the fractions are obtained - // at the wrong moment during rasterization. It would be worth reviewing - // this, although the scanline method complicates things. 
- // Previous method: There is no need to compute the square root // because using the squared value, the comparison remains `L2 > 1.0f * 1.0f` //float du = sqrtf(duDx * duDx + duDy * duDy); @@ -2296,7 +2297,8 @@ DEFINE_CLIP_FUNC(scissor_y_max, IS_INSIDE_SCISSOR_Y_MAX, COMPUTE_T_SCISSOR_Y_MAX static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { - sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; + static sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int n = *vertexCounter; #define CLIP_AGAINST_PLANE(FUNC_CLIP) \ From 7fbc52c0e2467393fb6addbcd5cb21afbef6aeef Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 15:53:12 +0200 Subject: [PATCH 095/105] BGRA copy support --- src/external/rlsw.h | 184 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 142 insertions(+), 42 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 7aabf71b4..1f110eafc 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -50,6 +50,10 @@ # endif #endif +#ifndef SW_GL_FRAMEBUFFER_COPY_BGRA +# define SW_GL_FRAMEBUFFER_COPY_BGRA true +#endif + #ifndef SW_GL_BINDING_COPY_TEXTURE # define SW_GL_BINDING_COPY_TEXTURE true #endif @@ -1496,7 +1500,11 @@ DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R5G6B5, uint16_t) uint8_t g6 = (color[1] * 63 + 127) / 255; uint8_t b5 = (color[2] * 31 + 127) / 255; +#if SW_GL_FRAMEBUFFER_COPY_BGRA + uint16_t rgb565 = (b5 << 11) | (g6 << 5) | r5; +#else // RGBA uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5; +#endif *dst++ = rgb565; } @@ -1504,9 +1512,15 @@ DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R8G8B8, uint8_t) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif dst += 3; } @@ -1519,7 +1533,11 @@ DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R5G5B5A1, 
uint16_t) uint8_t b5 = (color[2] * 31 + 127) / 255; uint8_t a1 = color[3] >= 128 ? 1 : 0; +#if SW_GL_FRAMEBUFFER_COPY_BGRA + uint16_t pixel = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1; +#else // RGBA uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1; +#endif *dst++ = pixel; } @@ -1532,7 +1550,11 @@ DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R4G4B4A4, uint16_t) uint8_t b4 = (color[2] * 15 + 127) / 255; uint8_t a4 = (color[3] * 15 + 127) / 255; +#if SW_GL_FRAMEBUFFER_COPY_BGRA + uint16_t pixel = (b4 << 12) | (g4 << 8) | (r4 << 4) | a4; +#else // RGBA uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4; +#endif *dst++ = pixel; } @@ -1540,10 +1562,16 @@ DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_U32_BEGIN(R8G8B8A8, uint8_t) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B - dst[3] = color[3]; // A +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif + dst[3] = color[3]; dst += 4; } @@ -1558,9 +1586,15 @@ DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R32G32B32, float) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif dst += 3; } @@ -1568,10 +1602,16 @@ DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R32G32B32A32, float) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B - dst[3] = color[3]; // A +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif + dst[3] = color[3]; dst += 4; } @@ -1586,9 +1626,15 @@ DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R16G16B16, sw_half_t) { - dst[0] = 
sw_cvt_fh(color[0]); // R - dst[1] = sw_cvt_fh(color[1]); // G - dst[2] = sw_cvt_fh(color[2]); // B +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = sw_cvt_fh(color[2]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[0]); +#else // RGBA + dst[0] = sw_cvt_fh(color[0]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[2]); +#endif dst += 3; } @@ -1596,10 +1642,16 @@ DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_F32_BEGIN(R16G16B16A16, sw_half_t) { - dst[0] = sw_cvt_fh(color[0]); // R - dst[1] = sw_cvt_fh(color[1]); // G - dst[2] = sw_cvt_fh(color[2]); // B - dst[3] = sw_cvt_fh(color[3]); // A +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = sw_cvt_fh(color[2]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[0]); +#else // RGBA + dst[0] = sw_cvt_fh(color[0]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[2]); +#endif + dst[3] = sw_cvt_fh(color[3]); dst += 4; } @@ -1680,7 +1732,11 @@ DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R5G6B5, uint16_t) uint8_t g6 = (color[1] * 63 + 127) / 255; uint8_t b5 = (color[2] * 31 + 127) / 255; +#if SW_GL_FRAMEBUFFER_COPY_BGRA + uint16_t rgb565 = (b5 << 11) | (g6 << 5) | r5; +#else // RGBA uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5; +#endif *dst++ = rgb565; } @@ -1688,9 +1744,15 @@ DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R8G8B8, uint8_t) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif dst += 3; } @@ -1703,7 +1765,11 @@ DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R5G5B5A1, uint16_t) uint8_t b5 = (color[2] * 31 + 127) / 255; uint8_t a1 = color[3] >= 128 ? 
1 : 0; +#if SW_GL_FRAMEBUFFER_COPY_BGRA + uint16_t pixel = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1; +#else // RGBA uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1; +#endif *dst++ = pixel; } @@ -1716,7 +1782,11 @@ DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R4G4B4A4, uint16_t) uint8_t b4 = (color[2] * 15 + 127) / 255; uint8_t a4 = (color[3] * 15 + 127) / 255; +#if SW_GL_FRAMEBUFFER_COPY_BGRA + uint16_t pixel = (b4 << 12) | (g4 << 8) | (r4 << 4) | a4; +#else // RGBA uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4; +#endif *dst++ = pixel; } @@ -1724,10 +1794,16 @@ DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_U32_BEGIN(R8G8B8A8, uint8_t) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B - dst[3] = color[3]; // A +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif + dst[3] = color[3]; dst += 4; } @@ -1742,9 +1818,15 @@ DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R32G32B32, float) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif dst += 3; } @@ -1752,10 +1834,16 @@ DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R32G32B32A32, float) { - dst[0] = color[0]; // R - dst[1] = color[1]; // G - dst[2] = color[2]; // B - dst[3] = color[3]; // A +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = color[2]; + dst[1] = color[1]; + dst[2] = color[0]; +#else // RGBA + dst[0] = color[0]; + dst[1] = color[1]; + dst[2] = color[2]; +#endif + dst[3] = color[3]; dst += 4; } @@ -1770,9 +1858,15 @@ DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R16G16B16, sw_half_t) { - dst[0] = sw_cvt_fh(color[0]); // R - dst[1] = sw_cvt_fh(color[1]); // G - dst[2] = 
sw_cvt_fh(color[2]); // B +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = sw_cvt_fh(color[2]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[0]); +#else // RGBA + dst[0] = sw_cvt_fh(color[0]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[2]); +#endif dst += 3; } @@ -1780,10 +1874,16 @@ DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_F32_BEGIN(R16G16B16A16, sw_half_t) { - dst[0] = sw_cvt_fh(color[0]); // R - dst[1] = sw_cvt_fh(color[1]); // G - dst[2] = sw_cvt_fh(color[2]); // B - dst[3] = sw_cvt_fh(color[3]); // A +#if SW_GL_FRAMEBUFFER_COPY_BGRA + dst[0] = sw_cvt_fh(color[2]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[0]); +#else // RGBA + dst[0] = sw_cvt_fh(color[0]); + dst[1] = sw_cvt_fh(color[1]); + dst[2] = sw_cvt_fh(color[2]); +#endif + dst[3] = sw_cvt_fh(color[3]); dst += 4; } From 2e024898a9603d1e525b2f4aeafd65e4b942e543 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 15:54:06 +0200 Subject: [PATCH 096/105] adding software backend option (cmake) --- CMakeOptions.txt | 2 +- cmake/LibraryConfigurations.cmake | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeOptions.txt b/CMakeOptions.txt index 3c1c12742..32cfb814e 100644 --- a/CMakeOptions.txt +++ b/CMakeOptions.txt @@ -8,7 +8,7 @@ if(EMSCRIPTEN) endif() enum_option(PLATFORM "Desktop;Web;Android;Raspberry Pi;DRM;SDL" "Platform to build for.") -enum_option(OPENGL_VERSION "OFF;4.3;3.3;2.1;1.1;ES 2.0;ES 3.0" "Force a specific OpenGL Version?") +enum_option(OPENGL_VERSION "OFF;4.3;3.3;2.1;1.1;ES 2.0;ES 3.0;Software" "Force a specific OpenGL Version?") # Configuration options option(BUILD_EXAMPLES "Build the examples." 
${PROJECT_IS_TOP_LEVEL}) diff --git a/cmake/LibraryConfigurations.cmake b/cmake/LibraryConfigurations.cmake index 00dda033a..2d4e2eb9f 100644 --- a/cmake/LibraryConfigurations.cmake +++ b/cmake/LibraryConfigurations.cmake @@ -122,6 +122,8 @@ if (NOT ${OPENGL_VERSION} MATCHES "OFF") set(GRAPHICS "GRAPHICS_API_OPENGL_ES2") elseif (${OPENGL_VERSION} MATCHES "ES 3.0") set(GRAPHICS "GRAPHICS_API_OPENGL_ES3") + elseif (${OPENGL_VERSION} MATCHES "Software") + set(GRAPHICS "GRAPHICS_API_OPENGL_11_SOFTWARE") endif () if (NOT "${SUGGESTED_GRAPHICS}" STREQUAL "" AND NOT "${SUGGESTED_GRAPHICS}" STREQUAL "${GRAPHICS}") message(WARNING "You are overriding the suggested GRAPHICS=${SUGGESTED_GRAPHICS} with ${GRAPHICS}! This may fail.") From caf6b4795f7496a53a73608504e511119856cd7e Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 15:56:21 +0200 Subject: [PATCH 097/105] update Makefile --- src/Makefile | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Makefile b/src/Makefile index 833f955a9..90875131c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -235,20 +235,22 @@ endif # NOTE: By default use OpenGL 3.3 on desktop platforms ifeq ($(TARGET_PLATFORM),PLATFORM_DESKTOP_GLFW) GRAPHICS ?= GRAPHICS_API_OPENGL_33 - #GRAPHICS = GRAPHICS_API_OPENGL_11 # Uncomment to use OpenGL 1.1 - #GRAPHICS = GRAPHICS_API_OPENGL_21 # Uncomment to use OpenGL 2.1 - #GRAPHICS = GRAPHICS_API_OPENGL_43 # Uncomment to use OpenGL 4.3 - #GRAPHICS = GRAPHICS_API_OPENGL_ES2 # Uncomment to use OpenGL ES 2.0 (ANGLE) + #GRAPHICS = GRAPHICS_API_OPENGL_11_SOFTWARE # Uncomment to use software rendering + #GRAPHICS = GRAPHICS_API_OPENGL_11 # Uncomment to use OpenGL 1.1 + #GRAPHICS = GRAPHICS_API_OPENGL_21 # Uncomment to use OpenGL 2.1 + #GRAPHICS = GRAPHICS_API_OPENGL_43 # Uncomment to use OpenGL 4.3 + #GRAPHICS = GRAPHICS_API_OPENGL_ES2 # Uncomment to use OpenGL ES 2.0 (ANGLE) endif ifeq ($(TARGET_PLATFORM),PLATFORM_DESKTOP_SDL) GRAPHICS ?= GRAPHICS_API_OPENGL_33 
endif ifeq ($(TARGET_PLATFORM),PLATFORM_DESKTOP_RGFW) GRAPHICS ?= GRAPHICS_API_OPENGL_33 - #GRAPHICS = GRAPHICS_API_OPENGL_11 # Uncomment to use OpenGL 1.1 - #GRAPHICS = GRAPHICS_API_OPENGL_21 # Uncomment to use OpenGL 2.1 - #GRAPHICS = GRAPHICS_API_OPENGL_43 # Uncomment to use OpenGL 4.3 - #GRAPHICS = GRAPHICS_API_OPENGL_ES2 # Uncomment to use OpenGL ES 2.0 (ANGLE) + #GRAPHICS = GRAPHICS_API_OPENGL_11_SOFTWARE # Uncomment to use software rendering + #GRAPHICS = GRAPHICS_API_OPENGL_11 # Uncomment to use OpenGL 1.1 + #GRAPHICS = GRAPHICS_API_OPENGL_21 # Uncomment to use OpenGL 2.1 + #GRAPHICS = GRAPHICS_API_OPENGL_43 # Uncomment to use OpenGL 4.3 + #GRAPHICS = GRAPHICS_API_OPENGL_ES2 # Uncomment to use OpenGL ES 2.0 (ANGLE) endif ifeq ($(TARGET_PLATFORM),PLATFORM_DRM) # On DRM OpenGL ES 2.0 must be used From 32d849f50c4ada3f165555184176522d2cbbbd9a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 18:13:10 +0200 Subject: [PATCH 098/105] fix face culling --- src/external/rlsw.h | 114 ++++++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 40 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 1f110eafc..891f33d37 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2439,31 +2439,49 @@ static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VE static inline bool sw_triangle_face_culling(void) { // NOTE: Face culling is done before clipping to avoid unnecessary computations. - // However, culling requires NDC coordinates, while clipping must be done - // in homogeneous space to correctly interpolate newly generated vertices. - // This means we need to compute 1/W twice: - // - Once before clipping for face culling. - // - Again after clipping for the new vertices. + // To handle triangles crossing the w=0 plane correctly, + // we perform the winding order test in homogeneous coordinates directly, + // before the perspective division (division by w). 
+ // This test determines the orientation of the triangle in the (x,y,w) plane, + // which corresponds to the projected 2D winding order sign, + // even with negative w values. // Preload homogeneous coordinates into local variables const float* h0 = RLSW.vertexBuffer[0].homogeneous; const float* h1 = RLSW.vertexBuffer[1].homogeneous; const float* h2 = RLSW.vertexBuffer[2].homogeneous; - // Compute 1/w once and delay divisions - const float invW0 = 1.0f / h0[3]; - const float invW1 = 1.0f / h1[3]; - const float invW2 = 1.0f / h2[3]; + // Compute a value proportional to the signed area in the projected 2D plane, + // calculated directly using homogeneous coordinates BEFORE division by w. + // This is the determinant of the matrix formed by the (x, y, w) components + // of the vertices, which correctly captures the winding order in homogeneous + // space and its relationship to the projected 2D winding order, even with + // negative w values. + // The determinant formula used here is: + // h0.x * (h1.y * h2.w - h2.y * h1.w) + + // h1.x * (h2.y * h0.w - h0.y * h2.w) + + // h2.x * (h0.y * h1.w - h1.y * h0.w) - // Compute the signed 2D area (cross product in Z) - const float x0 = h0[0] * invW0, y0 = h0[1] * invW0; - const float x1 = h1[0] * invW1, y1 = h1[1] * invW1; - const float x2 = h2[0] * invW2, y2 = h2[1] * invW2; - const float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); + const float hSgnArea = + h0[0] * (h1[1] * h2[3] - h2[1] * h1[3]) + + h1[0] * (h2[1] * h0[3] - h0[1] * h2[3]) + + h2[0] * (h0[1] * h1[3] - h1[1] * h0[3]); - // Discard the triangle if it faces the culled direction + // Discard the triangle if its winding order (determined by the sign + // of the homogeneous area/determinant) matches the culled direction. + // A positive hSgnArea typically corresponds to a counter-clockwise + // winding in the projected space when all w > 0. 
+ // This test is robust for points with w > 0 or w < 0, correctly + // capturing the change in orientation when crossing the w=0 plane. + + // The culling logic remains the same based on the signed area/determinant. + // A value of 0 for hSgnArea means the points are collinear in (x, y, w) + // space, which corresponds to a degenerate triangle projection. + // Such triangles are typically not culled by this test (0 < 0 is false, 0 > 0 is false) + // and should be handled by the clipper if necessary. return (RLSW.cullFace == SW_FRONT) - ? (sgnArea < 0) : (sgnArea > 0); + ? (hSgnArea < 0) // Cull if winding is "clockwise" in the projected sense + : (hSgnArea > 0); // Cull if winding is "counter-clockwise" in the projected sense } static inline void sw_triangle_clip_and_project(void) @@ -2815,40 +2833,56 @@ static inline void sw_triangle_render(void) static inline bool sw_quad_face_culling(void) { - // NOTE: We use Green's theorem (signed polygon area) instead of triangulation. - // This is faster but only reliable if the quad is convex and not self-intersecting. - // For face culling purposes, this approximation is acceptable. + // NOTE: Face culling is done before clipping to avoid unnecessary computations. + // To handle quads crossing the w=0 plane correctly, + // we perform the winding order test in homogeneous coordinates directly, + // before the perspective division (division by w). + // For a convex quad with vertices P0, P1, P2, P3 in sequential order, + // the winding order of the quad is the same as the winding order + // of the triangle P0 P1 P2. We use the homogeneous triangle + // winding test on this first triangle. 
// Preload homogeneous coordinates into local variables const float* h0 = RLSW.vertexBuffer[0].homogeneous; const float* h1 = RLSW.vertexBuffer[1].homogeneous; const float* h2 = RLSW.vertexBuffer[2].homogeneous; - const float* h3 = RLSW.vertexBuffer[3].homogeneous; - // Compute 1/w once and delay divisions - const float invW0 = 1.0f / h0[3]; - const float invW1 = 1.0f / h1[3]; - const float invW2 = 1.0f / h2[3]; - const float invW3 = 1.0f / h3[3]; + // NOTE: h3 is not needed for this test + // const float* h3 = RLSW.vertexBuffer[3].homogeneous; - // Pre-multiply to get x/w and y/w coordinates - const float x0 = h0[0] * invW0, y0 = h0[1] * invW0; - const float x1 = h1[0] * invW1, y1 = h1[1] * invW1; - const float x2 = h2[0] * invW2, y2 = h2[1] * invW2; - const float x3 = h3[0] * invW3, y3 = h3[1] * invW3; + // Compute a value proportional to the signed area of the triangle P0 P1 P2 + // in the projected 2D plane, calculated directly using homogeneous coordinates + // BEFORE division by w. + // This is the determinant of the matrix formed by the (x, y, w) components + // of the vertices P0, P1, and P2. Its sign correctly indicates the winding order + // in homogeneous space and its relationship to the projected 2D winding order, + // even with negative w values. + // The determinant formula used here is: + // h0.x * (h1.y * h2.w - h2.y * h1.w) + + // h1.x * (h2.y * h0.w - h0.y * h2.w) + + // h2.x * (h0.y * h1.w - h1.y * h0.w) - // Use Green's theorem (signed polygon area) - // area = 0.5 * sum of (xi * yi+1 - xi+1 * yi) - // The factor 0.5 is not needed here, only the sign matters. 
- const float sgnArea = - (x0 * y1 - x1 * y0) - + (x1 * y2 - x2 * y1) - + (x2 * y3 - x3 * y2) - + (x3 * y0 - x0 * y3); + const float hSgnArea = + h0[0] * (h1[1] * h2[3] - h2[1] * h1[3]) + + h1[0] * (h2[1] * h0[3] - h0[1] * h2[3]) + + h2[0] * (h0[1] * h1[3] - h1[1] * h0[3]); + + // Perform face culling based on the winding order determined by the sign + // of the homogeneous area/determinant of triangle P0 P1 P2. + // This test is robust for points with w > 0 or w < 0 within the triangle, + // correctly capturing the change in orientation when crossing the w=0 plane. + + // A positive hSgnArea typically corresponds to a counter-clockwise + // winding in the projected space when all w > 0. + // A value of 0 for hSgnArea means P0, P1, P2 are collinear in (x, y, w) + // space, which corresponds to a degenerate triangle projection. + // Such quads might also be degenerate or non-planar. They are typically + // not culled by this test (0 < 0 is false, 0 > 0 is false) + // and should be handled by the clipper if necessary. - // Perform face culling based on area sign return (RLSW.cullFace == SW_FRONT) - ? (sgnArea < 0.0f) : (sgnArea > 0.0f); + ? 
(hSgnArea < 0.0f) // Cull if winding is "clockwise" in the projected sense + : (hSgnArea > 0.0f); // Cull if winding is "counter-clockwise" in the projected sense } static inline void sw_quad_clip_and_project(void) From 9799448d8c4c8974300bb5c252b9b03c24d5e8c4 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 18:25:34 +0200 Subject: [PATCH 099/105] excluse some exemple with the software backend --- examples/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 135db49f5..665cf5ff1 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -113,6 +113,10 @@ elseif ("${PLATFORM}" STREQUAL "DRM") list(REMOVE_ITEM example_sources ${CMAKE_CURRENT_SOURCE_DIR}/others/rlgl_standalone.c) list(REMOVE_ITEM example_sources ${CMAKE_CURRENT_SOURCE_DIR}/others/raylib_opengl_interop.c) +elseif ("${OPENGL_VERSION}" STREQUAL "Software") + list(REMOVE_ITEM example_sources ${CMAKE_CURRENT_SOURCE_DIR}/others/rlgl_standalone.c) + list(REMOVE_ITEM example_sources ${CMAKE_CURRENT_SOURCE_DIR}/others/raylib_opengl_interop.c) + elseif (NOT SUPPORT_GESTURES_SYSTEM) # Items requiring gestures system list(REMOVE_ITEM example_sources ${CMAKE_CURRENT_SOURCE_DIR}/textures/textures_mouse_painting.c) From aead15762655ad3b178036b080db3d2f141afc2e Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 18:37:33 +0200 Subject: [PATCH 100/105] review SW_CLAMP case in sw_texture_map --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 891f33d37..e7ec43ff9 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2166,7 +2166,7 @@ static inline void sw_texture_map(int* out, float in, int max, SWwrap mode) *out = (int)(sw_fract(in) * max + 0.5f); break; case SW_CLAMP: - *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); + *out = (int)(sw_saturate(in) * max + 0.5f); break; } } From 
dd48df432ed4259a0db0a14cd8a7b14f3e63572e Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 18:51:17 +0200 Subject: [PATCH 101/105] review sw_saturate --- src/external/rlsw.h | 60 ++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index e7ec43ff9..212825080 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -753,59 +753,31 @@ static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_ma static inline float sw_saturate(float x) { - // Clamps a floating point value between 0.0 and 1.0 + union { float f; uint32_t u; } fb; + fb.f = x; - // This implementation uses IEEE 754 bit manipulation: - // - Uses the sign bit to detect negative values - // - Directly compares with binary representation of 1.0f to detect values > 1.0 + const uint32_t ZERO_BITS = 0x00000000; // Bit pattern of 0.0f + const uint32_t ONE_BITS = 0x3F800000; // Bit pattern of 1.0f (exp = 127, mantissa = 0) - // Use union to access the bits of the float as an unsigned int - union { float f; uint32_t u; } v; - v.f = x; + // Check if x < 0.0f + // If sign bit is set (MSB), x is negative + if ((fb.u & 0x80000000) != 0) { + return 0.0f; + } - // Check sign bit (bit 31): if set, x is negative, return 0.0f - if (v.u & 0x80000000) return 0.0f; + // Check if x > 1.0f + // Works for positive floats: IEEE 754 ordering matches integer ordering + if (fb.u > ONE_BITS) { + return 1.0f; + } - // Extract the unsigned magnitude (exponent + mantissa bits) - uint32_t expMantissa = v.u & 0x7FFFFFFF; - - // If magnitude > binary representation of 1.0f (0x3F800000), return 1.0f - // This efficiently handles all values > 1.0f without additional computation - if (expMantissa > 0x3F800000) return 1.0f; - - // Value is between 0.0f and 1.0f inclusive, return unchanged + // x is in [0.0f, 1.0f] return x; } static inline float sw_fract(float x) { - // Computes the positive fractional part of 
a float. - // Equivalent to fabs(x) - floorf(fabs(x)). - // Uses IEEE 754 bit tricks for efficiency and edge case handling. - - union { float f; uint32_t u; } v; - v.f = x; - - // Get absolute value bits (clear sign bit) - uint32_t abs_bits = v.u & 0x7FFFFFFF; - - // Case 1: |x| < 1.0f -> integer part is 0, return |x| - if (abs_bits < 0x3F800000) { - v.u = abs_bits; // Ensure positive result - return v.f; - } - - // Case 2: |x| ≥ 2^24 -> float is an exact integer, return 0.0f - // Also handles Inf and NaN as 0.0f - if (abs_bits >= 0x4B000000) { - return 0.0f; - } - - // Case 3: 1.0f ≤ |x| < 2^24 -> compute |x| - floor(|x|) - v.u = abs_bits; - float abs_x = v.f; - - return abs_x - floorf(abs_x); + return x - floorf(x); } static inline int sw_clampi(int v, int min, int max) From e75329ab8579176cf08c93813a4b19824161bcc0 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 21:17:23 +0200 Subject: [PATCH 102/105] review line raster --- src/external/rlsw.h | 153 +++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 73 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 212825080..0f2952bbd 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3376,133 +3376,140 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int x2 = (int)(v1->screen[0] + 0.5f); \ int y2 = (int)(v1->screen[1] + 0.5f); \ \ - float z1 = v0->homogeneous[2]; \ - float z2 = v1->homogeneous[2]; \ + int dx = x2 - x1; \ + int dy = y2 - y1; \ \ - int shortLen = y2 - y1; \ - int longLen = x2 - x1; \ - bool yLonger = 0; \ + /* Handling of lines that are more horizontal or vertical */ \ \ - if (abs(shortLen) > abs(longLen)) { \ - int tmp = shortLen; \ - shortLen = longLen; \ - longLen = tmp; \ - yLonger = 1; \ + if (dx == 0 && dy == 0) { \ + /* TODO: A point should be rendered here */ \ + return; \ } \ \ - float invEndVal = 1.0f / longLen; \ - int endVal = longLen; \ - int sgnInc = 1; \ + bool yLonger = (abs(dy) > 
abs(dx)); \ + int longLen, shortLen; \ \ - if (longLen < 0) { \ - longLen = -longLen; \ - sgnInc = -1; \ + if (yLonger) { \ + longLen = dy; \ + shortLen = dx; \ + } else { \ + longLen = dx; \ + shortLen = dy; \ } \ \ - int decInc = (longLen == 0) ? 0 \ - : (shortLen << 16) / longLen; \ + /* Handling of traversal direction */ \ + \ + int sgnInc = (longLen < 0) ? -1 : 1; \ + int abslongLen = abs(longLen); \ + \ + /* Calculation of the increment step for the shorter coordinate */ \ + \ + int decInc = 0; \ + if (abslongLen != 0) { \ + decInc = (shortLen << 16) / abslongLen; \ + } \ + \ + float longLenRcp = (abslongLen != 0) ? (1.0f / abslongLen) : 0.0f; \ + \ + /* Calculation of interpolation steps */ \ + \ + const float zStep = (v1->homogeneous[2] - v0->homogeneous[2]) * longLenRcp; \ + const float rStep = (v1->color[0] - v0->color[0]) * longLenRcp; \ + const float gStep = (v1->color[1] - v0->color[1]) * longLenRcp; \ + const float bStep = (v1->color[2] - v0->color[2]) * longLenRcp; \ + const float aStep = (v1->color[3] - v0->color[3]) * longLenRcp; \ + \ + float z = v0->homogeneous[2]; \ + \ + float color[4] = { \ + v0->color[0], \ + v0->color[1], \ + v0->color[2], \ + v0->color[3] \ + }; \ \ const int fbWidth = RLSW.framebuffer.width; \ - const float zDiff = z2 - z1; \ - \ - uint8_t* colorBuffer = RLSW.framebuffer.color; \ - uint16_t* depthBuffer = RLSW.framebuffer.depth; \ + void* cBuffer = RLSW.framebuffer.color; \ + void* dBuffer = RLSW.framebuffer.depth; \ \ int j = 0; \ - if (yLonger) { \ - for (int i = 0; i != endVal; i += sgnInc, j += decInc) { \ - float t = (float)i * invEndVal; \ \ - int x = x1 + (j >> 16); \ - int y = y1 + i; \ - float z = z1 + t * zDiff; \ - int offset = y * fbWidth + x; \ + if (yLonger) \ + { \ + for (int i = 0; i != longLen; i += sgnInc) \ + { \ + int offset = (y1 + i) * fbWidth + (x1 + (j >> 16)); \ \ - void* dptr = sw_framebuffer_get_depth_addr( \ - depthBuffer, offset \ - ); \ + void* dptr = sw_framebuffer_get_depth_addr(dBuffer, 
offset); \ \ if (ENABLE_DEPTH_TEST) { \ float depth = sw_framebuffer_read_depth(dptr); \ - if (z > depth) continue; \ + if (z > depth) goto discardA; \ } \ \ sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_framebuffer_get_color_addr( \ - colorBuffer, offset \ - ); \ + void* cptr = sw_framebuffer_get_color_addr(cBuffer, offset); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, cptr); \ \ - float srcColor[4]; \ - srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ - srcColor[1] = sw_lerp(v0->color[1], v1->color[1], t); \ - srcColor[2] = sw_lerp(v0->color[2], v1->color[2], t); \ - srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ - \ - sw_blend_colors(dstColor, srcColor); \ + sw_blend_colors(dstColor, color); \ sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ - float color[3]; \ - color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ - color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ - color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ sw_framebuffer_write_color(cptr, color); \ } \ + \ + discardA: \ + j += decInc; \ + z += zStep; \ + color[0] += rStep; \ + color[1] += gStep; \ + color[2] += bStep; \ + color[3] += aStep; \ } \ } \ - else { \ - for (int i = 0; i != endVal; i += sgnInc, j += decInc) { \ - float t = (float)i * invEndVal; \ + else \ + { \ + for (int i = 0; i != longLen; i += sgnInc) \ + { \ + int offset = (y1 + (j >> 16)) * fbWidth + (x1 + i); \ \ - int x = x1 + i; \ - int y = y1 + (j >> 16); \ - float z = z1 + t * zDiff; \ - int offset = y * fbWidth + x; \ - \ - void* dptr = sw_framebuffer_get_depth_addr( \ - depthBuffer, offset \ - ); \ + void* dptr = sw_framebuffer_get_depth_addr(dBuffer, offset); \ \ if (ENABLE_DEPTH_TEST) { \ float depth = sw_framebuffer_read_depth(dptr); \ - if (z > depth) continue; \ + if (z > depth) goto discardB; \ } \ \ sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_framebuffer_get_color_addr( \ - colorBuffer, offset \ - ); \ + void* cptr 
= sw_framebuffer_get_color_addr(cBuffer, offset); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, cptr); \ \ - float srcColor[4]; \ - srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ - srcColor[1] = sw_lerp(v0->color[1], v1->color[1], t); \ - srcColor[2] = sw_lerp(v0->color[2], v1->color[2], t); \ - srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ - \ - sw_blend_colors(dstColor, srcColor); \ + sw_blend_colors(dstColor, color); \ sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ - float color[3]; \ - color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ - color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ - color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ sw_framebuffer_write_color(cptr, color); \ } \ + \ + discardB: \ + j += decInc; \ + z += zStep; \ + color[0] += rStep; \ + color[1] += gStep; \ + color[2] += bStep; \ + color[3] += aStep; \ } \ } \ } From a71ec577ac398d32d4dd25c55e7c9fce48af5c53 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 21:35:22 +0200 Subject: [PATCH 103/105] fix sw_quad_is_aligned --- src/external/rlsw.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 0f2952bbd..be2d35b0a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2899,7 +2899,12 @@ static inline bool sw_quad_is_axis_aligned(void) int horizontal = 0; int vertical = 0; - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) + { + if (RLSW.vertexBuffer[i].homogeneous[3] != 1.0f) { + return false; + } + const float* v0 = RLSW.vertexBuffer[i].position; const float* v1 = RLSW.vertexBuffer[(i + 1) % 4].position; From bd2179db393a1e6f45f60ce6a6df3dddbb5749aa Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 22:29:26 +0200 Subject: [PATCH 104/105] review sw_raster_quad_axis_aligned --- src/external/rlsw.h | 190 ++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 104 
deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index be2d35b0a..ba8a03c1c 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3016,107 +3016,80 @@ static inline void FUNC_NAME(void) \ /* Screen bounds (axis-aligned) */ \ \ - int xMin = (int)v0->screen[0]; \ - int yMin = (int)v0->screen[1]; \ - int xMax = (int)v2->screen[0]; \ - int yMax = (int)v2->screen[1]; \ + int xMin = (int)(v0->screen[0] + 0.5f); \ + int yMin = (int)(v0->screen[1] + 0.5f); \ + int xMax = (int)(v2->screen[0] + 0.5f); \ + int yMax = (int)(v2->screen[1] + 0.5f); \ \ - float width = (float)(xMax - xMin); \ - float height = (float)(yMax - yMin); \ + int width = xMax - xMin; \ + int height = yMax - yMin; \ \ - float invWidth = 1.0f / width; \ - float invHeight = 1.0f / height; \ + if (width == 0 || height == 0) return; \ \ - /* Precomputed coefficients for bilinear interpolation */ \ + float wRcp = (width > 0.0f) ? 1.0f / width : 0.0f; \ + float hRcp = (height > 0.0f) ? 1.0f / height : 0.0f; \ \ - float zA, zB, zC, zD; \ + /* Calculation of vertex gradients in X and Y */ \ \ - zA = v0->homogeneous[2]; \ - zB = v1->homogeneous[2] - v0->homogeneous[2]; \ - zC = v3->homogeneous[2] - v0->homogeneous[2]; \ - zD = v2->homogeneous[2] - v3->homogeneous[2] - v1->homogeneous[2] + v0->homogeneous[2]; \ - \ - float colorA[4]; \ - float colorB[4]; \ - float colorC[4]; \ - float colorD[4]; \ - \ - for (int c = 0; c < 4; c++) { \ - colorA[c] = v0->color[c]; \ - colorB[c] = v1->color[c] - v0->color[c]; \ - colorC[c] = v3->color[c] - v0->color[c]; \ - colorD[c] = v2->color[c] - v3->color[c] - v1->color[c] + v0->color[c]; \ + float tcDx[2], tcDy[2]; \ + if (ENABLE_TEXTURE) { \ + tcDx[0] = (v1->texcoord[0] - v0->texcoord[0]) * wRcp; \ + tcDx[1] = (v1->texcoord[1] - v0->texcoord[1]) * wRcp; \ + tcDy[0] = (v3->texcoord[0] - v0->texcoord[0]) * hRcp; \ + tcDy[1] = (v3->texcoord[1] - v0->texcoord[1]) * hRcp; \ } \ \ - float texA[2]; \ - float texB[2]; \ - float texC[2]; \ - float texD[2]; \ 
+ float cDx[4], cDy[4]; \ + cDx[0] = (v1->color[0] - v0->color[0]) * wRcp; \ + cDx[1] = (v1->color[1] - v0->color[1]) * wRcp; \ + cDx[2] = (v1->color[2] - v0->color[2]) * wRcp; \ + cDx[3] = (v1->color[3] - v0->color[3]) * wRcp; \ + cDy[0] = (v3->color[0] - v0->color[0]) * hRcp; \ + cDy[1] = (v3->color[1] - v0->color[1]) * hRcp; \ + cDy[2] = (v3->color[2] - v0->color[2]) * hRcp; \ + cDy[3] = (v3->color[3] - v0->color[3]) * hRcp; \ \ - if (ENABLE_TEXTURE) \ - { \ - for (int uv = 0; uv < 2; uv++) { \ - texA[uv] = v0->texcoord[uv]; \ - texB[uv] = v1->texcoord[uv] - v0->texcoord[uv]; \ - texC[uv] = v3->texcoord[uv] - v0->texcoord[uv]; \ - texD[uv] = v2->texcoord[uv] - v3->texcoord[uv] - v1->texcoord[uv] + v0->texcoord[uv]; \ - } \ + float zDx, zDy; \ + zDx = (v1->homogeneous[2] - v0->homogeneous[2]) * wRcp; \ + zDy = (v3->homogeneous[2] - v0->homogeneous[2]) * hRcp; \ + \ + /* Start of quad rasterization */ \ + \ + const sw_texture_t* tex; \ + if (ENABLE_TEXTURE) { \ + tex = &RLSW.loadedTextures[RLSW.currentTexture]; \ } \ \ - /* Precomputed UV gradients (constant across the entire quad) */ \ - \ - float duDx, dvDx, duDy, dvDy; \ - \ - if (ENABLE_TEXTURE) \ - { \ - duDx = ((v1->texcoord[0] - v0->texcoord[0]) + (v2->texcoord[0] - v3->texcoord[0])) * 0.5f * invWidth; \ - dvDx = ((v1->texcoord[1] - v0->texcoord[1]) + (v2->texcoord[1] - v3->texcoord[1])) * 0.5f * invWidth; \ - duDy = ((v3->texcoord[0] - v0->texcoord[0]) + (v2->texcoord[0] - v1->texcoord[0])) * 0.5f * invHeight; \ - dvDy = ((v3->texcoord[1] - v0->texcoord[1]) + (v2->texcoord[1] - v1->texcoord[1])) * 0.5f * invHeight; \ - } \ - \ - const sw_texture_t* tex = &RLSW.loadedTextures[RLSW.currentTexture]; \ void* cDstBase = RLSW.framebuffer.color; \ void* dDstBase = RLSW.framebuffer.depth; \ int wDst = RLSW.framebuffer.width; \ \ + float zScanline = v0->homogeneous[2]; \ + float uScanline = v0->texcoord[0]; \ + float vScanline = v0->texcoord[1]; \ + \ + float colorScanline[4] = { \ + v0->color[0], \ + v0->color[1], 
\ + v0->color[2], \ + v0->color[3] \ + }; \ + \ for (int y = yMin; y < yMax; y++) \ { \ - float ty = (y - yMin) * invHeight; \ void* cptr = sw_framebuffer_get_color_addr(cDstBase, y * wDst + xMin); \ void* dptr = sw_framebuffer_get_depth_addr(dDstBase, y * wDst + xMin); \ \ - /* Compute starting values for this scanline (for x = xMin) */ \ + float z = zScanline; \ + float u = uScanline; \ + float v = vScanline; \ \ - float z = zA + zC * ty; \ - \ - float srcColor[4]; \ - srcColor[0] = colorA[0] + colorC[0] * ty; \ - srcColor[1] = colorA[1] + colorC[1] * ty; \ - srcColor[2] = colorA[2] + colorC[2] * ty; \ - srcColor[3] = colorA[3] + colorC[3] * ty; \ - \ - float u, v; \ - if (ENABLE_TEXTURE) { \ - u = texA[0] + texC[0] * ty; \ - v = texA[1] + texC[1] * ty; \ - } \ - \ - /* Compute per-pixel increments along X (constant for a scanline) */ \ - \ - float zIncX = (zB + zD * ty) * invWidth; \ - \ - float colorIncX[4]; \ - colorIncX[0] = (colorB[0] + colorD[0] * ty) * invWidth; \ - colorIncX[1] = (colorB[1] + colorD[1] * ty) * invWidth; \ - colorIncX[2] = (colorB[2] + colorD[2] * ty) * invWidth; \ - colorIncX[3] = (colorB[3] + colorD[3] * ty) * invWidth; \ - \ - float uvIncX[2]; \ - if (ENABLE_TEXTURE) { \ - uvIncX[0] = (texB[0] + texD[0] * ty) * invWidth; \ - uvIncX[1] = (texB[1] + texD[1] * ty) * invWidth; \ - } \ + float color[4] = { \ + colorScanline[0], \ + colorScanline[1], \ + colorScanline[2], \ + colorScanline[3] \ + }; \ \ /* Scanline rasterization */ \ \ @@ -3135,21 +3108,21 @@ static inline void FUNC_NAME(void) \ /* Pixel color computation */ \ \ - float fragColor[4] = { \ - srcColor[0], \ - srcColor[1], \ - srcColor[2], \ - srcColor[3] \ + float srcColor[4] = { \ + color[0], \ + color[1], \ + color[2], \ + color[3] \ }; \ \ if (ENABLE_TEXTURE) \ { \ float texColor[4]; \ - sw_texture_sample(texColor, tex, u, v, duDx, duDy, dvDx, dvDy); \ - fragColor[0] *= texColor[0]; \ - fragColor[1] *= texColor[1]; \ - fragColor[2] *= texColor[2]; \ - fragColor[3] *= 
texColor[3]; \ + sw_texture_sample(texColor, tex, u, v, tcDx[0], tcDy[0], tcDx[1], tcDy[1]); \ + srcColor[0] *= texColor[0]; \ + srcColor[1] *= texColor[1]; \ + srcColor[2] *= texColor[2]; \ + srcColor[3] *= texColor[3]; \ } \ \ if (ENABLE_COLOR_BLEND) \ @@ -3157,38 +3130,47 @@ static inline void FUNC_NAME(void) float dstColor[4]; \ sw_framebuffer_read_color(dstColor, cptr); \ \ - sw_blend_colors(dstColor, fragColor); \ + sw_blend_colors(dstColor, srcColor); \ dstColor[0] = sw_saturate(dstColor[0]); \ dstColor[1] = sw_saturate(dstColor[1]); \ dstColor[2] = sw_saturate(dstColor[2]); \ + dstColor[3] = sw_saturate(dstColor[3]); \ \ sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ - sw_framebuffer_write_color(cptr, fragColor); \ + sw_framebuffer_write_color(cptr, srcColor); \ } \ \ - /* Increment values for the next pixel */ \ - \ discard: \ \ - z += zIncX; \ + z += zDx; \ \ - srcColor[0] += colorIncX[0]; \ - srcColor[1] += colorIncX[1]; \ - srcColor[2] += colorIncX[2]; \ - srcColor[3] += colorIncX[3]; \ + color[0] += cDx[0]; \ + color[1] += cDx[1]; \ + color[2] += cDx[2]; \ + color[3] += cDx[3]; \ \ if (ENABLE_TEXTURE) { \ - u += uvIncX[0]; \ - v += uvIncX[1]; \ + u += tcDx[0]; \ + v += tcDx[1]; \ } \ \ - /* Advance the pointers along the line */ \ - \ sw_framebuffer_inc_color_addr(&cptr); \ sw_framebuffer_inc_depth_addr(&dptr); \ + } \ + \ + zScanline += zDy; \ + \ + colorScanline[0] += cDy[0]; \ + colorScanline[1] += cDy[1]; \ + colorScanline[2] += cDy[2]; \ + colorScanline[3] += cDy[3]; \ + \ + if (ENABLE_TEXTURE) { \ + uScanline += tcDy[0]; \ + vScanline += tcDy[1]; \ } \ } \ } From 87a8e04a7bd84ceaa71a37f1a57cd748625c262a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 18 May 2025 22:35:28 +0200 Subject: [PATCH 105/105] tweaks --- src/external/rlsw.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index ba8a03c1c..51f7c3ba0 100644 --- a/src/external/rlsw.h +++ 
b/src/external/rlsw.h @@ -1032,7 +1032,7 @@ static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void* src) dst[3] = 255; } -static inline void sw_framebuffer_write_color(void* dst, float color[3]) +static inline void sw_framebuffer_write_color(void* dst, const float color[3]) { uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; @@ -1041,7 +1041,7 @@ static inline void sw_framebuffer_write_color(void* dst, float color[3]) ((uint8_t*)dst)[0] = (r << 5) | (g << 2) | b; } -static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) +static inline void sw_framebuffer_fill_color(void* ptr, int size, const float color[3]) { uint8_t r8 = (uint8_t)(color[0] * 7.0f + 0.5f); uint8_t g8 = (uint8_t)(color[1] * 7.0f + 0.5f); @@ -1092,7 +1092,7 @@ static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void* src) dst[3] = 255; } -static inline void sw_framebuffer_write_color(void* dst, float color[3]) +static inline void sw_framebuffer_write_color(void* dst, const float color[3]) { uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; @@ -1101,7 +1101,7 @@ static inline void sw_framebuffer_write_color(void* dst, float color[3]) ((uint16_t*)dst)[0] = (r << 11) | (g << 5) | b; } -static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) +static inline void sw_framebuffer_fill_color(void* ptr, int size, const float color[3]) { uint16_t r16 = (uint16_t)(color[0] * 31.0f + 0.5f); uint16_t g_16 = (uint16_t)(color[1] * 63.0f + 0.5f); @@ -1144,14 +1144,14 @@ static inline void sw_framebuffer_read_color8(uint8_t dst[4], const void* src) dst[3] = 255; } -static inline void sw_framebuffer_write_color(void* dst, float color[3]) +static inline void sw_framebuffer_write_color(void* dst, const float color[3]) { ((uint8_t*)dst)[0] = (uint8_t)(color[0] * UINT8_MAX); ((uint8_t*)dst)[1] = 
(uint8_t)(color[1] * UINT8_MAX); ((uint8_t*)dst)[2] = (uint8_t)(color[2] * UINT8_MAX); } -static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) +static inline void sw_framebuffer_fill_color(void* ptr, int size, const float color[3]) { uint8_t r = (uint8_t)(color[0] * 255.0f); uint8_t g = (uint8_t)(color[1] * 255.0f); @@ -2266,7 +2266,7 @@ static inline void sw_factor_src_alpha_saturate(float* SW_RESTRICT factor, const factor[3] = (src[3] < 1.0f) ? src[3] : 1.0f; } -static inline void sw_blend_colors(float* SW_RESTRICT dst/*[4]*/, float* SW_RESTRICT src/*[4]*/) +static inline void sw_blend_colors(float* SW_RESTRICT dst/*[4]*/, const float* SW_RESTRICT src/*[4]*/) { float srcFactor[4], dstFactor[4]; @@ -3631,7 +3631,7 @@ static inline bool sw_point_clip_and_project(sw_vertex_t* v) } #define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \ -static inline void FUNC_NAME(int x, int y, float z, float color[4]) \ +static inline void FUNC_NAME(int x, int y, float z, const float color[4]) \ { \ if (CHECK_BOUNDS == 1) \ { \ @@ -3683,7 +3683,7 @@ static inline void FUNC_NAME(sw_vertex_t* v) \ int cy = v->screen[1]; \ float cz = v->homogeneous[2]; \ int radius = RLSW.pointRadius; \ - float* color = v->color; \ + const float* color = v->color; \ \ int x = 0; \ int y = radius; \