Logo

index : raylib-jai

Bindings from https://solarium.technology

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/raylib-jai.git/html/Raylib/raylib/src/external/rlsw.h blob: 80fb02c4f968b8ecdccd1f92d71354ec94f79847 [raw] [clear marker]

        
0/**********************************************************************************************
1*
2* rlsw v1.0 - An OpenGL 1.1-style software renderer implementation
3*
4* DESCRIPTION:
5* rlsw is a custom OpenGL 1.1-style implementation on software, intended to provide all
6* functionality available on rlgl.h library used by raylib, becoming a direct software
7* rendering replacement for OpenGL 1.1 backend and allowing to run raylib on GPU-less
8* devices when required
9*
10* FEATURES:
11* - Rendering to custom internal framebuffer with multiple color modes supported:
11* - Color buffer: RGB - 8-bit (3:3:2) | RGB - 16-bit (5:6:5) | RGBA - 32-bit (8:8:8:8)
12* - Depth buffer: D - 16-bit (unorm) | D - 24-bit (unorm) | D - 32-bit (float)
14* - Rendering modes supported: POINT, LINES, TRIANGLE, QUADS
15* - Additional features: Polygon modes, Point width, Line width
16* - Clipping support for all rendering modes
17* - Texture features supported:
18* - All uncompressed texture formats supported by raylib
19* - Texture Minification/Magnification checks
20* - Point and Bilinear filtering
21* - Texture Wrap Modes with separate checks for S/T coordinates
22* - Vertex Arrays support with direct primitive drawing mode
23* - Matrix Stack support (Matrix Push/Pop)
24* - Other GL misc features:
25* - GL-style getter functions
26* - Framebuffer resizing
27* - Perspective correction
28* - Scissor clipping
29* - Depth testing
30* - Blend modes
31* - Face culling
32*
33* ADDITIONAL NOTES:
34* Check PR for more info: https://github.com/raysan5/raylib/pull/4832
35*
36* CONFIGURATION:
37* #define RLSW_IMPLEMENTATION
38* Generates the implementation of the library into the included file
39* If not defined, the library is in header only mode and can be included in other headers
40* or source files without problems. But only ONE file should hold the implementation
41*
42* #define RLSW_USE_SIMD_INTRINSICS
43* Detect and use SIMD intrinsics on the host compilation platform
44* SIMD could improve rendering considerably by vectorizing some raster operations,
45* but the target platforms running the compiled program with SIMD enabled
46* must support the SIMD instruction set the program has been built for, making it only
47* recommended under specific situations and only if the developers know
48* what they are doing; this flag is not defined by default
49*
50* rlsw capabilities could be customized just defining some internal
51* values before library inclusion (default values listed):
52*
53* #define SW_GL_FRAMEBUFFER_COPY_BGRA true
54* #define SW_GL_BINDING_COPY_TEXTURE true
55* #define SW_COLOR_BUFFER_BITS 32
56* #define SW_DEPTH_BUFFER_BITS 16
57* #define SW_MAX_PROJECTION_STACK_SIZE 2
58* #define SW_MAX_MODELVIEW_STACK_SIZE 8
59* #define SW_MAX_TEXTURE_STACK_SIZE 2
60* #define SW_MAX_TEXTURES 128
61*
62*
63* LICENSE: MIT
64*
65* Copyright (c) 2025-2026 Le Juez Victor (@Bigfoot71), reviewed by Ramon Santamaria (@raysan5)
66*
67* Permission is hereby granted, free of charge, to any person obtaining a copy
68* of this software and associated documentation files (the "Software"), to deal
69* in the Software without restriction, including without limitation the rights
70* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
71* copies of the Software, and to permit persons to whom the Software is
72* furnished to do so, subject to the following conditions:
73*
74* The above copyright notice and this permission notice shall be included in all
75* copies or substantial portions of the Software.
76*
77* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
78* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
79* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
80* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
81* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
82* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
83* SOFTWARE.
84*
85**********************************************************************************************/
86
87#ifndef RLSW_H
88#define RLSW_H
89
90#include <stdbool.h>
91#include <stdint.h>
92
93//----------------------------------------------------------------------------------
94// Defines and Macros
95//----------------------------------------------------------------------------------
96// Function specifiers definition
// SWAPI is empty unless the user provides it before inclusion,
// so public functions get the implicit 'extern' specifier
#if !defined(SWAPI)
    #define SWAPI
#endif

// Memory allocation hooks, each one can be overridden before inclusion
// NOTE: Defaults forward to the C standard library allocator
#if !defined(SW_MALLOC)
    #define SW_MALLOC(sz)               malloc(sz)
#endif
#if !defined(SW_REALLOC)
    #define SW_REALLOC(ptr, newSz)      realloc(ptr, newSz)
#endif
#if !defined(SW_FREE)
    #define SW_FREE(ptr)                free(ptr)
#endif
110
// Pointer no-alias qualifier, can be overridden before inclusion
// NOTE: 'restrict' is a C99 keyword but not a C++ one, so compiler specific
// spellings are preferred when available to keep the header usable from C++
#ifndef SW_RESTRICT
    #if defined(_MSC_VER)
        #define SW_RESTRICT __restrict
    #elif defined(__GNUC__) || defined(__clang__)
        #define SW_RESTRICT __restrict__    // Accepted in both C and C++ modes
    #elif defined(__cplusplus)
        #define SW_RESTRICT                 // Unknown C++ compiler: no qualifier available
    #else
        #define SW_RESTRICT restrict
    #endif
#endif
118
// Default configuration values, each one can be overridden before inclusion
#if !defined(SW_GL_FRAMEBUFFER_COPY_BGRA)
    #define SW_GL_FRAMEBUFFER_COPY_BGRA         true
#endif

#if !defined(SW_COLOR_BUFFER_BITS)
    #define SW_COLOR_BUFFER_BITS                32      //< 32 (rgba), 16 (rgb packed) or 8 (rgb packed)
#endif

#if !defined(SW_DEPTH_BUFFER_BITS)
    #define SW_DEPTH_BUFFER_BITS                16      //< 32, 24 or 16
#endif

// Matrix stacks depth
#if !defined(SW_MAX_PROJECTION_STACK_SIZE)
    #define SW_MAX_PROJECTION_STACK_SIZE        2
#endif

#if !defined(SW_MAX_MODELVIEW_STACK_SIZE)
    #define SW_MAX_MODELVIEW_STACK_SIZE         8
#endif

#if !defined(SW_MAX_TEXTURE_STACK_SIZE)
    #define SW_MAX_TEXTURE_STACK_SIZE           2
#endif

#if !defined(SW_MAX_TEXTURES)
    #define SW_MAX_TEXTURES                     128
#endif

// Clipping a polygon adds, under normal circumstances, at most one vertex per clipping plane
// The largest polygon involved is a quadrilateral (4 vertices) and it gets clipped
// against the frustum (6 planes) and the scissors (4 planes), so the maximum
// number of vertices after clipping is:
//      4 (original vertices) + 6 (frustum planes) + 4 (scissors planes) = 14
#if !defined(SW_MAX_CLIPPED_POLYGON_VERTICES)
    #define SW_MAX_CLIPPED_POLYGON_VERTICES     14
#endif

#if !defined(SW_CLIP_EPSILON)
    #define SW_CLIP_EPSILON                     1e-4f
#endif
159
160//----------------------------------------------------------------------------------
161// OpenGL Compatibility Types
162//----------------------------------------------------------------------------------
// Enumerations, flags and booleans
typedef unsigned int    GLenum;
typedef unsigned int    GLbitfield;
typedef unsigned char   GLboolean;
typedef void            GLvoid;

// Signed integers
typedef signed char     GLbyte;
typedef short           GLshort;
typedef int             GLint;
typedef int             GLsizei;

// Unsigned integers
typedef unsigned char   GLubyte;
typedef unsigned short  GLushort;
typedef unsigned int    GLuint;

// Floating point (plain and "clamped" variants share the same C type)
typedef float           GLfloat;
typedef float           GLclampf;
typedef double          GLdouble;
typedef double          GLclampd;
178
179//----------------------------------------------------------------------------------
180// OpenGL Definitions
181// NOTE: Not used/supported definitions are commented
182//----------------------------------------------------------------------------------
// Boolean values
#define GL_FALSE                                0
#define GL_TRUE                                 1

// Capabilities
#define GL_SCISSOR_TEST                         0x0C11
#define GL_TEXTURE_2D                           0x0DE1
#define GL_DEPTH_TEST                           0x0B71
#define GL_CULL_FACE                            0x0B44
#define GL_BLEND                                0x0BE2

// String names
#define GL_VENDOR                               0x1F00
#define GL_RENDERER                             0x1F01
#define GL_VERSION                              0x1F02
#define GL_EXTENSIONS                           0x1F03

// State getter names
//#define GL_ATTRIB_STACK_DEPTH                 0x0BB0
//#define GL_CLIENT_ATTRIB_STACK_DEPTH          0x0BB1
#define GL_COLOR_CLEAR_VALUE                    0x0C22
#define GL_DEPTH_CLEAR_VALUE                    0x0B73
//#define GL_COLOR_WRITEMASK                    0x0C23
//#define GL_CURRENT_INDEX                      0x0B01
#define GL_CURRENT_COLOR                        0x0B00
//#define GL_CURRENT_NORMAL                     0x0B02
//#define GL_CURRENT_RASTER_COLOR               0x0B04
//#define GL_CURRENT_RASTER_DISTANCE            0x0B09
//#define GL_CURRENT_RASTER_INDEX               0x0B05
//#define GL_CURRENT_RASTER_POSITION            0x0B07
//#define GL_CURRENT_RASTER_TEXTURE_COORDS      0x0B06
//#define GL_CURRENT_RASTER_POSITION_VALID      0x0B08
#define GL_CURRENT_TEXTURE_COORDS               0x0B03
#define GL_POINT_SIZE                           0x0B11
#define GL_LINE_WIDTH                           0x0B21
//#define GL_INDEX_CLEAR_VALUE                  0x0C20
//#define GL_INDEX_MODE                         0x0C30
//#define GL_INDEX_WRITEMASK                    0x0C21
#define GL_MODELVIEW_MATRIX                     0x0BA6
#define GL_MODELVIEW_STACK_DEPTH                0x0BA3
//#define GL_NAME_STACK_DEPTH                   0x0D70
#define GL_PROJECTION_MATRIX                    0x0BA7
#define GL_PROJECTION_STACK_DEPTH               0x0BA4
//#define GL_RENDER_MODE                        0x0C40
//#define GL_RGBA_MODE                          0x0C31
#define GL_TEXTURE_MATRIX                       0x0BA8
#define GL_TEXTURE_STACK_DEPTH                  0x0BA5
#define GL_VIEWPORT                             0x0BA2
227
// Buffer bits
#define GL_COLOR_BUFFER_BIT                     0x00004000
#define GL_DEPTH_BUFFER_BIT                     0x00000100

// Matrix modes
#define GL_MODELVIEW                            0x1700
#define GL_PROJECTION                           0x1701
#define GL_TEXTURE                              0x1702

// Client arrays
#define GL_VERTEX_ARRAY                         0x8074
#define GL_NORMAL_ARRAY                         0x8075      // WARNING: Not implemented (defined for RLGL)
#define GL_COLOR_ARRAY                          0x8076
//#define GL_INDEX_ARRAY                        0x8077
#define GL_TEXTURE_COORD_ARRAY                  0x8078

// Primitive types
#define GL_POINTS                               0x0000
#define GL_LINES                                0x0001
//#define GL_LINE_LOOP                          0x0002
//#define GL_LINE_STRIP                         0x0003
#define GL_TRIANGLES                            0x0004
//#define GL_TRIANGLE_STRIP                     0x0005
//#define GL_TRIANGLE_FAN                       0x0006
#define GL_QUADS                                0x0007
//#define GL_QUAD_STRIP                         0x0008
//#define GL_POLYGON                            0x0009

// Polygon modes
#define GL_POINT                                0x1B00
#define GL_LINE                                 0x1B01
#define GL_FILL                                 0x1B02

// Faces
#define GL_FRONT                                0x0404
#define GL_BACK                                 0x0405

// Blending factors
#define GL_ZERO                                 0
#define GL_ONE                                  1
#define GL_SRC_COLOR                            0x0300
#define GL_ONE_MINUS_SRC_COLOR                  0x0301
#define GL_SRC_ALPHA                            0x0302
#define GL_ONE_MINUS_SRC_ALPHA                  0x0303
#define GL_DST_ALPHA                            0x0304
#define GL_ONE_MINUS_DST_ALPHA                  0x0305
#define GL_DST_COLOR                            0x0306
#define GL_ONE_MINUS_DST_COLOR                  0x0307
#define GL_SRC_ALPHA_SATURATE                   0x0308

// Texture filters
#define GL_NEAREST                              0x2600
#define GL_LINEAR                               0x2601

// Texture wrap modes
#define GL_REPEAT                               0x2901
#define GL_CLAMP                                0x2900

// Texture parameter names
#define GL_TEXTURE_MAG_FILTER                   0x2800
#define GL_TEXTURE_MIN_FILTER                   0x2801

#define GL_TEXTURE_WRAP_S                       0x2802
#define GL_TEXTURE_WRAP_T                       0x2803

// Error codes
#define GL_NO_ERROR                             0
#define GL_INVALID_ENUM                         0x0500
#define GL_INVALID_VALUE                        0x0501
#define GL_INVALID_OPERATION                    0x0502
#define GL_STACK_OVERFLOW                       0x0503
#define GL_STACK_UNDERFLOW                      0x0504
#define GL_OUT_OF_MEMORY                        0x0505
290
// Pixel formats
#define GL_ALPHA                                0x1906
#define GL_LUMINANCE                            0x1909
#define GL_LUMINANCE_ALPHA                      0x190A
#define GL_RGB                                  0x1907
#define GL_RGBA                                 0x1908

// Data types
#define GL_BYTE                                 0x1400
#define GL_UNSIGNED_BYTE                        0x1401
#define GL_SHORT                                0x1402
#define GL_UNSIGNED_SHORT                       0x1403
#define GL_INT                                  0x1404
#define GL_UNSIGNED_INT                         0x1405
#define GL_FLOAT                                0x1406

// OpenGL Definitions NOT USED
#define GL_PERSPECTIVE_CORRECTION_HINT          0x0C50
#define GL_PACK_ALIGNMENT                       0x0D05
#define GL_UNPACK_ALIGNMENT                     0x0CF5
#define GL_LINE_SMOOTH                          0x0B20
#define GL_SMOOTH                               0x1D01
#define GL_NICEST                               0x1102
#define GL_CCW                                  0x0901
#define GL_CW                                   0x0900
#define GL_NEVER                                0x0200
#define GL_LESS                                 0x0201
#define GL_EQUAL                                0x0202
#define GL_LEQUAL                               0x0203
#define GL_GREATER                              0x0204
#define GL_NOTEQUAL                             0x0205
#define GL_GEQUAL                               0x0206
#define GL_ALWAYS                               0x0207
322
323//----------------------------------------------------------------------------------
324// OpenGL Bindings to rlsw
325//----------------------------------------------------------------------------------
// Every gl*() call below is forwarded 1:1 to its sw*() counterpart,
// with every argument parenthesized so expressions can be passed safely
// NOTE: glPolygonMode() drops its 'face' argument, swPolygonMode() only takes the mode
#define glReadPixels(x, y, w, h, f, t, p)       swCopyFramebuffer((x), (y), (w), (h), (f), (t), (p))
#define glEnable(state)                         swEnable((state))
#define glDisable(state)                        swDisable((state))
#define glGetFloatv(pname, params)              swGetFloatv((pname), (params))
#define glGetString(pname)                      swGetString((pname))
#define glGetError()                            swGetError()
#define glViewport(x, y, w, h)                  swViewport((x), (y), (w), (h))
#define glScissor(x, y, w, h)                   swScissor((x), (y), (w), (h))
#define glClearColor(r, g, b, a)                swClearColor((r), (g), (b), (a))
#define glClearDepth(d)                         swClearDepth((d))
#define glClear(bitmask)                        swClear((bitmask))
#define glBlendFunc(sfactor, dfactor)           swBlendFunc((sfactor), (dfactor))
#define glPolygonMode(face, mode)               swPolygonMode((mode))
#define glCullFace(face)                        swCullFace((face))
#define glPointSize(size)                       swPointSize((size))
#define glLineWidth(width)                      swLineWidth((width))
#define glMatrixMode(mode)                      swMatrixMode((mode))
#define glPushMatrix()                          swPushMatrix()
#define glPopMatrix()                           swPopMatrix()
#define glLoadIdentity()                        swLoadIdentity()
#define glTranslatef(x, y, z)                   swTranslatef((x), (y), (z))
#define glRotatef(a, x, y, z)                   swRotatef((a), (x), (y), (z))
#define glScalef(x, y, z)                       swScalef((x), (y), (z))
#define glMultMatrixf(v)                        swMultMatrixf((v))
#define glFrustum(l, r, b, t, n, f)             swFrustum((l), (r), (b), (t), (n), (f))
#define glOrtho(l, r, b, t, n, f)               swOrtho((l), (r), (b), (t), (n), (f))
#define glBegin(mode)                           swBegin((mode))
#define glEnd()                                 swEnd()
#define glVertex2i(x, y)                        swVertex2i((x), (y))
#define glVertex2f(x, y)                        swVertex2f((x), (y))
#define glVertex2fv(v)                          swVertex2fv((v))
#define glVertex3i(x, y, z)                     swVertex3i((x), (y), (z))
#define glVertex3f(x, y, z)                     swVertex3f((x), (y), (z))
#define glVertex3fv(v)                          swVertex3fv((v))
#define glvertex3fv(v)                          swVertex3fv((v))    // Former misspelled name, kept for backward compatibility
#define glVertex4i(x, y, z, w)                  swVertex4i((x), (y), (z), (w))
#define glVertex4f(x, y, z, w)                  swVertex4f((x), (y), (z), (w))
#define glVertex4fv(v)                          swVertex4fv((v))
#define glColor3ub(r, g, b)                     swColor3ub((r), (g), (b))
#define glColor3ubv(v)                          swColor3ubv((v))
#define glColor3f(r, g, b)                      swColor3f((r), (g), (b))
#define glColor3fv(v)                           swColor3fv((v))
#define glColor4ub(r, g, b, a)                  swColor4ub((r), (g), (b), (a))
#define glColor4ubv(v)                          swColor4ubv((v))
#define glColor4f(r, g, b, a)                   swColor4f((r), (g), (b), (a))
#define glColor4fv(v)                           swColor4fv((v))
#define glTexCoord2f(u, v)                      swTexCoord2f((u), (v))
#define glTexCoord2fv(v)                        swTexCoord2fv((v))
373
// Client arrays: enabling is a no-op, disabling unbinds the array (null buffer);
// the gl*Pointer() bindings only forward the data pointer
#define glEnableClientState(array)                                  ((void)(array))
#define glDisableClientState(array)                                 swBindArray((array), 0)
#define glVertexPointer(size, type, stride, pointer)                swBindArray(SW_VERTEX_ARRAY, (pointer))
#define glTexCoordPointer(size, type, stride, pointer)              swBindArray(SW_TEXTURE_COORD_ARRAY, (pointer))
#define glColorPointer(size, type, stride, pointer)                 swBindArray(SW_COLOR_ARRAY, (pointer))
#define glDrawArrays(mode, first, count)                            swDrawArrays((mode), (first), (count))
#define glDrawElements(mode, count, type, indices)                  swDrawElements((mode), (count), (type), (indices))

// Textures: target, level, internal format and border arguments are dropped
#define glGenTextures(count, ids)                                   swGenTextures((count), (ids))
#define glDeleteTextures(count, ids)                                swDeleteTextures((count), (ids))
#define glTexImage2D(target, level, internalFormat, width, height, border, format, type, pixels) \
    swTexImage2D((width), (height), (format), (type), (pixels))
#define glTexParameteri(target, pname, param)                       swTexParameteri((pname), (param))
#define glBindTexture(target, id)                                   swBindTexture((id))
386
387// OpenGL functions NOT IMPLEMENTED by rlsw
// Stubs: each argument is evaluated once (left to right) and discarded
#define glDepthMask(flag)                       ((void)(flag))
#define glColorMask(r, g, b, a)                 ((void)(r), (void)(g), (void)(b), (void)(a))
#define glPixelStorei(pname, param)             ((void)(pname), (void)(param))
#define glHint(target, mode)                    ((void)(target), (void)(mode))
#define glShadeModel(mode)                      ((void)(mode))
#define glFrontFace(mode)                       ((void)(mode))
#define glDepthFunc(func)                       ((void)(func))
#define glTexSubImage2D(a0, a1, a2, a3, a4, a5, a6, a7, a8) \
    ((void)(a0), (void)(a1), (void)(a2), (void)(a3), (void)(a4), (void)(a5), (void)(a6), (void)(a7), (void)(a8))
#define glGetTexImage(a0, a1, a2, a3, a4)       ((void)(a0), (void)(a1), (void)(a2), (void)(a3), (void)(a4))
#define glNormal3f(nx, ny, nz)                  ((void)(nx), (void)(ny), (void)(nz))
#define glNormal3fv(v)                          ((void)(v))
#define glNormalPointer(type, stride, pointer)  ((void)(type), (void)(stride), (void)(pointer))
400
401//----------------------------------------------------------------------------------
402// Types and Structures Definition
403//----------------------------------------------------------------------------------
404typedef enum {
405 SW_SCISSOR_TEST = GL_SCISSOR_TEST,
406 SW_TEXTURE_2D = GL_TEXTURE_2D,
407 SW_DEPTH_TEST = GL_DEPTH_TEST,
408 SW_CULL_FACE = GL_CULL_FACE,
409 SW_BLEND = GL_BLEND
410} SWstate;
411
412typedef enum {
413 SW_VENDOR = GL_VENDOR,
414 SW_RENDERER = GL_RENDERER,
415 SW_VERSION = GL_VERSION,
416 SW_EXTENSIONS = GL_EXTENSIONS,
417 SW_COLOR_CLEAR_VALUE = GL_COLOR_CLEAR_VALUE,
418 SW_DEPTH_CLEAR_VALUE = GL_DEPTH_CLEAR_VALUE,
419 SW_CURRENT_COLOR = GL_CURRENT_COLOR,
420 SW_CURRENT_TEXTURE_COORDS = GL_CURRENT_TEXTURE_COORDS,
421 SW_POINT_SIZE = GL_POINT_SIZE,
422 SW_LINE_WIDTH = GL_LINE_WIDTH,
423 SW_MODELVIEW_MATRIX = GL_MODELVIEW_MATRIX,
424 SW_MODELVIEW_STACK_DEPTH = GL_MODELVIEW_STACK_DEPTH,
425 SW_PROJECTION_MATRIX = GL_PROJECTION_MATRIX,
426 SW_PROJECTION_STACK_DEPTH = GL_PROJECTION_STACK_DEPTH,
427 SW_TEXTURE_MATRIX = GL_TEXTURE_MATRIX,
428 SW_TEXTURE_STACK_DEPTH = GL_TEXTURE_STACK_DEPTH,
429 SW_VIEWPORT = GL_VIEWPORT
430} SWget;
431
432typedef enum {
433 SW_COLOR_BUFFER_BIT = GL_COLOR_BUFFER_BIT,
434 SW_DEPTH_BUFFER_BIT = GL_DEPTH_BUFFER_BIT
435} SWbuffer;
436
437typedef enum {
438 SW_PROJECTION = GL_PROJECTION,
439 SW_MODELVIEW = GL_MODELVIEW,
440 SW_TEXTURE = GL_TEXTURE
441} SWmatrix;
442
443typedef enum {
444 SW_VERTEX_ARRAY = GL_VERTEX_ARRAY,
445 SW_TEXTURE_COORD_ARRAY = GL_TEXTURE_COORD_ARRAY,
446 SW_COLOR_ARRAY = GL_COLOR_ARRAY
447} SWarray;
448
449typedef enum {
450 SW_POINTS = GL_POINTS,
451 SW_LINES = GL_LINES,
452 SW_TRIANGLES = GL_TRIANGLES,
453 SW_QUADS = GL_QUADS
454} SWdraw;
455
456typedef enum {
457 SW_POINT = GL_POINT,
458 SW_LINE = GL_LINE,
459 SW_FILL = GL_FILL
460} SWpoly;
461
462typedef enum {
463 SW_FRONT = GL_FRONT,
464 SW_BACK = GL_BACK,
465} SWface;
466
467typedef enum {
468 SW_ZERO = GL_ZERO,
469 SW_ONE = GL_ONE,
470 SW_SRC_COLOR = GL_SRC_COLOR,
471 SW_ONE_MINUS_SRC_COLOR = GL_ONE_MINUS_SRC_COLOR,
472 SW_SRC_ALPHA = GL_SRC_ALPHA,
473 SW_ONE_MINUS_SRC_ALPHA = GL_ONE_MINUS_SRC_ALPHA,
474 SW_DST_ALPHA = GL_DST_ALPHA,
475 SW_ONE_MINUS_DST_ALPHA = GL_ONE_MINUS_DST_ALPHA,
476 SW_DST_COLOR = GL_DST_COLOR,
477 SW_ONE_MINUS_DST_COLOR = GL_ONE_MINUS_DST_COLOR,
478 SW_SRC_ALPHA_SATURATE = GL_SRC_ALPHA_SATURATE
479} SWfactor;
480
481typedef enum {
482 SW_LUMINANCE = GL_LUMINANCE,
483 SW_LUMINANCE_ALPHA = GL_LUMINANCE_ALPHA,
484 SW_RGB = GL_RGB,
485 SW_RGBA = GL_RGBA,
486} SWformat;
487
488typedef enum {
489 SW_UNSIGNED_BYTE = GL_UNSIGNED_BYTE,
490 SW_BYTE = GL_BYTE,
491 SW_UNSIGNED_SHORT = GL_UNSIGNED_SHORT,
492 SW_SHORT = GL_SHORT,
493 SW_UNSIGNED_INT = GL_UNSIGNED_INT,
494 SW_INT = GL_INT,
495 SW_FLOAT = GL_FLOAT
496} SWtype;
497
498typedef enum {
499 SW_NEAREST = GL_NEAREST,
500 SW_LINEAR = GL_LINEAR
501} SWfilter;
502
503typedef enum {
504 SW_REPEAT = GL_REPEAT,
505 SW_CLAMP = GL_CLAMP,
506} SWwrap;
507
508typedef enum {
509 SW_TEXTURE_MIN_FILTER = GL_TEXTURE_MIN_FILTER,
510 SW_TEXTURE_MAG_FILTER = GL_TEXTURE_MAG_FILTER,
511 SW_TEXTURE_WRAP_S = GL_TEXTURE_WRAP_S,
512 SW_TEXTURE_WRAP_T = GL_TEXTURE_WRAP_T
513} SWtexparam;
514
515typedef enum {
516 SW_NO_ERROR = GL_NO_ERROR,
517 SW_INVALID_ENUM = GL_INVALID_ENUM,
518 SW_INVALID_VALUE = GL_INVALID_VALUE,
519 SW_STACK_OVERFLOW = GL_STACK_OVERFLOW,
520 SW_STACK_UNDERFLOW = GL_STACK_UNDERFLOW,
521 SW_INVALID_OPERATION = GL_INVALID_OPERATION,
522} SWerrcode;
523
524//------------------------------------------------------------------------------------
525// Functions Declaration - Public API
526//------------------------------------------------------------------------------------
527SWAPI bool swInit(int w, int h);
528SWAPI void swClose(void);
529
530SWAPI bool swResizeFramebuffer(int w, int h);
531SWAPI void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void *pixels);
532SWAPI void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void *pixels);
533
534SWAPI void swEnable(SWstate state);
535SWAPI void swDisable(SWstate state);
536
537SWAPI void swGetFloatv(SWget name, float *v);
538SWAPI const char *swGetString(SWget name);
539SWAPI SWerrcode swGetError(void);
540
541SWAPI void swViewport(int x, int y, int width, int height);
542SWAPI void swScissor(int x, int y, int width, int height);
543
544SWAPI void swClearColor(float r, float g, float b, float a);
545SWAPI void swClearDepth(float depth);
546SWAPI void swClear(uint32_t bitmask);
547
548SWAPI void swBlendFunc(SWfactor sfactor, SWfactor dfactor);
549SWAPI void swPolygonMode(SWpoly mode);
550SWAPI void swCullFace(SWface face);
551
552SWAPI void swPointSize(float size);
553SWAPI void swLineWidth(float width);
554
555SWAPI void swMatrixMode(SWmatrix mode);
556SWAPI void swPushMatrix(void);
557SWAPI void swPopMatrix(void);
558SWAPI void swLoadIdentity(void);
559SWAPI void swTranslatef(float x, float y, float z);
560SWAPI void swRotatef(float angle, float x, float y, float z);
561SWAPI void swScalef(float x, float y, float z);
562SWAPI void swMultMatrixf(const float *mat);
563SWAPI void swFrustum(double left, double right, double bottom, double top, double znear, double zfar);
564SWAPI void swOrtho(double left, double right, double bottom, double top, double znear, double zfar);
565
566SWAPI void swBegin(SWdraw mode);
567SWAPI void swEnd(void);
568
569SWAPI void swVertex2i(int x, int y);
570SWAPI void swVertex2f(float x, float y);
571SWAPI void swVertex2fv(const float *v);
572SWAPI void swVertex3i(int x, int y, int z);
573SWAPI void swVertex3f(float x, float y, float z);
574SWAPI void swVertex3fv(const float *v);
575SWAPI void swVertex4i(int x, int y, int z, int w);
576SWAPI void swVertex4f(float x, float y, float z, float w);
577SWAPI void swVertex4fv(const float *v);
578
579SWAPI void swColor3ub(uint8_t r, uint8_t g, uint8_t b);
580SWAPI void swColor3ubv(const uint8_t *v);
581SWAPI void swColor3f(float r, float g, float b);
582SWAPI void swColor3fv(const float *v);
583SWAPI void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a);
584SWAPI void swColor4ubv(const uint8_t *v);
585SWAPI void swColor4f(float r, float g, float b, float a);
586SWAPI void swColor4fv(const float *v);
587
588SWAPI void swTexCoord2f(float u, float v);
589SWAPI void swTexCoord2fv(const float *v);
590
591SWAPI void swBindArray(SWarray type, void *buffer);
592SWAPI void swDrawArrays(SWdraw mode, int offset, int count);
593SWAPI void swDrawElements(SWdraw mode, int count, int type, const void *indices);
594
595SWAPI void swGenTextures(int count, uint32_t *textures);
596SWAPI void swDeleteTextures(int count, uint32_t *textures);
597
598SWAPI void swTexImage2D(int width, int height, SWformat format, SWtype type, const void *data);
599SWAPI void swTexParameteri(int param, int value);
600SWAPI void swBindTexture(uint32_t id);
601
602#endif // RLSW_H
603
604/***********************************************************************************
605*
606* RLSW IMPLEMENTATION
607*
608************************************************************************************/
609#define RLSW_IMPLEMENTATION
610#if defined(RLSW_IMPLEMENTATION)
611
612#include <stdlib.h> // Required for: malloc(), free()
613#include <stddef.h> // Required for: NULL, size_t, uint8_t, uint16_t, uint32_t...
614#include <math.h> // Required for: sinf(), cosf(), floorf(), fabsf(), sqrtf(), roundf()
615
616// Simple log system to avoid printf() calls if required
617// NOTE: Avoiding those calls, also avoids const strings memory usage
// Log support is switched on right here, so SW_LOG() forwards to printf()
// NOTE: An extra '&& defined(_DEBUG)' requirement is left commented out of the condition
// WARNING: LOG() output required for this tool
#define SW_SUPPORT_LOG_INFO
#if !defined(SW_SUPPORT_LOG_INFO)
    #define SW_LOG(...)
#else
    #include <stdio.h>
    #define SW_LOG(...)     printf(__VA_ARGS__)
#endif
625
// Alignment specifier, using the compiler specific syntax
// NOTE: MSVC is checked first, then GCC/Clang
#ifdef _MSC_VER
    #define SW_ALIGN(x)     __declspec(align(x))
#elif defined(__GNUC__) || defined(__clang__)
    #define SW_ALIGN(x)     __attribute__((aligned(x)))
#else
    #define SW_ALIGN(x)     // Unknown compiler: expands to nothing
#endif
633
// Target architecture detection, from compiler predefined macros
// NOTE: At most one SW_ARCH_* gets defined, none if the target is not recognized
#if defined(__x86_64__) || defined(_M_X64)
    #define SW_ARCH_X86_64
#elif defined(__i386__) || defined(_M_IX86)
    #define SW_ARCH_X86
#elif defined(__arm__) || defined(_M_ARM)
    #define SW_ARCH_ARM32
#elif defined(__aarch64__) || defined(_M_ARM64)
    #define SW_ARCH_ARM64
#elif defined(__riscv)
    #define SW_ARCH_RISCV
#endif
645
#ifdef RLSW_USE_SIMD_INTRINSICS
    // SIMD vector instructions detection
    // NOTE: Compiler is responsible to enable required flags for host device,
    // supported features are detected at compiler init but varies depending on compiler
    // TODO: This logic must be reviewed to avoid the inclusion of multiple headers
    // and enable the higher level of SIMD available

    // AVX family: first matching level is selected
    #if defined(__FMA__) && defined(__AVX2__)
        #define SW_HAS_FMA_AVX2
        #include <immintrin.h>
    #elif defined(__FMA__) && defined(__AVX__)
        #define SW_HAS_FMA_AVX
        #include <immintrin.h>
    #elif defined(__AVX2__)
        #define SW_HAS_AVX2
        #include <immintrin.h>
    #elif defined(__AVX__)
        #define SW_HAS_AVX
        #include <immintrin.h>
    #endif

    // SSE family: checked independently from the AVX family, first matching level is selected
    #if defined(__SSE4_2__)
        #define SW_HAS_SSE42
        #include <nmmintrin.h>
    #elif defined(__SSE4_1__)
        #define SW_HAS_SSE41
        #include <smmintrin.h>
    #elif defined(__SSSE3__)
        #define SW_HAS_SSSE3
        #include <tmmintrin.h>
    #elif defined(__SSE3__)
        #define SW_HAS_SSE3
        #include <pmmintrin.h>
    #elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)     // SSE2 x64
        #define SW_HAS_SSE2
        #include <emmintrin.h>
    #elif defined(__SSE__)
        #define SW_HAS_SSE
        #include <xmmintrin.h>
    #endif

    // ARM NEON, with or without fused multiply-add
    #if defined(__ARM_NEON) || defined(__aarch64__)
        #ifdef __ARM_FEATURE_FMA
            #define SW_HAS_NEON_FMA
        #else
            #define SW_HAS_NEON
        #endif
        #include <arm_neon.h>
    #endif

    // RISC-V vector extension
    #ifdef __riscv_vector
        // NOTE: Requires compilation flags: -march=rv64gcv -mabi=lp64d
        #define SW_HAS_RVV
        #include <riscv_vector.h>
    #endif
#endif  // RLSW_USE_SIMD_INTRINSICS
698
// Prefix for brace-initialized temporaries:
// C parenthesizes the type name (compound literal), C++ uses it bare
#ifndef __cplusplus
    #define SW_CURLY_INIT(name)     (name)
#else
    #define SW_CURLY_INIT(name)     name
#endif
704
705//----------------------------------------------------------------------------------
706// Defines and Macros
707//----------------------------------------------------------------------------------
// Math constants
#define SW_PI                   3.14159265358979323846f
#define SW_INV_255              0.00392156862745098f    // 1.0f/255.0f
#define SW_DEG2RAD              (SW_PI/180.0f)
#define SW_RAD2DEG              (180.0f/SW_PI)

// Buffers sizes in bytes, derived from the configured bit depths (bits/8)
#define SW_COLOR_PIXEL_SIZE     (SW_COLOR_BUFFER_BITS >> 3)
#define SW_DEPTH_PIXEL_SIZE     (SW_DEPTH_BUFFER_BITS >> 3)
#define SW_PIXEL_SIZE           (SW_COLOR_PIXEL_SIZE + SW_DEPTH_PIXEL_SIZE)
716
// Pixel alignment: 4 bytes for pixels up to 4 bytes, 8 bytes for anything bigger
#if (SW_PIXEL_SIZE > 4)
    #define SW_PIXEL_ALIGNMENT  8
#else
    #define SW_PIXEL_ALIGNMENT  4
#endif
722
// Color buffer format selection, driven by SW_COLOR_BUFFER_BITS
//  - SW_COLOR_TYPE:        storage type of one color element
//  - SW_COLOR_PACK_COMP:   number of SW_COLOR_TYPE elements per pixel
//  - SW_PACK_COLOR(r,g,b): packs float components (scaled by the per-channel maximum) into one element
//  - SW_UNPACK_x(p):       extracts one channel from a packed element
//  - SW_SCALE_x(v):        rescales a channel from its packed range to [0..255], with rounding
//  - SW_TO_FLOAT_x(v):     rescales a channel from its packed range to [0.0f..1.0f]
// NOTE: Every replacement list is fully parenthesized so the macros can be
// safely combined with casts and other operators at the expansion site
#if (SW_COLOR_BUFFER_BITS == 8)     // R3G3B2
    #define SW_COLOR_TYPE           uint8_t
    #define SW_COLOR_IS_PACKED      1
    #define SW_COLOR_PACK_COMP      1
    #define SW_PACK_COLOR(r,g,b)    (((((uint8_t)((r)*7 + 0.5f)) & 0x07) << 5) | \
                                     ((((uint8_t)((g)*7 + 0.5f)) & 0x07) << 2) | \
                                      (((uint8_t)((b)*3 + 0.5f)) & 0x03))
    #define SW_UNPACK_R(p)          (((p) >> 5) & 0x07)
    #define SW_UNPACK_G(p)          (((p) >> 2) & 0x07)
    #define SW_UNPACK_B(p)          ((p) & 0x03)
    #define SW_SCALE_R(v)           (((v)*255 + 3)/7)
    #define SW_SCALE_G(v)           (((v)*255 + 3)/7)
    #define SW_SCALE_B(v)           (((v)*255 + 1)/3)
    #define SW_TO_FLOAT_R(v)        ((v)*(1.0f/7.0f))
    #define SW_TO_FLOAT_G(v)        ((v)*(1.0f/7.0f))
    #define SW_TO_FLOAT_B(v)        ((v)*(1.0f/3.0f))
#elif (SW_COLOR_BUFFER_BITS == 16)  // R5G6B5
    #define SW_COLOR_TYPE           uint16_t
    #define SW_COLOR_IS_PACKED      1
    #define SW_COLOR_PACK_COMP      1
    #define SW_PACK_COLOR(r,g,b)    (((((uint16_t)((r)*31 + 0.5f)) & 0x1F) << 11) | \
                                     ((((uint16_t)((g)*63 + 0.5f)) & 0x3F) << 5) | \
                                      (((uint16_t)((b)*31 + 0.5f)) & 0x1F))
    #define SW_UNPACK_R(p)          (((p) >> 11) & 0x1F)
    #define SW_UNPACK_G(p)          (((p) >> 5) & 0x3F)
    #define SW_UNPACK_B(p)          ((p) & 0x1F)
    #define SW_SCALE_R(v)           (((v)*255 + 15)/31)
    #define SW_SCALE_G(v)           (((v)*255 + 31)/63)
    #define SW_SCALE_B(v)           (((v)*255 + 15)/31)
    #define SW_TO_FLOAT_R(v)        ((v)*(1.0f/31.0f))
    #define SW_TO_FLOAT_G(v)        ((v)*(1.0f/63.0f))
    #define SW_TO_FLOAT_B(v)        ((v)*(1.0f/31.0f))
#else // 32 bits, four unpacked 8-bit components
    #define SW_COLOR_TYPE           uint8_t
    #define SW_COLOR_IS_PACKED      0
    #define SW_COLOR_PACK_COMP      4
#endif
756
// Depth buffer format selection, driven by SW_DEPTH_BUFFER_BITS
//  - 16 bits: one uint16_t element per pixel
//  - 24 bits: three uint8_t elements per pixel, most significant byte first
//  - any other value: one float element per pixel
#if (SW_DEPTH_BUFFER_BITS == 16)
    #define SW_DEPTH_TYPE           uint16_t
    #define SW_DEPTH_IS_PACKED      1
    #define SW_DEPTH_PACK_COMP      1
    #define SW_DEPTH_MAX            UINT16_MAX
    #define SW_DEPTH_SCALE          (1.0f/UINT16_MAX)
    #define SW_PACK_DEPTH(d)        ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
    #define SW_UNPACK_DEPTH(p)      (p)
#elif (SW_DEPTH_BUFFER_BITS == 24)
    #define SW_DEPTH_TYPE           uint8_t
    #define SW_DEPTH_IS_PACKED      0
    #define SW_DEPTH_PACK_COMP      3
    #define SW_DEPTH_MAX            0xFFFFFFU
    #define SW_DEPTH_SCALE          (1.0f/0xFFFFFFU)
    // One macro per stored byte: bits 23..16, 15..8 and 7..0
    #define SW_PACK_DEPTH_0(d)      ((uint8_t)(((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFFU))
    #define SW_PACK_DEPTH_1(d)      ((uint8_t)(((uint32_t)((d)*SW_DEPTH_MAX)>>8)&0xFFU))
    #define SW_PACK_DEPTH_2(d)      ((uint8_t)((uint32_t)((d)*SW_DEPTH_MAX)&0xFFU))
    #define SW_UNPACK_DEPTH(p)      ((((uint32_t)(p)[0]<<16)|((uint32_t)(p)[1]<<8)|(uint32_t)(p)[2]))
#else // 32 bits
    #define SW_DEPTH_TYPE           float
    #define SW_DEPTH_IS_PACKED      1
    #define SW_DEPTH_PACK_COMP      1
    #define SW_DEPTH_MAX            1.0f
    #define SW_DEPTH_SCALE          1.0f
    #define SW_PACK_DEPTH(d)        ((SW_DEPTH_TYPE)(d))
    #define SW_UNPACK_DEPTH(p)      (p)
#endif
784
// State flags, one bit per capability
#define SW_STATE_SCISSOR_TEST               0x01
#define SW_STATE_TEXTURE_2D                 0x02
#define SW_STATE_DEPTH_TEST                 0x04
#define SW_STATE_CULL_FACE                  0x08
#define SW_STATE_BLEND                      0x10

// True only when ALL the requested flags are set in 'state'
#define SW_STATE_CHECK_EX(state, flags)     (((state) & (flags)) == (flags))

// Same check, against the global context state flags
#define SW_STATE_CHECK(flags)               (SW_STATE_CHECK_EX(RLSW.stateFlags, (flags)))
793
794//----------------------------------------------------------------------------------
795// Module Types and Structures Definition
796//----------------------------------------------------------------------------------
797// Pixel data format type
798// NOTE: Enum aligned with raylib PixelFormat
typedef enum {
    SW_PIXELFORMAT_UNKNOWN                      = 0,
    SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE       = 1,    // 8 bit per pixel (no alpha)
    SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA      = 2,    // 8*2 bpp (2 channels)
    SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5          = 3,    // 16 bpp
    SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8          = 4,    // 24 bpp
    SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1        = 5,    // 16 bpp (1 bit alpha)
    SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4        = 6,    // 16 bpp (4 bit alpha)
    SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8        = 7,    // 32 bpp
    SW_PIXELFORMAT_UNCOMPRESSED_R32             = 8,    // 32 bpp (1 channel - float)
    SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32       = 9,    // 32*3 bpp (3 channels - float)
    SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32    = 10,   // 32*4 bpp (4 channels - float)
    SW_PIXELFORMAT_UNCOMPRESSED_R16             = 11,   // 16 bpp (1 channel - half float)
    SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16       = 12,   // 16*3 bpp (3 channels - half float)
    SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16    = 13    // 16*4 bpp (4 channels - half float)
} sw_pixelformat_t;
815
816typedef void (*sw_factor_f)(
817 float *SW_RESTRICT factor,
818 const float *SW_RESTRICT src,
819 const float *SW_RESTRICT dst
820);
821
typedef float       sw_matrix_t[16];    // 4x4 matrix stored as a flat float array
typedef uint16_t    sw_half_t;          // 16-bit storage type
824
// Vertex data: input attributes followed by the derived coordinates
typedef struct {
    // Attributes
    float position[4];          // Position coordinates
    float texcoord[2];          // Texture coordinates
    float color[4];             // Color value (RGBA)

    // Derived coordinates
    float homogeneous[4];       // Homogeneous coordinates
    float screen[2];            // Screen coordinates
} sw_vertex_t;
833
834typedef struct {
835 uint8_t *pixels; // Texture pixels (RGBA32)
836
837 int width, height; // Dimensions of the texture
838 int wMinus1, hMinus1; // Dimensions minus one
839
840 SWfilter minFilter; // Minification filter
841 SWfilter magFilter; // Magnification filter
842
843 SWwrap sWrap; // texcoord.x wrap mode
844 SWwrap tWrap; // texcoord.y wrap mode
845
846 float tx; // Texel width
847 float ty; // Texel height
848} sw_texture_t;
849
// Pixel data type
// One framebuffer element holding color and depth side by side; the layout is driven
// entirely by the SW_COLOR_*/SW_DEPTH_* configuration macros
typedef SW_ALIGN(SW_PIXEL_ALIGNMENT) struct {
    SW_COLOR_TYPE color[SW_COLOR_PACK_COMP];    // Color storage (SW_COLOR_PACK_COMP elements)
    SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP];    // Depth storage (SW_DEPTH_PACK_COMP elements)
#if (SW_PIXEL_SIZE % SW_PIXEL_ALIGNMENT != 0)
    // Explicit tail padding up to the next multiple of SW_PIXEL_ALIGNMENT
    uint8_t padding[SW_PIXEL_ALIGNMENT - SW_PIXEL_SIZE % SW_PIXEL_ALIGNMENT];
#endif
} sw_pixel_t;
858
// Framebuffer storage: pixel array plus its current and allocated size
typedef struct {
    sw_pixel_t *pixels;         // Pixel storage (color + depth per element)
    int width;                  // Current width in pixels (also used as the row stride)
    int height;                 // Current height in pixels
    int allocSz;                // Number of pixels allocated; may exceed width*height after a shrinking resize
} sw_framebuffer_t;
865
// Renderer context: framebuffer, viewport/scissor rectangles, vertex assembly state,
// matrix stacks, blend/cull configuration and texture bookkeeping
typedef struct {
    sw_framebuffer_t framebuffer;               // Main framebuffer
    sw_pixel_t clearValue;                      // Clear value of the framebuffer

    float vpCenter[2];                          // Viewport center
    float vpHalf[2];                            // Viewport half dimensions
    int vpSize[2];                              // Viewport dimensions (minus one)
    int vpMin[2];                               // Viewport minimum renderable point (top-left)
    int vpMax[2];                               // Viewport maximum renderable point (bottom-right)

    int scMin[2];                               // Scissor rectangle minimum renderable point (top-left), inclusive
    int scMax[2];                               // Scissor rectangle maximum renderable point (bottom-right), inclusive
    float scClipMin[2];                         // Scissor rectangle minimum renderable point in clip space
    float scClipMax[2];                         // Scissor rectangle maximum renderable point in clip space

    uint32_t currentTexture;                    // Current active texture id

    // Vertex attribute array pointers
    // NOTE(review): presumably user-supplied arrays for the vertex-array drawing path - confirm against the setters
    struct {
        float *positions;
        float *texcoords;
        uint8_t *colors;
    } array;

    // Current attribute values
    // NOTE(review): presumably applied to the next submitted vertex - confirm against the vertex submission code
    struct {
        float texcoord[2];
        float color[4];
    } current;

    sw_vertex_t vertexBuffer[SW_MAX_CLIPPED_POLYGON_VERTICES]; // Buffer used for storing primitive vertices, used for processing and rendering
    int vertexCounter;                          // Number of vertices in 'vertexBuffer'

    SWdraw drawMode;                            // Current primitive mode (e.g., lines, triangles)
    SWpoly polyMode;                            // Current polygon rasterization mode
    int reqVertices;                            // Number of vertices required for the primitive being drawn
    float pointRadius;                          // Rasterized point radius
    float lineWidth;                            // Rasterized line width

    sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE];  // Projection matrix stack for push/pop operations
    sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE];    // Modelview matrix stack for push/pop operations
    sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE];        // Texture matrix stack for push/pop operations
    uint32_t stackProjectionCounter;            // Counter for projection matrix stack operations
    uint32_t stackModelviewCounter;             // Counter for modelview matrix stack operations
    uint32_t stackTextureCounter;               // Counter for texture matrix stack operations
    SWmatrix currentMatrixMode;                 // Current matrix mode (selects which of the stacks above is active)
    sw_matrix_t *currentMatrix;                 // Pointer to the currently used matrix according to the mode
    sw_matrix_t matMVP;                         // Model view projection matrix, calculated and used internally
    bool isDirtyMVP;                            // Indicates if the MVP matrix should be rebuilt

    SWfactor srcFactor;                         // Source blend factor selector
    SWfactor dstFactor;                         // Destination blend factor selector

    sw_factor_f srcFactorFunc;                  // Callback paired with 'srcFactor'
    sw_factor_f dstFactorFunc;                  // Callback paired with 'dstFactor'

    SWface cullFace;                            // Faces to cull
    SWerrcode errCode;                          // Last error code

    sw_texture_t *loadedTextures;               // Texture array; NOTE(review): presumably indexed by texture id - confirm
    int loadedTextureCount;                     // Count associated with 'loadedTextures'

    uint32_t *freeTextureIds;                   // Texture id list; NOTE(review): presumably ids available for reuse - confirm
    int freeTextureIdCount;                     // Count associated with 'freeTextureIds'

    uint32_t stateFlags;                        // Bitmask of SW_STATE_* flags (e.g. SW_STATE_SCISSOR_TEST)
} sw_context_t;
931
932//----------------------------------------------------------------------------------
933// Global Variables Definition
934//----------------------------------------------------------------------------------
// Single renderer context shared by every function in this file, zero-initialized at program start
static sw_context_t RLSW = { 0 };
936
937//----------------------------------------------------------------------------------
938// Module Functions Declaration
939//----------------------------------------------------------------------------------
940static inline void sw_matrix_id(sw_matrix_t dst)
941{
942 dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0;
943 dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0;
944 dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0;
945 dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1;
946}
947
948static inline void sw_matrix_mul_rst(float *SW_RESTRICT dst, const float *SW_RESTRICT left, const float *SW_RESTRICT right)
949{
950 float l00 = left[0], l01 = left[1], l02 = left[2], l03 = left[3];
951 float l10 = left[4], l11 = left[5], l12 = left[6], l13 = left[7];
952 float l20 = left[8], l21 = left[9], l22 = left[10], l23 = left[11];
953 float l30 = left[12], l31 = left[13], l32 = left[14], l33 = left[15];
954
955 dst[0] = l00*right[0] + l01*right[4] + l02*right[8] + l03*right[12];
956 dst[4] = l10*right[0] + l11*right[4] + l12*right[8] + l13*right[12];
957 dst[8] = l20*right[0] + l21*right[4] + l22*right[8] + l23*right[12];
958 dst[12] = l30*right[0] + l31*right[4] + l32*right[8] + l33*right[12];
959
960 dst[1] = l00*right[1] + l01*right[5] + l02*right[9] + l03*right[13];
961 dst[5] = l10*right[1] + l11*right[5] + l12*right[9] + l13*right[13];
962 dst[9] = l20*right[1] + l21*right[5] + l22*right[9] + l23*right[13];
963 dst[13] = l30*right[1] + l31*right[5] + l32*right[9] + l33*right[13];
964
965 dst[2] = l00*right[2] + l01*right[6] + l02*right[10] + l03*right[14];
966 dst[6] = l10*right[2] + l11*right[6] + l12*right[10] + l13*right[14];
967 dst[10] = l20*right[2] + l21*right[6] + l22*right[10] + l23*right[14];
968 dst[14] = l30*right[2] + l31*right[6] + l32*right[10] + l33*right[14];
969
970 dst[3] = l00*right[3] + l01*right[7] + l02*right[11] + l03*right[15];
971 dst[7] = l10*right[3] + l11*right[7] + l12*right[11] + l13*right[15];
972 dst[11] = l20*right[3] + l21*right[7] + l22*right[11] + l23*right[15];
973 dst[15] = l30*right[3] + l31*right[7] + l32*right[11] + l33*right[15];
974}
975
976static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right)
977{
978 float result[16];
979
980 sw_matrix_mul_rst(result, left, right);
981
982 for (int i = 0; i < 16; i++) dst[i] = result[i];
983}
984
// Clamp a float to [0.0f, 1.0f] by inspecting its IEEE 754 bit pattern
// NOTE: Any value with the sign bit set (including -0.0f) yields 0.0f
static inline float sw_saturate(float x)
{
    union { float value; uint32_t bits; } pun;
    pun.value = x;

    // Sign bit set: x is negative
    if (pun.bits & 0x80000000) return 0.0f;

    // For non-negative floats the bit patterns are ordered like the values,
    // so anything above the pattern of 1.0f (0x3F800000) is greater than one
    return (pun.bits > 0x3F800000)? 1.0f : x;
}
1001
1002static inline float sw_fract(float x)
1003{
1004 return (x - floorf(x));
1005}
1006
// Clamp an integer to [min, max]; the lower bound is tested first
static inline int sw_clampi(int v, int min, int max)
{
    return (v < min)? min : ((v > max)? max : v);
}
1013
1014static inline void sw_lerp_vertex_PTCH(sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT a, const sw_vertex_t *SW_RESTRICT b, float t)
1015{
1016 const float tInv = 1.0f - t;
1017
1018 // Position interpolation (4 components)
1019 out->position[0] = a->position[0]*tInv + b->position[0]*t;
1020 out->position[1] = a->position[1]*tInv + b->position[1]*t;
1021 out->position[2] = a->position[2]*tInv + b->position[2]*t;
1022 out->position[3] = a->position[3]*tInv + b->position[3]*t;
1023
1024 // Texture coordinate interpolation (2 components)
1025 out->texcoord[0] = a->texcoord[0]*tInv + b->texcoord[0]*t;
1026 out->texcoord[1] = a->texcoord[1]*tInv + b->texcoord[1]*t;
1027
1028 // Color interpolation (4 components)
1029 out->color[0] = a->color[0]*tInv + b->color[0]*t;
1030 out->color[1] = a->color[1]*tInv + b->color[1]*t;
1031 out->color[2] = a->color[2]*tInv + b->color[2]*t;
1032 out->color[3] = a->color[3]*tInv + b->color[3]*t;
1033
1034 // Homogeneous coordinate interpolation (4 components)
1035 out->homogeneous[0] = a->homogeneous[0]*tInv + b->homogeneous[0]*t;
1036 out->homogeneous[1] = a->homogeneous[1]*tInv + b->homogeneous[1]*t;
1037 out->homogeneous[2] = a->homogeneous[2]*tInv + b->homogeneous[2]*t;
1038 out->homogeneous[3] = a->homogeneous[3]*tInv + b->homogeneous[3]*t;
1039}
1040
1041static inline void sw_get_vertex_grad_PTCH(sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT a, const sw_vertex_t *SW_RESTRICT b, float scale)
1042{
1043 // Calculate gradients for Position
1044 out->position[0] = (b->position[0] - a->position[0])*scale;
1045 out->position[1] = (b->position[1] - a->position[1])*scale;
1046 out->position[2] = (b->position[2] - a->position[2])*scale;
1047 out->position[3] = (b->position[3] - a->position[3])*scale;
1048
1049 // Calculate gradients for Texture coordinates
1050 out->texcoord[0] = (b->texcoord[0] - a->texcoord[0])*scale;
1051 out->texcoord[1] = (b->texcoord[1] - a->texcoord[1])*scale;
1052
1053 // Calculate gradients for Color
1054 out->color[0] = (b->color[0] - a->color[0])*scale;
1055 out->color[1] = (b->color[1] - a->color[1])*scale;
1056 out->color[2] = (b->color[2] - a->color[2])*scale;
1057 out->color[3] = (b->color[3] - a->color[3])*scale;
1058
1059 // Calculate gradients for Homogeneous coordinates
1060 out->homogeneous[0] = (b->homogeneous[0] - a->homogeneous[0])*scale;
1061 out->homogeneous[1] = (b->homogeneous[1] - a->homogeneous[1])*scale;
1062 out->homogeneous[2] = (b->homogeneous[2] - a->homogeneous[2])*scale;
1063 out->homogeneous[3] = (b->homogeneous[3] - a->homogeneous[3])*scale;
1064}
1065
1066static inline void sw_add_vertex_grad_PTCH(sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT gradients)
1067{
1068 // Add gradients to Position
1069 out->position[0] += gradients->position[0];
1070 out->position[1] += gradients->position[1];
1071 out->position[2] += gradients->position[2];
1072 out->position[3] += gradients->position[3];
1073
1074 // Add gradients to Texture coordinates
1075 out->texcoord[0] += gradients->texcoord[0];
1076 out->texcoord[1] += gradients->texcoord[1];
1077
1078 // Add gradients to Color
1079 out->color[0] += gradients->color[0];
1080 out->color[1] += gradients->color[1];
1081 out->color[2] += gradients->color[2];
1082 out->color[3] += gradients->color[3];
1083
1084 // Add gradients to Homogeneous coordinates
1085 out->homogeneous[0] += gradients->homogeneous[0];
1086 out->homogeneous[1] += gradients->homogeneous[1];
1087 out->homogeneous[2] += gradients->homogeneous[2];
1088 out->homogeneous[3] += gradients->homogeneous[3];
1089}
1090
1091static inline void sw_add_vertex_grad_scaled_PTCH(
1092 sw_vertex_t *SW_RESTRICT out,
1093 const sw_vertex_t *SW_RESTRICT gradients,
1094 float scale)
1095{
1096 // Add gradients to Position
1097 out->position[0] += gradients->position[0]*scale;
1098 out->position[1] += gradients->position[1]*scale;
1099 out->position[2] += gradients->position[2]*scale;
1100 out->position[3] += gradients->position[3]*scale;
1101
1102 // Add gradients to Texture coordinates
1103 out->texcoord[0] += gradients->texcoord[0]*scale;
1104 out->texcoord[1] += gradients->texcoord[1]*scale;
1105
1106 // Add gradients to Color
1107 out->color[0] += gradients->color[0]*scale;
1108 out->color[1] += gradients->color[1]*scale;
1109 out->color[2] += gradients->color[2]*scale;
1110 out->color[3] += gradients->color[3]*scale;
1111
1112 // Add gradients to Homogeneous coordinates
1113 out->homogeneous[0] += gradients->homogeneous[0]*scale;
1114 out->homogeneous[1] += gradients->homogeneous[1]*scale;
1115 out->homogeneous[2] += gradients->homogeneous[2]*scale;
1116 out->homogeneous[3] += gradients->homogeneous[3]*scale;
1117}
1118
// Convert 4 floats to 4 unsigned normalized bytes: each component is scaled by 255
// and saturated to [0, 255]
//  - dst: receives the 4 converted bytes
//  - src: 4 input floats, nominally in [0.0f, 1.0f]; out-of-range values are saturated
// NOTE: The backends do not round identically: NEON and the scalar fallback truncate
// toward zero, the SSE paths round with the current MXCSR mode (nearest-even by default),
// so SSE results can be one step higher than the others
static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4])
{
#if defined(SW_HAS_NEON)
    float32x4_t values = vld1q_f32(src);
    float32x4_t scaled = vmulq_n_f32(values, 255.0f);
    int32x4_t clamped_s32 = vcvtq_s32_f32(scaled);                  // f32 -> s32 (truncated toward zero)
    int16x4_t narrow16_s = vqmovn_s32(clamped_s32);                 // s32 -> s16 (saturated)
    int16x8_t combined16_s = vcombine_s16(narrow16_s, narrow16_s);
    uint8x8_t narrow8_u = vqmovun_s16(combined16_s);                // s16 -> u8 (saturated to [0, 255])
    vst1_lane_u32((uint32_t*)dst, vreinterpret_u32_u8(narrow8_u), 0);
#elif defined(SW_HAS_SSE41)
    __m128 values = _mm_loadu_ps(src);
    __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f));
    __m128i clamped = _mm_cvtps_epi32(scaled);                      // f32 -> s32 (rounded per MXCSR, not truncated)
    clamped = _mm_packus_epi32(clamped, clamped);                   // s32 -> u16 (saturated < 0 to 0)
    clamped = _mm_packus_epi16(clamped, clamped);                   // u16 -> u8 (saturated > 255 to 255)
    *(uint32_t*)dst = _mm_cvtsi128_si32(clamped);
#elif defined(SW_HAS_SSE2)
    __m128 values = _mm_loadu_ps(src);
    __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f));
    __m128i clamped = _mm_cvtps_epi32(scaled);                      // f32 -> s32 (rounded per MXCSR, not truncated)
    clamped = _mm_packs_epi32(clamped, clamped);                    // s32 -> s16 (saturated)
    clamped = _mm_packus_epi16(clamped, clamped);                   // s16 -> u8 (saturated < 0 to 0)
    *(uint32_t*)dst = _mm_cvtsi128_si32(clamped);
#elif defined(SW_HAS_RVV)
    // TODO: Sample code generated by AI, needs testing and review
    // NOTE: RVV 1.0 specs define the use of __riscv_ prefix for intrinsic functions
    // NOTE(review): the rounding of the conversion and the u32 -> u8 narrowing below
    // have not been verified against the RVV intrinsics specification
    size_t vl = __riscv_vsetvl_e32m1(4);                            // Load up to 4 floats into a vector register
    vfloat32m1_t vsrc = __riscv_vle32_v_f32m1(src, vl);             // Load float32 values

    // Clamp to [0.0f, 1.0f]
    vfloat32m1_t vzero = __riscv_vfmv_v_f_f32m1(0.0f, vl);
    vfloat32m1_t vone = __riscv_vfmv_v_f_f32m1(1.0f, vl);
    vsrc = __riscv_vfmin_vv_f32m1(vsrc, vone, vl);
    vsrc = __riscv_vfmax_vv_f32m1(vsrc, vzero, vl);

    // Multiply by 255.0f and add 0.5f for rounding
    vfloat32m1_t vscaled = __riscv_vfmul_vf_f32m1(vsrc, 255.0f, vl);
    vscaled = __riscv_vfadd_vf_f32m1(vscaled, 0.5f, vl);

    // Convert to unsigned integer
    vuint32m1_t vu32 = __riscv_vfcvt_xu_f_v_u32m1(vscaled, vl);

    // Narrow from u32 -> u8
    vuint8m1_t vu8 = __riscv_vnclipu_wx_u8m1(vu32, 0, vl);
    __riscv_vse8_v_u8m1(dst, vu8, vl);                              // Store result
#else
    for (int i = 0; i < 4; i++)
    {
        float val = src[i]*255.0f;

        // The negated comparison also catches NaN: converting NaN to an integer is
        // undefined behavior, and the SIMD paths above produce 0 for it
        if (!(val > 0.0f)) val = 0.0f;
        else if (val > 255.0f) val = 255.0f;

        dst[i] = (uint8_t)val;  // Truncates toward zero
    }
#endif
}
1175
1176static inline void sw_float_from_unorm8_simd(float dst[4], const uint8_t src[4])
1177{
1178#if defined(SW_HAS_NEON)
1179 uint8x8_t bytes8 = vld1_u8(src); //< Read 8 bytes, faster, but let's hope we're not at the end of the page (unlikely)...
1180 uint16x8_t bytes16 = vmovl_u8(bytes8);
1181 uint32x4_t ints = vmovl_u16(vget_low_u16(bytes16));
1182 float32x4_t floats = vcvtq_f32_u32(ints);
1183 floats = vmulq_n_f32(floats, SW_INV_255);
1184 vst1q_f32(dst, floats);
1185#elif defined(SW_HAS_SSE41)
1186 __m128i bytes = _mm_cvtsi32_si128(*(const uint32_t *)src);
1187 __m128i ints = _mm_cvtepu8_epi32(bytes);
1188 __m128 floats = _mm_cvtepi32_ps(ints);
1189 floats = _mm_mul_ps(floats, _mm_set1_ps(SW_INV_255));
1190 _mm_storeu_ps(dst, floats);
1191#elif defined(SW_HAS_SSE2)
1192 __m128i bytes = _mm_cvtsi32_si128(*(const uint32_t *)src);
1193 bytes = _mm_unpacklo_epi8(bytes, _mm_setzero_si128());
1194 __m128i ints = _mm_unpacklo_epi16(bytes, _mm_setzero_si128());
1195 __m128 floats = _mm_cvtepi32_ps(ints);
1196 floats = _mm_mul_ps(floats, _mm_set1_ps(SW_INV_255));
1197 _mm_storeu_ps(dst, floats);
1198#elif defined(SW_HAS_RVV)
1199 // TODO: Sample code generated by AI, needs testing and review
1200 size_t vl = __riscv_vsetvl_e8m1(4); // Set vector length for 8-bit input elements
1201 vuint8m1_t vsrc_u8 = __riscv_vle8_v_u8m1(src, vl); // Load 4 unsigned 8-bit integers
1202 vuint32m1_t vsrc_u32 = __riscv_vwcvt_xu_u_v_u32m1(vsrc_u8, vl); // Widen to 32-bit unsigned integers
1203 vfloat32m1_t vsrc_f32 = __riscv_vfcvt_f_xu_v_f32m1(vsrc_u32, vl); // Convert to float32
1204 vfloat32m1_t vnorm = __riscv_vfmul_vf_f32m1(vsrc_f32, SW_INV_255, vl); // Multiply by 1/255.0 to normalize
1205 __riscv_vse32_v_f32m1(dst, vnorm, vl); // Store result
1206#else
1207 dst[0] = (float)src[0]*SW_INV_255;
1208 dst[1] = (float)src[1]*SW_INV_255;
1209 dst[2] = (float)src[2]*SW_INV_255;
1210 dst[3] = (float)src[3]*SW_INV_255;
1211#endif
1212}
1213
1214// Half conversion functions
// Expand the bit pattern of a half-precision float into the bit pattern of a 32-bit float
// Denormal halves are flushed to (signed) zero; infinity and NaN are preserved
static inline uint32_t sw_half_to_float_ui(uint16_t h)
{
    const uint32_t signBit = (uint32_t)(h & 0x8000) << 16;
    const int32_t expMant = h & 0x7fff;     // Exponent and mantissa, sign removed

    int32_t bits = 0;                       // Stays zero for denormals (exponent field == 0)

    if (expMant >= (1 << 10))
    {
        // Rebias the exponent and pad the mantissa with zeros; 112 is the bias difference (127-15)
        bits = (expMant + (112 << 10)) << 13;

        // Infinity/NaN: applying the bias fixup a second time turns exponent 31 into 255,
        // the NaN payload is carried along unchanged
        if (expMant >= (31 << 10)) bits += (112 << 23);
    }

    return signBit | (uint32_t)bits;
}
1232
1233static inline float sw_half_to_float(sw_half_t y)
1234{
1235 union { float f; uint32_t i; } v = { .i = sw_half_to_float_ui(y) };
1236
1237 return v.f;
1238}
1239
// Compress the bit pattern of a 32-bit float into the bit pattern of a half-precision float
// Values too small for a normal half are flushed to (signed) zero, values too large
// become infinity and every NaN becomes a quiet NaN
static inline uint16_t sw_half_from_float_ui(uint32_t ui)
{
    const int32_t signBit = (ui >> 16) & 0x8000;
    const int32_t expMant = ui & 0x7fffffff;    // Exponent and mantissa, sign removed

    // NaN: exponent 255 with a non-zero mantissa
    if (expMant > (255 << 23)) return (uint16_t)(signBit | 0x7e00);

    // Overflow to infinity; 143 encodes exponent 16
    if (expMant >= (143 << 23)) return (uint16_t)(signBit | 0x7c00);

    // Underflow to zero; 113 encodes exponent -14
    if (expMant < (113 << 23)) return (uint16_t)signBit;

    // Rebias the exponent and round to nearest; 112 is the bias difference (127-15)
    return (uint16_t)(signBit | ((expMant - (112 << 23) + (1 << 12)) >> 13));
}
1259
1260static inline sw_half_t sw_half_from_float(float i)
1261{
1262 union { float f; uint32_t i; } v;
1263 v.f = i;
1264 return sw_half_from_float_ui(v.i);
1265}
1266
1267// Framebuffer management functions
1268//-------------------------------------------------------------------------------------------
1269static inline bool sw_framebuffer_load(int w, int h)
1270{
1271 int size = w*h;
1272
1273 RLSW.framebuffer.pixels = SW_MALLOC(sizeof(sw_pixel_t)*size);
1274 if (RLSW.framebuffer.pixels == NULL) return false;
1275
1276 RLSW.framebuffer.width = w;
1277 RLSW.framebuffer.height = h;
1278 RLSW.framebuffer.allocSz = size;
1279
1280 return true;
1281}
1282
1283static inline bool sw_framebuffer_resize(int w, int h)
1284{
1285 int newSize = w*h;
1286
1287 if (newSize <= RLSW.framebuffer.allocSz)
1288 {
1289 RLSW.framebuffer.width = w;
1290 RLSW.framebuffer.height = h;
1291 return true;
1292 }
1293
1294 void *newPixels = SW_REALLOC(RLSW.framebuffer.pixels, sizeof(sw_pixel_t)*newSize);
1295 if (newPixels == NULL) return false;
1296
1297 RLSW.framebuffer.pixels = newPixels;
1298
1299 RLSW.framebuffer.width = w;
1300 RLSW.framebuffer.height = h;
1301 RLSW.framebuffer.allocSz = newSize;
1302
1303 return true;
1304}
1305
1306static inline void sw_framebuffer_read_color(float dst[4], const sw_pixel_t *src)
1307{
1308#if SW_COLOR_IS_PACKED
1309 SW_COLOR_TYPE pixel = src->color[0];
1310 dst[0] = SW_TO_FLOAT_R(SW_UNPACK_R(pixel));
1311 dst[1] = SW_TO_FLOAT_G(SW_UNPACK_G(pixel));
1312 dst[2] = SW_TO_FLOAT_B(SW_UNPACK_B(pixel));
1313 dst[3] = 1.0f;
1314#else
1315 sw_float_from_unorm8_simd(dst, src->color);
1316#endif
1317}
1318
1319static inline void sw_framebuffer_read_color8(uint8_t dst[4], const sw_pixel_t *src)
1320{
1321#if SW_COLOR_IS_PACKED
1322 SW_COLOR_TYPE pixel = src->color[0];
1323 dst[0] = SW_SCALE_R(SW_UNPACK_R(pixel));
1324 dst[1] = SW_SCALE_G(SW_UNPACK_G(pixel));
1325 dst[2] = SW_SCALE_B(SW_UNPACK_B(pixel));
1326 dst[3] = 255;
1327#else
1328 const SW_COLOR_TYPE *p = src->color;
1329 dst[0] = p[0];
1330 dst[1] = p[1];
1331 dst[2] = p[2];
1332 dst[3] = p[3];
1333#endif
1334}
1335
1336static inline float sw_framebuffer_read_depth(const sw_pixel_t *src)
1337{
1338#if SW_DEPTH_IS_PACKED
1339 return src->depth[0]*SW_DEPTH_SCALE;
1340#else
1341 return SW_UNPACK_DEPTH(src->depth)*SW_DEPTH_SCALE;
1342#endif
1343}
1344
1345static inline void sw_framebuffer_write_color(sw_pixel_t *dst, const float src[4])
1346{
1347#if SW_COLOR_IS_PACKED
1348 dst->color[0] = SW_PACK_COLOR(src[0], src[1], src[2]);
1349#else
1350 sw_float_to_unorm8_simd(dst->color, src);
1351#endif
1352}
1353
1354static inline void sw_framebuffer_write_depth(sw_pixel_t *dst, float depth)
1355{
1356 depth = sw_saturate(depth); // REVIEW: An overflow can occur in certain circumstances with clipping, and needs to be reviewed...
1357
1358#if SW_DEPTH_IS_PACKED
1359 dst->depth[0] = SW_PACK_DEPTH(depth);
1360#else
1361 dst->depth[0] = SW_PACK_DEPTH_0(depth);
1362 dst->depth[1] = SW_PACK_DEPTH_1(depth);
1363 dst->depth[2] = SW_PACK_DEPTH_2(depth);
1364#endif
1365}
1366
1367static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const SW_COLOR_TYPE color[SW_COLOR_PACK_COMP])
1368{
1369 if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST)
1370 {
1371 int w = RLSW.scMax[0] - RLSW.scMin[0] + 1;
1372 for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++)
1373 {
1374 sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0];
1375 for (int x = 0; x < w; x++, row++)
1376 {
1377 for (int i = 0; i < SW_COLOR_PACK_COMP; i++) row->color[i] = color[i];
1378 }
1379 }
1380 }
1381 else
1382 {
1383 for (int i = 0; i < size; i++, ptr++)
1384 {
1385 for (int j = 0; j < SW_COLOR_PACK_COMP; j++) ptr->color[j] = color[j];
1386 }
1387 }
1388}
1389
1390static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, const SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP])
1391{
1392 if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST)
1393 {
1394 int w = RLSW.scMax[0] - RLSW.scMin[0] + 1;
1395 for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++)
1396 {
1397 sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0];
1398 for (int x = 0; x < w; x++, row++)
1399 {
1400 for (int i = 0; i < SW_DEPTH_PACK_COMP; i++) row->depth[i] = depth[i];
1401 }
1402 }
1403 }
1404 else
1405 {
1406 for (int i = 0; i < size; i++, ptr++)
1407 {
1408 for (int j = 0; j < SW_DEPTH_PACK_COMP; j++) ptr->depth[j] = depth[j];
1409 }
1410 }
1411}
1412
1413static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, sw_pixel_t value)
1414{
1415 if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST)
1416 {
1417 int w = RLSW.scMax[0] - RLSW.scMin[0] + 1;
1418 for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++)
1419 {
1420 sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0];
1421 for (int x = 0; x < w; x++, row++) *row = value;
1422 }
1423 }
1424 else
1425 {
1426 for (int i = 0; i < size; i++, ptr++) *ptr = value;
1427 }
1428}
1429
1430static inline void sw_framebuffer_copy_fast(void* dst)
1431{
1432 int size = RLSW.framebuffer.width*RLSW.framebuffer.height;
1433 const sw_pixel_t *pixels = RLSW.framebuffer.pixels;
1434
1435#if SW_COLOR_BUFFER_BITS == 8
1436 uint8_t *dst8 = (uint8_t*)dst;
1437 for (int i = 0; i < size; i++) dst8[i] = pixels[i].color[0];
1438#elif SW_COLOR_BUFFER_BITS == 16
1439 uint16_t *dst16 = (uint16_t*)dst;
1440 for (int i = 0; i < size; i++) dst16[i] = *(uint16_t*)pixels[i].color;
1441#else // 32 bits
1442 uint32_t *dst32 = (uint32_t*)dst;
1443 #if SW_GL_FRAMEBUFFER_COPY_BGRA
1444 for (int i = 0; i < size; i++)
1445 {
1446 const uint8_t *c = pixels[i].color;
1447 dst32[i] = (uint32_t)c[2] | ((uint32_t)c[1] << 8) | ((uint32_t)c[0] << 16) | ((uint32_t)c[3] << 24);
1448 }
1449 #else // RGBA
1450 for (int i = 0; i < size; i++) dst32[i] = *(uint32_t*)pixels[i].color;
1451 #endif
1452#endif
1453}
1454
// Generator for the sw_framebuffer_copy_to_<name>() functions
// BEGIN opens the function and a row/column walk over the w*h framebuffer rectangle
// whose top-left corner is (x, y); for every pixel it reads the color into the local
// 'uint8_t color[4]' (RGBA). The code block written between BEGIN and END converts
// 'color' and must advance 'dst' itself. END closes the loops and the function.
#define DEFINE_FRAMEBUFFER_COPY_BEGIN(name, DST_PTR_T)                                          \
static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst)    \
{                                                                                               \
    const int fbStride = RLSW.framebuffer.width;                                                \
    const sw_pixel_t *rowStart = RLSW.framebuffer.pixels + (y*fbStride + x);                    \
                                                                                                \
    for (int row = 0; row < h; row++, rowStart += fbStride) {                                   \
        const sw_pixel_t *cursor = rowStart;                                                    \
        for (int col = 0; col < w; col++, ++cursor) {                                           \
            uint8_t color[4];                                                                   \
            sw_framebuffer_read_color8(color, cursor);

#define DEFINE_FRAMEBUFFER_COPY_END()                                                           \
        }                                                                                       \
    }                                                                                           \
}
1473
1474DEFINE_FRAMEBUFFER_COPY_BEGIN(GRAYSCALE, uint8_t)
1475{
1476 // NTSC grayscale conversion: Y = 0.299R + 0.587G + 0.114B
1477 uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000);
1478 *dst++ = gray;
1479}
1480DEFINE_FRAMEBUFFER_COPY_END()
1481
1482DEFINE_FRAMEBUFFER_COPY_BEGIN(GRAYALPHA, uint8_t)
1483{
1484 // Convert RGB to grayscale using NTSC formula
1485 uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000);
1486
1487 dst[0] = gray;
1488 dst[1] = color[3]; // alpha
1489
1490 dst += 2;
1491}
1492DEFINE_FRAMEBUFFER_COPY_END()
1493
1494DEFINE_FRAMEBUFFER_COPY_BEGIN(R5G6B5, uint16_t)
1495{
1496 // Convert 8-bit RGB to 5:6:5 format
1497 uint8_t r5 = (color[0]*31 + 127)/255;
1498 uint8_t g6 = (color[1]*63 + 127)/255;
1499 uint8_t b5 = (color[2]*31 + 127)/255;
1500
1501#if SW_GL_FRAMEBUFFER_COPY_BGRA
1502 uint16_t rgb565 = (b5 << 11) | (g6 << 5) | r5;
1503#else // RGBA
1504 uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5;
1505#endif
1506
1507 *dst++ = rgb565;
1508}
1509DEFINE_FRAMEBUFFER_COPY_END()
1510
1511DEFINE_FRAMEBUFFER_COPY_BEGIN(R8G8B8, uint8_t)
1512{
1513#if SW_GL_FRAMEBUFFER_COPY_BGRA
1514 dst[0] = color[2];
1515 dst[1] = color[1];
1516 dst[2] = color[0];
1517#else // RGBA
1518 dst[0] = color[0];
1519 dst[1] = color[1];
1520 dst[2] = color[2];
1521#endif
1522
1523 dst += 3;
1524}
1525DEFINE_FRAMEBUFFER_COPY_END()
1526
1527DEFINE_FRAMEBUFFER_COPY_BEGIN(R5G5B5A1, uint16_t)
1528{
1529 uint8_t r5 = (color[0]*31 + 127)/255;
1530 uint8_t g5 = (color[1]*31 + 127)/255;
1531 uint8_t b5 = (color[2]*31 + 127)/255;
1532 uint8_t a1 = (color[3] >= 128)? 1 : 0;
1533
1534#if SW_GL_FRAMEBUFFER_COPY_BGRA
1535 uint16_t pixel = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1;
1536#else // RGBA
1537 uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
1538#endif
1539
1540 *dst++ = pixel;
1541}
1542DEFINE_FRAMEBUFFER_COPY_END()
1543
1544DEFINE_FRAMEBUFFER_COPY_BEGIN(R4G4B4A4, uint16_t)
1545{
1546 uint8_t r4 = (color[0]*15 + 127)/255;
1547 uint8_t g4 = (color[1]*15 + 127)/255;
1548 uint8_t b4 = (color[2]*15 + 127)/255;
1549 uint8_t a4 = (color[3]*15 + 127)/255;
1550
1551#if SW_GL_FRAMEBUFFER_COPY_BGRA
1552 uint16_t pixel = (b4 << 12) | (g4 << 8) | (r4 << 4) | a4;
1553#else // RGBA
1554 uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4;
1555#endif
1556
1557 *dst++ = pixel;
1558}
1559DEFINE_FRAMEBUFFER_COPY_END()
1560
1561DEFINE_FRAMEBUFFER_COPY_BEGIN(R8G8B8A8, uint8_t)
1562{
1563#if SW_GL_FRAMEBUFFER_COPY_BGRA
1564 dst[0] = color[2];
1565 dst[1] = color[1];
1566 dst[2] = color[0];
1567#else // RGBA
1568 dst[0] = color[0];
1569 dst[1] = color[1];
1570 dst[2] = color[2];
1571#endif
1572 dst[3] = color[3];
1573
1574 dst += 4;
1575}
1576DEFINE_FRAMEBUFFER_COPY_END()
1577
// Generator for the sw_framebuffer_blit_to_<name>() functions
// BEGIN opens the function and a walk over the wDst*hDst destination pixels. Each one is
// point-sampled (no filtering) from the wSrc*hSrc framebuffer rectangle whose top-left
// corner is (xSrc, ySrc), stepping through the source in 16.16 fixed point. The sampled
// color is exposed as the local 'uint8_t color[4]' (RGBA); the code block written between
// BEGIN and END converts it and must advance 'dst' itself. END closes the loops and the function.
// NOTE: xDst and yDst are accepted but not used, output is written contiguously from 'dst'
// NOTE: The generated function returns without writing anything when wDst or hDst is not
// positive; this avoids a division by zero while computing the scale factors
// NOTE: Source sizes are shifted left by 16 in 32-bit arithmetic, so they must stay below 65536
#define DEFINE_FRAMEBUFFER_BLIT_BEGIN(name, DST_PTR_T)                          \
static inline void sw_framebuffer_blit_to_##name(                               \
    int xDst, int yDst, int wDst, int hDst,                                     \
    int xSrc, int ySrc, int wSrc, int hSrc,                                     \
    DST_PTR_T *dst)                                                             \
{                                                                               \
    if ((wDst <= 0) || (hDst <= 0)) return;                                     \
                                                                                \
    const sw_pixel_t *srcBase = RLSW.framebuffer.pixels;                        \
    const int fbWidth = RLSW.framebuffer.width;                                 \
                                                                                \
    const uint32_t xScale = ((uint32_t)wSrc << 16)/(uint32_t)wDst;              \
    const uint32_t yScale = ((uint32_t)hSrc << 16)/(uint32_t)hDst;              \
                                                                                \
    for (int dy = 0; dy < hDst; dy++) {                                         \
        uint32_t yFix = ((uint32_t)ySrc << 16) + dy*yScale;                     \
        int sy = yFix >> 16;                                                    \
        const sw_pixel_t *srcLine = srcBase + sy*fbWidth + xSrc;                \
                                                                                \
        for (int dx = 0; dx < wDst; dx++) {                                     \
            uint32_t xFix = dx*xScale;                                          \
            int sx = xFix >> 16;                                                \
            const sw_pixel_t *pixel = srcLine + sx;                             \
            uint8_t color[4];                                                   \
            sw_framebuffer_read_color8(color, pixel);

#define DEFINE_FRAMEBUFFER_BLIT_END()                                           \
        }                                                                       \
    }                                                                           \
}
1607
1608DEFINE_FRAMEBUFFER_BLIT_BEGIN(GRAYSCALE, uint8_t)
1609{
1610 uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000);
1611 *dst++ = gray;
1612}
1613DEFINE_FRAMEBUFFER_BLIT_END()
1614
1615DEFINE_FRAMEBUFFER_BLIT_BEGIN(GRAYALPHA, uint8_t)
1616{
1617 uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000);
1618
1619 dst[0] = gray;
1620 dst[1] = color[3]; // alpha
1621
1622 dst += 2;
1623}
1624DEFINE_FRAMEBUFFER_BLIT_END()
1625
1626DEFINE_FRAMEBUFFER_BLIT_BEGIN(R5G6B5, uint16_t)
1627{
1628 uint8_t r5 = (color[0]*31 + 127)/255;
1629 uint8_t g6 = (color[1]*63 + 127)/255;
1630 uint8_t b5 = (color[2]*31 + 127)/255;
1631
1632#if SW_GL_FRAMEBUFFER_COPY_BGRA
1633 uint16_t rgb565 = (b5 << 11) | (g6 << 5) | r5;
1634#else // RGBA
1635 uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5;
1636#endif
1637
1638 *dst++ = rgb565;
1639}
1640DEFINE_FRAMEBUFFER_BLIT_END()
1641
1642DEFINE_FRAMEBUFFER_BLIT_BEGIN(R8G8B8, uint8_t)
1643{
1644#if SW_GL_FRAMEBUFFER_COPY_BGRA
1645 dst[0] = color[2];
1646 dst[1] = color[1];
1647 dst[2] = color[0];
1648#else // RGBA
1649 dst[0] = color[0];
1650 dst[1] = color[1];
1651 dst[2] = color[2];
1652#endif
1653
1654 dst += 3;
1655}
1656DEFINE_FRAMEBUFFER_BLIT_END()
1657
1658DEFINE_FRAMEBUFFER_BLIT_BEGIN(R5G5B5A1, uint16_t)
1659{
1660 uint8_t r5 = (color[0]*31 + 127)/255;
1661 uint8_t g5 = (color[1]*31 + 127)/255;
1662 uint8_t b5 = (color[2]*31 + 127)/255;
1663 uint8_t a1 = (color[3] >= 128)? 1 : 0;
1664
1665#if SW_GL_FRAMEBUFFER_COPY_BGRA
1666 uint16_t pixel = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1;
1667#else // RGBA
1668 uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
1669#endif
1670
1671 *dst++ = pixel;
1672}
1673DEFINE_FRAMEBUFFER_BLIT_END()
1674
1675DEFINE_FRAMEBUFFER_BLIT_BEGIN(R4G4B4A4, uint16_t)
1676{
1677 uint8_t r4 = (color[0]*15 + 127)/255;
1678 uint8_t g4 = (color[1]*15 + 127)/255;
1679 uint8_t b4 = (color[2]*15 + 127)/255;
1680 uint8_t a4 = (color[3]*15 + 127)/255;
1681
1682#if SW_GL_FRAMEBUFFER_COPY_BGRA
1683 uint16_t pixel = (b4 << 12) | (g4 << 8) | (r4 << 4) | a4;
1684#else // RGBA
1685 uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4;
1686#endif
1687
1688 *dst++ = pixel;
1689}
1690DEFINE_FRAMEBUFFER_BLIT_END()
1691
1692DEFINE_FRAMEBUFFER_BLIT_BEGIN(R8G8B8A8, uint8_t)
1693{
1694#if SW_GL_FRAMEBUFFER_COPY_BGRA
1695 dst[0] = color[2];
1696 dst[1] = color[1];
1697 dst[2] = color[0];
1698#else // RGBA
1699 dst[0] = color[0];
1700 dst[1] = color[1];
1701 dst[2] = color[2];
1702#endif
1703 dst[3] = color[3];
1704
1705 dst += 4;
1706}
1707DEFINE_FRAMEBUFFER_BLIT_END()
1708//-------------------------------------------------------------------------------------------
1709
1710// Pixel format management functions
1711//-------------------------------------------------------------------------------------------
1712static inline int sw_get_pixel_format(SWformat format, SWtype type)
1713{
1714 int channels = 0;
1715 int bitsPerChannel = 8; // Default: 8 bits per channel
1716
1717 // Determine the number of channels (format)
1718 switch (format)
1719 {
1720 case SW_LUMINANCE: channels = 1; break;
1721 case SW_LUMINANCE_ALPHA: channels = 2; break;
1722 case SW_RGB: channels = 3; break;
1723 case SW_RGBA: channels = 4; break;
1724 default: return SW_PIXELFORMAT_UNKNOWN;
1725 }
1726
1727 // Determine the depth of each channel (type)
1728 switch (type)
1729 {
1730 case SW_UNSIGNED_BYTE: bitsPerChannel = 8; break;
1731 case SW_BYTE: bitsPerChannel = 8; break;
1732 case SW_UNSIGNED_SHORT: bitsPerChannel = 16; break;
1733 case SW_SHORT: bitsPerChannel = 16; break;
1734 case SW_UNSIGNED_INT: bitsPerChannel = 32; break;
1735 case SW_INT: bitsPerChannel = 32; break;
1736 case SW_FLOAT: bitsPerChannel = 32; break;
1737 default: return SW_PIXELFORMAT_UNKNOWN;
1738 }
1739
1740 // Map the format and type to the correct internal format
1741 if (bitsPerChannel == 8)
1742 {
1743 if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE;
1744 if (channels == 2) return SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA;
1745 if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8;
1746 if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8;
1747 }
1748 else if (bitsPerChannel == 16)
1749 {
1750 if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R16;
1751 if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16;
1752 if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16;
1753 }
1754 else if (bitsPerChannel == 32)
1755 {
1756 if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R32;
1757 if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32;
1758 if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32;
1759 }
1760
1761 return SW_PIXELFORMAT_UNKNOWN;
1762}
1763
1764static inline void sw_get_pixel(uint8_t *color, const void *pixels, uint32_t offset, sw_pixelformat_t format)
1765{
1766 switch (format)
1767 {
1768 case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE:
1769 {
1770 uint8_t gray = ((const uint8_t*)pixels)[offset];
1771 color[0] = gray;
1772 color[1] = gray;
1773 color[2] = gray;
1774 color[3] = 255;
1775 break;
1776 }
1777 case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA:
1778 {
1779 const uint8_t *src = &((const uint8_t*)pixels)[offset*2];
1780 color[0] = src[0];
1781 color[1] = src[0];
1782 color[2] = src[0];
1783 color[3] = src[1];
1784 break;
1785 }
1786 case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5:
1787 {
1788 uint16_t pixel = ((const uint16_t*)pixels)[offset];
1789 color[0] = ((pixel >> 11) & 0x1F)*255/31; // R (5 bits)
1790 color[1] = ((pixel >> 5) & 0x3F)*255/63; // G (6 bits)
1791 color[2] = (pixel & 0x1F)*255/31; // B (5 bits)
1792 color[3] = 255;
1793 break;
1794 }
1795 case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8:
1796 {
1797 const uint8_t *src = &((const uint8_t*)pixels)[offset*3];
1798 color[0] = src[0];
1799 color[1] = src[1];
1800 color[2] = src[2];
1801 color[3] = 255;
1802 break;
1803 }
1804 case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1:
1805 {
1806 uint16_t pixel = ((const uint16_t*)pixels)[offset];
1807 color[0] = ((pixel >> 11) & 0x1F)*255/31; // R (5 bits)
1808 color[1] = ((pixel >> 6) & 0x1F)*255/31; // G (5 bits)
1809 color[2] = ((pixel >> 1) & 0x1F)*255/31; // B (5 bits)
1810 color[3] = (pixel & 0x01)*255; // A (1 bit)
1811 break;
1812 }
1813 case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4:
1814 {
1815 uint16_t pixel = ((const uint16_t*)pixels)[offset];
1816 color[0] = ((pixel >> 12) & 0x0F)*255/15; // R (4 bits)
1817 color[1] = ((pixel >> 8) & 0x0F)*255/15; // G (4 bits)
1818 color[2] = ((pixel >> 4) & 0x0F)*255/15; // B (4 bits)
1819 color[3] = (pixel & 0x0F)*255/15; // A (4 bits)
1820 break;
1821 }
1822 case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8:
1823 {
1824 const uint8_t *src = &((const uint8_t*)pixels)[offset*4];
1825 color[0] = src[0];
1826 color[1] = src[1];
1827 color[2] = src[2];
1828 color[3] = src[3];
1829 break;
1830 }
1831 case SW_PIXELFORMAT_UNCOMPRESSED_R32:
1832 {
1833 float val = ((const float*)pixels)[offset];
1834 uint8_t gray = (uint8_t)(val*255.0f);
1835 color[0] = gray;
1836 color[1] = gray;
1837 color[2] = gray;
1838 color[3] = 255;
1839 break;
1840 }
1841 case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32:
1842 {
1843 const float *src = &((const float*)pixels)[offset*3];
1844 color[0] = (uint8_t)(src[0]*255.0f);
1845 color[1] = (uint8_t)(src[1]*255.0f);
1846 color[2] = (uint8_t)(src[2]*255.0f);
1847 color[3] = 255;
1848 break;
1849 }
1850 case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32:
1851 {
1852 const float *src = &((const float*)pixels)[offset*4];
1853 color[0] = (uint8_t)(src[0]*255.0f);
1854 color[1] = (uint8_t)(src[1]*255.0f);
1855 color[2] = (uint8_t)(src[2]*255.0f);
1856 color[3] = (uint8_t)(src[3]*255.0f);
1857 break;
1858 }
1859 case SW_PIXELFORMAT_UNCOMPRESSED_R16:
1860 {
1861 uint16_t val = ((const uint16_t*)pixels)[offset];
1862 uint8_t gray = sw_half_to_float(val)*SW_INV_255;
1863 color[0] = gray;
1864 color[1] = gray;
1865 color[2] = gray;
1866 color[3] = 255;
1867 break;
1868 }
1869 case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16:
1870 {
1871 const uint16_t *src = &((const uint16_t*)pixels)[offset*3];
1872 color[0] = sw_half_to_float(src[0])*SW_INV_255;
1873 color[1] = sw_half_to_float(src[1])*SW_INV_255;
1874 color[2] = sw_half_to_float(src[2])*SW_INV_255;
1875 color[3] = 255;
1876 break;
1877 }
1878 case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16:
1879 {
1880 const uint16_t *src = &((const uint16_t*)pixels)[offset*4];
1881 color[0] = sw_half_to_float(src[0])*SW_INV_255;
1882 color[1] = sw_half_to_float(src[1])*SW_INV_255;
1883 color[2] = sw_half_to_float(src[2])*SW_INV_255;
1884 color[3] = sw_half_to_float(src[3])*SW_INV_255;
1885 break;
1886 }
1887 case SW_PIXELFORMAT_UNKNOWN:
1888 default:
1889 {
1890 color[0] = 0;
1891 color[1] = 0;
1892 color[2] = 0;
1893 color[3] = 0;
1894 break;
1895 }
1896 }
1897}
1898//-------------------------------------------------------------------------------------------
1899
1900// Texture sampling functionality
1901//-------------------------------------------------------------------------------------------
1902static inline void sw_texture_fetch(float* color, const sw_texture_t* tex, int x, int y)
1903{
1904 sw_float_from_unorm8_simd(color, &tex->pixels[4*(y*tex->width + x)]);
1905}
1906
1907static inline void sw_texture_sample_nearest(float *color, const sw_texture_t *tex, float u, float v)
1908{
1909 u = (tex->sWrap == SW_REPEAT)? sw_fract(u) : sw_saturate(u);
1910 v = (tex->tWrap == SW_REPEAT)? sw_fract(v) : sw_saturate(v);
1911
1912 int x = u*tex->width;
1913 int y = v*tex->height;
1914
1915 sw_texture_fetch(color, tex, x, y);
1916}
1917
1918static inline void sw_texture_sample_linear(float *color, const sw_texture_t *tex, float u, float v)
1919{
1920 // TODO: With a bit more cleverness we could clearly reduce the
1921 // number of operations here, but for now it works fine
1922
1923 float xf = (u*tex->width) - 0.5f;
1924 float yf = (v*tex->height) - 0.5f;
1925
1926 float fx = sw_fract(xf);
1927 float fy = sw_fract(yf);
1928
1929 int x0 = (int)xf;
1930 int y0 = (int)yf;
1931
1932 int x1 = x0 + 1;
1933 int y1 = y0 + 1;
1934
1935 // NOTE: If the textures are POT we could avoid the division for SW_REPEAT
1936
1937 if (tex->sWrap == SW_CLAMP)
1938 {
1939 x0 = (x0 > tex->wMinus1)? tex->wMinus1 : x0;
1940 x1 = (x1 > tex->wMinus1)? tex->wMinus1 : x1;
1941 }
1942 else
1943 {
1944 x0 = (x0%tex->width + tex->width)%tex->width;
1945 x1 = (x1%tex->width + tex->width)%tex->width;
1946 }
1947
1948 if (tex->tWrap == SW_CLAMP)
1949 {
1950 y0 = (y0 > tex->hMinus1)? tex->hMinus1 : y0;
1951 y1 = (y1 > tex->hMinus1)? tex->hMinus1 : y1;
1952 }
1953 else
1954 {
1955 y0 = (y0%tex->height + tex->height)%tex->height;
1956 y1 = (y1%tex->height + tex->height)%tex->height;
1957 }
1958
1959 float c00[4], c10[4], c01[4], c11[4];
1960 sw_texture_fetch(c00, tex, x0, y0);
1961 sw_texture_fetch(c10, tex, x1, y0);
1962 sw_texture_fetch(c01, tex, x0, y1);
1963 sw_texture_fetch(c11, tex, x1, y1);
1964
1965 for (int i = 0; i < 4; i++)
1966 {
1967 float t = c00[i] + fx*(c10[i] - c00[i]);
1968 float b = c01[i] + fx*(c11[i] - c01[i]);
1969 color[i] = t + fy*(b - t);
1970 }
1971}
1972
1973static inline void sw_texture_sample(float *color, const sw_texture_t *tex, float u, float v, float dUdx, float dUdy, float dVdx, float dVdy)
1974{
1975 // Previous method: There is no need to compute the square root
1976 // because using the squared value, the comparison remains `L2 > 1.0f*1.0f`
1977 //float du = sqrtf(dUdx*dUdx + dUdy*dUdy);
1978 //float dv = sqrtf(dVdx*dVdx + dVdy*dVdy);
1979 //float L = (du > dv)? du : dv;
1980
1981 // Calculate the derivatives for each axis
1982 float dU2 = dUdx*dUdx + dUdy*dUdy;
1983 float dV2 = dVdx*dVdx + dVdy*dVdy;
1984 float L2 = (dU2 > dV2)? dU2 : dV2;
1985
1986 SWfilter filter = (L2 > 1.0f)? tex->minFilter : tex->magFilter;
1987
1988 switch (filter)
1989 {
1990 case SW_NEAREST: sw_texture_sample_nearest(color, tex, u, v); break;
1991 case SW_LINEAR: sw_texture_sample_linear(color, tex, u, v); break;
1992 default: break;
1993 }
1994}
1995//-------------------------------------------------------------------------------------------
1996
1997// Color blending functionality
1998//-------------------------------------------------------------------------------------------
1999static inline void sw_factor_zero(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2000{
2001 factor[0] = factor[1] = factor[2] = factor[3] = 0.0f;
2002}
2003
2004static inline void sw_factor_one(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2005{
2006 factor[0] = factor[1] = factor[2] = factor[3] = 1.0f;
2007}
2008
2009static inline void sw_factor_src_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2010{
2011 factor[0] = src[0]; factor[1] = src[1]; factor[2] = src[2]; factor[3] = src[3];
2012}
2013
2014static inline void sw_factor_one_minus_src_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2015{
2016 factor[0] = 1.0f - src[0]; factor[1] = 1.0f - src[1];
2017 factor[2] = 1.0f - src[2]; factor[3] = 1.0f - src[3];
2018}
2019
2020static inline void sw_factor_src_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2021{
2022 factor[0] = factor[1] = factor[2] = factor[3] = src[3];
2023}
2024
2025static inline void sw_factor_one_minus_src_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2026{
2027 float invAlpha = 1.0f - src[3];
2028 factor[0] = factor[1] = factor[2] = factor[3] = invAlpha;
2029}
2030
2031static inline void sw_factor_dst_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2032{
2033 factor[0] = factor[1] = factor[2] = factor[3] = dst[3];
2034}
2035
2036static inline void sw_factor_one_minus_dst_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2037{
2038 float invAlpha = 1.0f - dst[3];
2039 factor[0] = factor[1] = factor[2] = factor[3] = invAlpha;
2040}
2041
2042static inline void sw_factor_dst_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2043{
2044 factor[0] = dst[0]; factor[1] = dst[1]; factor[2] = dst[2]; factor[3] = dst[3];
2045}
2046
2047static inline void sw_factor_one_minus_dst_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2048{
2049 factor[0] = 1.0f - dst[0]; factor[1] = 1.0f - dst[1];
2050 factor[2] = 1.0f - dst[2]; factor[3] = 1.0f - dst[3];
2051}
2052
2053static inline void sw_factor_src_alpha_saturate(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst)
2054{
2055 factor[0] = factor[1] = factor[2] = 1.0f;
2056 factor[3] = (src[3] < 1.0f)? src[3] : 1.0f;
2057}
2058
2059static inline void sw_blend_colors(float *SW_RESTRICT dst/*[4]*/, const float *SW_RESTRICT src/*[4]*/)
2060{
2061 float srcFactor[4], dstFactor[4];
2062
2063 RLSW.srcFactorFunc(srcFactor, src, dst);
2064 RLSW.dstFactorFunc(dstFactor, src, dst);
2065
2066 dst[0] = srcFactor[0]*src[0] + dstFactor[0]*dst[0];
2067 dst[1] = srcFactor[1]*src[1] + dstFactor[1]*dst[1];
2068 dst[2] = srcFactor[2]*src[2] + dstFactor[2]*dst[2];
2069 dst[3] = srcFactor[3]*src[3] + dstFactor[3]*dst[3];
2070}
2071//-------------------------------------------------------------------------------------------
2072
2073// Projection helper functions
2074//-------------------------------------------------------------------------------------------
2075static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4])
2076{
2077 screen[0] = RLSW.vpCenter[0] + ndc[0]*RLSW.vpHalf[0] + 0.5f;
2078 screen[1] = RLSW.vpCenter[1] - ndc[1]*RLSW.vpHalf[1] + 0.5f;
2079}
2080//-------------------------------------------------------------------------------------------
2081
2082// Polygon clipping management
2083//-------------------------------------------------------------------------------------------
// Generates sw_clip_<name>(): clips the polygon 'input' (n vertices) against one
// plane and stores the result in 'output', returning the new vertex count
//  - FUNC_IS_INSIDE(h): non-zero when homogeneous position h is on the kept side
//  - FUNC_COMPUTE_T(hPrev, hCurr): interpolation parameter of the plane crossing
// NOTE: 'n' must be at least 1, the walk starts from the last input vertex
#define DEFINE_CLIP_FUNC(name, FUNC_IS_INSIDE, FUNC_COMPUTE_T)                          \
static inline int sw_clip_##name(                                                       \
    sw_vertex_t output[SW_MAX_CLIPPED_POLYGON_VERTICES],                                \
    const sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES],                           \
    int n)                                                                              \
{                                                                                       \
    /* Every edge goes from 'tail' to 'head'; the first one closes the polygon */       \
    const sw_vertex_t *tail = &input[n - 1];                                            \
    int tailInside = FUNC_IS_INSIDE(tail->homogeneous);                                 \
    int emitted = 0;                                                                    \
                                                                                        \
    for (int i = 0; i < n; i++)                                                         \
    {                                                                                   \
        const sw_vertex_t *head = &input[i];                                            \
        const int headInside = FUNC_IS_INSIDE(head->homogeneous);                       \
                                                                                        \
        /* The edge crosses the plane: emit the interpolated crossing vertex */         \
        if (tailInside != headInside)                                                   \
        {                                                                               \
            const float t = FUNC_COMPUTE_T(tail->homogeneous, head->homogeneous);       \
            sw_lerp_vertex_PTCH(&output[emitted], tail, head, t);                       \
            emitted++;                                                                  \
        }                                                                               \
                                                                                        \
        /* Vertices on the kept side are copied through unchanged */                    \
        if (headInside)                                                                 \
        {                                                                               \
            output[emitted] = *head;                                                    \
            emitted++;                                                                  \
        }                                                                               \
                                                                                        \
        tail = head;                                                                    \
        tailInside = headInside;                                                        \
    }                                                                                   \
                                                                                        \
    return emitted;                                                                     \
}
2115//-------------------------------------------------------------------------------------------
2116
// Frustum clipping functions
2118//-------------------------------------------------------------------------------------------
2119#define IS_INSIDE_PLANE_W(h) ((h)[3] >= SW_CLIP_EPSILON)
2120#define IS_INSIDE_PLANE_X_POS(h) ( (h)[0] < (h)[3]) // Exclusive for +X
2121#define IS_INSIDE_PLANE_X_NEG(h) (-(h)[0] < (h)[3]) // Exclusive for -X
2122#define IS_INSIDE_PLANE_Y_POS(h) ( (h)[1] < (h)[3]) // Exclusive for +Y
2123#define IS_INSIDE_PLANE_Y_NEG(h) (-(h)[1] < (h)[3]) // Exclusive for -Y
2124#define IS_INSIDE_PLANE_Z_POS(h) ( (h)[2] <= (h)[3]) // Inclusive for +Z
2125#define IS_INSIDE_PLANE_Z_NEG(h) (-(h)[2] <= (h)[3]) // Inclusive for -Z
2126
2127#define COMPUTE_T_PLANE_W(hPrev, hCurr) ((SW_CLIP_EPSILON - (hPrev)[3])/((hCurr)[3] - (hPrev)[3]))
2128#define COMPUTE_T_PLANE_X_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[0])/(((hPrev)[3] - (hPrev)[0]) - ((hCurr)[3] - (hCurr)[0])))
2129#define COMPUTE_T_PLANE_X_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[0])/(((hPrev)[3] + (hPrev)[0]) - ((hCurr)[3] + (hCurr)[0])))
2130#define COMPUTE_T_PLANE_Y_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[1])/(((hPrev)[3] - (hPrev)[1]) - ((hCurr)[3] - (hCurr)[1])))
2131#define COMPUTE_T_PLANE_Y_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[1])/(((hPrev)[3] + (hPrev)[1]) - ((hCurr)[3] + (hCurr)[1])))
2132#define COMPUTE_T_PLANE_Z_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[2])/(((hPrev)[3] - (hPrev)[2]) - ((hCurr)[3] - (hCurr)[2])))
2133#define COMPUTE_T_PLANE_Z_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[2])/(((hPrev)[3] + (hPrev)[2]) - ((hCurr)[3] + (hCurr)[2])))
2134
2135DEFINE_CLIP_FUNC(w, IS_INSIDE_PLANE_W, COMPUTE_T_PLANE_W)
2136DEFINE_CLIP_FUNC(x_pos, IS_INSIDE_PLANE_X_POS, COMPUTE_T_PLANE_X_POS)
2137DEFINE_CLIP_FUNC(x_neg, IS_INSIDE_PLANE_X_NEG, COMPUTE_T_PLANE_X_NEG)
2138DEFINE_CLIP_FUNC(y_pos, IS_INSIDE_PLANE_Y_POS, COMPUTE_T_PLANE_Y_POS)
2139DEFINE_CLIP_FUNC(y_neg, IS_INSIDE_PLANE_Y_NEG, COMPUTE_T_PLANE_Y_NEG)
2140DEFINE_CLIP_FUNC(z_pos, IS_INSIDE_PLANE_Z_POS, COMPUTE_T_PLANE_Z_POS)
2141DEFINE_CLIP_FUNC(z_neg, IS_INSIDE_PLANE_Z_NEG, COMPUTE_T_PLANE_Z_NEG)
2142//-------------------------------------------------------------------------------------------
2143
2144// Scissor clip functions
2145//-------------------------------------------------------------------------------------------
2146#define COMPUTE_T_SCISSOR_X_MIN(hPrev, hCurr) (((RLSW.scClipMin[0])*(hPrev)[3] - (hPrev)[0])/(((hCurr)[0] - (RLSW.scClipMin[0])*(hCurr)[3]) - ((hPrev)[0] - (RLSW.scClipMin[0])*(hPrev)[3])))
2147#define COMPUTE_T_SCISSOR_X_MAX(hPrev, hCurr) (((RLSW.scClipMax[0])*(hPrev)[3] - (hPrev)[0])/(((hCurr)[0] - (RLSW.scClipMax[0])*(hCurr)[3]) - ((hPrev)[0] - (RLSW.scClipMax[0])*(hPrev)[3])))
2148#define COMPUTE_T_SCISSOR_Y_MIN(hPrev, hCurr) (((RLSW.scClipMin[1])*(hPrev)[3] - (hPrev)[1])/(((hCurr)[1] - (RLSW.scClipMin[1])*(hCurr)[3]) - ((hPrev)[1] - (RLSW.scClipMin[1])*(hPrev)[3])))
2149#define COMPUTE_T_SCISSOR_Y_MAX(hPrev, hCurr) (((RLSW.scClipMax[1])*(hPrev)[3] - (hPrev)[1])/(((hCurr)[1] - (RLSW.scClipMax[1])*(hCurr)[3]) - ((hPrev)[1] - (RLSW.scClipMax[1])*(hPrev)[3])))
2150
2151#define IS_INSIDE_SCISSOR_X_MIN(h) ((h)[0] >= (RLSW.scClipMin[0])*(h)[3])
2152#define IS_INSIDE_SCISSOR_X_MAX(h) ((h)[0] <= (RLSW.scClipMax[0])*(h)[3])
2153#define IS_INSIDE_SCISSOR_Y_MIN(h) ((h)[1] >= (RLSW.scClipMin[1])*(h)[3])
2154#define IS_INSIDE_SCISSOR_Y_MAX(h) ((h)[1] <= (RLSW.scClipMax[1])*(h)[3])
2155
2156DEFINE_CLIP_FUNC(scissor_x_min, IS_INSIDE_SCISSOR_X_MIN, COMPUTE_T_SCISSOR_X_MIN)
2157DEFINE_CLIP_FUNC(scissor_x_max, IS_INSIDE_SCISSOR_X_MAX, COMPUTE_T_SCISSOR_X_MAX)
2158DEFINE_CLIP_FUNC(scissor_y_min, IS_INSIDE_SCISSOR_Y_MIN, COMPUTE_T_SCISSOR_Y_MIN)
2159DEFINE_CLIP_FUNC(scissor_y_max, IS_INSIDE_SCISSOR_Y_MAX, COMPUTE_T_SCISSOR_Y_MAX)
2160//-------------------------------------------------------------------------------------------
2161
2162// Main polygon clip function
2163static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int *vertexCounter)
2164{
2165 static sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES];
2166
2167 int n = *vertexCounter;
2168
2169 #define CLIP_AGAINST_PLANE(FUNC_CLIP) \
2170 { \
2171 n = FUNC_CLIP(tmp, polygon, n); \
2172 if (n < 3) \
2173 { \
2174 *vertexCounter = 0; \
2175 return false; \
2176 } \
2177 for (int i = 0; i < n; i++) polygon[i] = tmp[i]; \
2178 }
2179
2180 CLIP_AGAINST_PLANE(sw_clip_w);
2181 CLIP_AGAINST_PLANE(sw_clip_x_pos);
2182 CLIP_AGAINST_PLANE(sw_clip_x_neg);
2183 CLIP_AGAINST_PLANE(sw_clip_y_pos);
2184 CLIP_AGAINST_PLANE(sw_clip_y_neg);
2185 CLIP_AGAINST_PLANE(sw_clip_z_pos);
2186 CLIP_AGAINST_PLANE(sw_clip_z_neg);
2187
2188 if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST)
2189 {
2190 CLIP_AGAINST_PLANE(sw_clip_scissor_x_min);
2191 CLIP_AGAINST_PLANE(sw_clip_scissor_x_max);
2192 CLIP_AGAINST_PLANE(sw_clip_scissor_y_min);
2193 CLIP_AGAINST_PLANE(sw_clip_scissor_y_max);
2194 }
2195
2196 *vertexCounter = n;
2197
2198 return (n >= 3);
2199}
2200
2201// Triangle rendering logic
2202//-------------------------------------------------------------------------------------------
2203static inline bool sw_triangle_face_culling(void)
2204{
2205 // NOTE: Face culling is done before clipping to avoid unnecessary computations
2206 // To handle triangles crossing the w=0 plane correctly,
2207 // we perform the winding order test in homogeneous coordinates directly,
2208 // before the perspective division (division by w)
2209 // This test determines the orientation of the triangle in the (x,y,w) plane,
2210 // which corresponds to the projected 2D winding order sign,
2211 // even with negative w values
2212
2213 // Preload homogeneous coordinates into local variables
2214 const float *h0 = RLSW.vertexBuffer[0].homogeneous;
2215 const float *h1 = RLSW.vertexBuffer[1].homogeneous;
2216 const float *h2 = RLSW.vertexBuffer[2].homogeneous;
2217
2218 // Compute a value proportional to the signed area in the projected 2D plane,
2219 // calculated directly using homogeneous coordinates BEFORE division by w
2220 // This is the determinant of the matrix formed by the (x, y, w) components
2221 // of the vertices, which correctly captures the winding order in homogeneous
2222 // space and its relationship to the projected 2D winding order, even with
2223 // negative w values
2224 // The determinant formula used here is:
2225 // h0.x*(h1.y*h2.w - h2.y*h1.w) +
2226 // h1.x*(h2.y*h0.w - h0.y*h2.w) +
2227 // h2.x*(h0.y*h1.w - h1.y*h0.w)
2228
2229 const float hSgnArea =
2230 h0[0]*(h1[1]*h2[3] - h2[1]*h1[3]) +
2231 h1[0]*(h2[1]*h0[3] - h0[1]*h2[3]) +
2232 h2[0]*(h0[1]*h1[3] - h1[1]*h0[3]);
2233
2234 // Discard the triangle if its winding order (determined by the sign
2235 // of the homogeneous area/determinant) matches the culled direction
2236 // A positive hSgnArea typically corresponds to a counter-clockwise
2237 // winding in the projected space when all w > 0
2238 // This test is robust for points with w > 0 or w < 0, correctly
2239 // capturing the change in orientation when crossing the w=0 plane
2240
2241 // The culling logic remains the same based on the signed area/determinant
2242 // A value of 0 for hSgnArea means the points are collinear in (x, y, w)
2243 // space, which corresponds to a degenerate triangle projection
2244 // Such triangles are typically not culled by this test (0 < 0 is false, 0 > 0 is false)
2245 // and should be handled by the clipper if necessary
2246 return (RLSW.cullFace == SW_FRONT)? (hSgnArea < 0) : (hSgnArea > 0); // Cull if winding is "clockwise" : "counter-clockwise"
2247}
2248
2249static inline void sw_triangle_clip_and_project(void)
2250{
2251 sw_vertex_t *polygon = RLSW.vertexBuffer;
2252 int *vertexCounter = &RLSW.vertexCounter;
2253
2254 if (sw_polygon_clip(polygon, vertexCounter))
2255 {
2256 // Transformation to screen space and normalization
2257 for (int i = 0; i < *vertexCounter; i++)
2258 {
2259 sw_vertex_t *v = &polygon[i];
2260
2261 // Calculation of the reciprocal of W for normalization
2262 // as well as perspective-correct attributes
2263 const float wRcp = 1.0f/v->homogeneous[3];
2264 v->homogeneous[3] = wRcp;
2265
2266 // Division of XYZ coordinates by weight
2267 v->homogeneous[0] *= wRcp;
2268 v->homogeneous[1] *= wRcp;
2269 v->homogeneous[2] *= wRcp;
2270
2271 // Division of texture coordinates (perspective-correct)
2272 v->texcoord[0] *= wRcp;
2273 v->texcoord[1] *= wRcp;
2274
2275 // Division of colors (perspective-correct)
2276 v->color[0] *= wRcp;
2277 v->color[1] *= wRcp;
2278 v->color[2] *= wRcp;
2279 v->color[3] *= wRcp;
2280
2281 // Transformation to screen space
2282 sw_project_ndc_to_screen(v->screen, v->homogeneous);
2283 }
2284 }
2285}
2286
// Generates FUNC_NAME(): rasterizes one horizontal span between the vertices
// 'start' and 'end' (same row, start on the left)
//  - ENABLE_TEXTURE: modulate the interpolated color by the sampled texture
//  - ENABLE_DEPTH_TEST: skip pixels whose depth is greater than the stored one
//  - ENABLE_COLOR_BLEND: blend with the framebuffer instead of overwriting it
// 'dUdy' and 'dVdy' are only forwarded to the texture sampler
#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \
static inline void FUNC_NAME(const sw_texture_t *tex, const sw_vertex_t *start, \
                             const sw_vertex_t *end, float dUdy, float dVdy) \
{ \
    /* Pixel span covered by this scanline: [xFirst, xLast) */ \
    const int xFirst = (int)start->screen[0]; \
    const int xLast = (int)end->screen[0]; \
    \
    /* Nothing to draw when both ends fall in the same pixel column */ \
    if (xFirst == xLast) return; \
    \
    /* Sub-pixel offset applied to every interpolant before the first pixel */ \
    const float prestep = 1.0f - sw_fract(start->screen[0]); \
    \
    /* Reciprocal of the exact (non-truncated) horizontal extent */ \
    const float spanRcp = 1.0f/(end->screen[0] - start->screen[0]); \
    \
    /* Per-pixel steps and pre-stepped start values for depth and 1/w */ \
    const float dZdx = (end->homogeneous[2] - start->homogeneous[2])*spanRcp; \
    const float dWdx = (end->homogeneous[3] - start->homogeneous[3])*spanRcp; \
    float z = start->homogeneous[2] + dZdx*prestep; \
    float w = start->homogeneous[3] + dWdx*prestep; \
    \
    /* Same for the four color channels */ \
    float dCdx[4]; \
    float color[4]; \
    for (int i = 0; i < 4; i++) \
    { \
        dCdx[i] = (end->color[i] - start->color[i])*spanRcp; \
        color[i] = start->color[i] + dCdx[i]*prestep; \
    } \
    \
    /* Texture coordinates are only tracked when texturing is enabled */ \
    float dUdx = 0.0f; \
    float dVdx = 0.0f; \
    float u = 0.0f; \
    float v = 0.0f; \
    if (ENABLE_TEXTURE) \
    { \
        dUdx = (end->texcoord[0] - start->texcoord[0])*spanRcp; \
        dVdx = (end->texcoord[1] - start->texcoord[1])*spanRcp; \
        u = start->texcoord[0] + dUdx*prestep; \
        v = start->texcoord[1] + dVdx*prestep; \
    } \
    \
    /* First framebuffer pixel of the span */ \
    const int y = (int)start->screen[1]; \
    sw_pixel_t *ptr = RLSW.framebuffer.pixels + y*RLSW.framebuffer.width + xFirst; \
    \
    for (int x = xFirst; x < xLast; x++, ++ptr) \
    { \
        /* Depth test: the pixel is rejected only when z is greater than the stored depth */ \
        int visible = 1; \
        if (ENABLE_DEPTH_TEST) \
        { \
            /* TODO: Implement different depth funcs? */ \
            const float depth = sw_framebuffer_read_depth(ptr); \
            visible = !(z > depth); \
        } \
        \
        if (visible) \
        { \
            /* TODO: Implement depth mask */ \
            sw_framebuffer_write_depth(ptr, z); \
            \
            /* Recover the real attribute values from their 'divided by w' form */ \
            const float wRcp = 1.0f/w; \
            float srcColor[4] = { \
                color[0]*wRcp, \
                color[1]*wRcp, \
                color[2]*wRcp, \
                color[3]*wRcp \
            }; \
            \
            if (ENABLE_TEXTURE) \
            { \
                float texColor[4]; \
                sw_texture_sample(texColor, tex, u*wRcp, v*wRcp, dUdx, dUdy, dVdx, dVdy); \
                for (int i = 0; i < 4; i++) srcColor[i] *= texColor[i]; \
            } \
            \
            if (ENABLE_COLOR_BLEND) \
            { \
                float dstColor[4]; \
                sw_framebuffer_read_color(dstColor, ptr); \
                sw_blend_colors(dstColor, srcColor); \
                sw_framebuffer_write_color(ptr, dstColor); \
            } \
            else \
            { \
                sw_framebuffer_write_color(ptr, srcColor); \
            } \
        } \
        \
        /* Step every interpolant to the next pixel, drawn or not */ \
        z += dZdx; \
        w += dWdx; \
        for (int i = 0; i < 4; i++) color[i] += dCdx[i]; \
        if (ENABLE_TEXTURE) \
        { \
            u += dUdx; \
            v += dVdx; \
        } \
    } \
}
2405
// Generates FUNC_NAME(): rasterizes the screen-space triangle (v0, v1, v2) one
// scanline at a time through FUNC_SCANLINE
// The long edge (top to bottom vertex) is walked for the whole height while
// the other side follows the top-to-middle edge and then the middle-to-bottom
// edge. ENABLE_TEXTURE is accepted but not used by this macro body
#define DEFINE_TRIANGLE_RASTER(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \
static inline void FUNC_NAME(const sw_vertex_t *v0, const sw_vertex_t *v1, \
                             const sw_vertex_t *v2, const sw_texture_t *tex) \
{ \
    /* Sort the vertices so that v0 is the topmost and v2 the bottommost */ \
    const sw_vertex_t *swap; \
    if (v0->screen[1] > v1->screen[1]) { swap = v0; v0 = v1; v1 = swap; } \
    if (v1->screen[1] > v2->screen[1]) { swap = v1; v1 = v2; v2 = swap; } \
    if (v0->screen[1] > v1->screen[1]) { swap = v0; v0 = v1; v1 = swap; } \
    \
    const float x0 = v0->screen[0], y0 = v0->screen[1]; \
    const float x1 = v1->screen[0], y1 = v1->screen[1]; \
    const float x2 = v2->screen[0], y2 = v2->screen[1]; \
    \
    /* Edge heights; a triangle without height has nothing to draw */ \
    const float h02 = y2 - y0; \
    const float h01 = y1 - y0; \
    const float h12 = y2 - y1; \
    if (h02 < 1e-6f) return; \
    \
    /* Reciprocal heights (0 for a flat edge, which disables its gradients) */ \
    const float h02Rcp = 1.0f/h02; \
    const float h01Rcp = (h01 > 1e-6f)? 1.0f/h01 : 0.0f; \
    const float h12Rcp = (h12 > 1e-6f)? 1.0f/h12 : 0.0f; \
    \
    /* Horizontal slope of every edge */ \
    const float dXdy02 = (x2 - x0)*h02Rcp; \
    const float dXdy01 = (x1 - x0)*h01Rcp; \
    const float dXdy12 = (x2 - x1)*h12Rcp; \
    \
    /* Sub-pixel correction applied when an edge starts */ \
    const float y0Substep = 1.0f - sw_fract(y0); \
    const float y1Substep = 1.0f - sw_fract(y1); \
    \
    /* Integer rows where each part begins and ends */ \
    const int yTop = (int)y0; \
    const int yMid = (int)y1; \
    const int yBot = (int)y2; \
    \
    /* Attribute gradients along every edge */ \
    sw_vertex_t dVXdy02, dVXdy01, dVXdy12; \
    sw_get_vertex_grad_PTCH(&dVXdy02, v0, v2, h02Rcp); \
    sw_get_vertex_grad_PTCH(&dVXdy01, v0, v1, h01Rcp); \
    sw_get_vertex_grad_PTCH(&dVXdy12, v1, v2, h12Rcp); \
    \
    /* Cursor on the long edge, used by both parts of the triangle */ \
    sw_vertex_t vLeft = *v0; \
    sw_add_vertex_grad_scaled_PTCH(&vLeft, &dVXdy02, y0Substep); \
    vLeft.screen[0] += dXdy02*y0Substep; \
    \
    /* part 0: rows [yTop, yMid) along edge v0-v1, part 1: rows [yMid, yBot) along edge v1-v2 */ \
    for (int part = 0; part < 2; part++) \
    { \
        const sw_vertex_t *edgeStart = (part == 0)? v0 : v1; \
        const sw_vertex_t *edgeGrad = (part == 0)? &dVXdy01 : &dVXdy12; \
        const float edgeSlope = (part == 0)? dXdy01 : dXdy12; \
        const float edgeSubstep = (part == 0)? y0Substep : y1Substep; \
        const int yBegin = (part == 0)? yTop : yMid; \
        const int yEnd = (part == 0)? yMid : yBot; \
        \
        /* Cursor on the short edge, restarted with its own sub-pixel correction */ \
        sw_vertex_t vRight = *edgeStart; \
        sw_add_vertex_grad_scaled_PTCH(&vRight, edgeGrad, edgeSubstep); \
        vRight.screen[0] += edgeSlope*edgeSubstep; \
        \
        for (int y = yBegin; y < yEnd; y++) \
        { \
            vLeft.screen[1] = vRight.screen[1] = y; \
            \
            /* The scanline expects its first vertex on the left */ \
            const int leftFirst = (vLeft.screen[0] < vRight.screen[0]); \
            FUNC_SCANLINE(tex, leftFirst? &vLeft : &vRight, leftFirst? &vRight : &vLeft, \
                          dVXdy02.texcoord[0], dVXdy02.texcoord[1]); \
            \
            sw_add_vertex_grad_PTCH(&vLeft, &dVXdy02); \
            vLeft.screen[0] += dXdy02; \
            \
            sw_add_vertex_grad_PTCH(&vRight, edgeGrad); \
            vRight.screen[0] += edgeSlope; \
        } \
    } \
}
2495
2496DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, 0, 0, 0)
2497DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX, 1, 0, 0)
2498DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH, 0, 1, 0)
2499DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_BLEND, 0, 0, 1)
2500DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH, 1, 1, 0)
2501DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_BLEND, 1, 0, 1)
2502DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH_BLEND, 0, 1, 1)
2503DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH_BLEND, 1, 1, 1)
2504
2505DEFINE_TRIANGLE_RASTER(sw_triangle_raster, sw_triangle_raster_scanline, false)
2506DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX, sw_triangle_raster_scanline_TEX, true)
2507DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH, sw_triangle_raster_scanline_DEPTH, false)
2508DEFINE_TRIANGLE_RASTER(sw_triangle_raster_BLEND, sw_triangle_raster_scanline_BLEND, false)
2509DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH, sw_triangle_raster_scanline_TEX_DEPTH, true)
2510DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND, sw_triangle_raster_scanline_TEX_BLEND, true)
2511DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND, sw_triangle_raster_scanline_DEPTH_BLEND, false)
2512DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND, sw_triangle_raster_scanline_TEX_DEPTH_BLEND, true)
2513
2514static inline void sw_triangle_render(void)
2515{
2516 if (RLSW.stateFlags & SW_STATE_CULL_FACE)
2517 {
2518 if (!sw_triangle_face_culling()) return;
2519 }
2520
2521 sw_triangle_clip_and_project();
2522
2523 if (RLSW.vertexCounter < 3) return;
2524
2525 #define TRIANGLE_RASTER(RASTER_FUNC) \
2526 { \
2527 for (int i = 0; i < RLSW.vertexCounter - 2; i++) \
2528 { \
2529 RASTER_FUNC( \
2530 &RLSW.vertexBuffer[0], \
2531 &RLSW.vertexBuffer[i + 1], \
2532 &RLSW.vertexBuffer[i + 2], \
2533 &RLSW.loadedTextures[RLSW.currentTexture] \
2534 ); \
2535 } \
2536 }
2537
2538 uint32_t state = RLSW.stateFlags;
2539 if (RLSW.currentTexture == 0) state &= ~SW_STATE_TEXTURE_2D;
2540 if ((RLSW.srcFactor == SW_ONE) && (RLSW.dstFactor == SW_ZERO)) state &= ~SW_STATE_BLEND;
2541
2542 if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND)
2543 else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND)
2544 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND)
2545 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH)
2546 else if (SW_STATE_CHECK_EX(state, SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_BLEND)
2547 else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH)
2548 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D)) TRIANGLE_RASTER(sw_triangle_raster_TEX)
2549 else TRIANGLE_RASTER(sw_triangle_raster)
2550
2551 #undef TRIANGLE_RASTER
2552}
2553//-------------------------------------------------------------------------------------------
2554
2555// Quad rendering logic
2556//-------------------------------------------------------------------------------------------
2557static inline bool sw_quad_face_culling(void)
2558{
2559 // NOTE: Face culling is done before clipping to avoid unnecessary computations
2560 // To handle quads crossing the w=0 plane correctly,
2561 // we perform the winding order test in homogeneous coordinates directly,
2562 // before the perspective division (division by w)
2563 // For a convex quad with vertices P0, P1, P2, P3 in sequential order,
2564 // the winding order of the quad is the same as the winding order
2565 // of the triangle P0 P1 P2. We use the homogeneous triangle
2566 // winding test on this first triangle
2567
2568 // Preload homogeneous coordinates into local variables
2569 const float *h0 = RLSW.vertexBuffer[0].homogeneous;
2570 const float *h1 = RLSW.vertexBuffer[1].homogeneous;
2571 const float *h2 = RLSW.vertexBuffer[2].homogeneous;
2572
2573 // NOTE: h3 is not needed for this test
2574 // const float *h3 = RLSW.vertexBuffer[3].homogeneous;
2575
2576 // Compute a value proportional to the signed area of the triangle P0 P1 P2
2577 // in the projected 2D plane, calculated directly using homogeneous coordinates
2578 // BEFORE division by w
2579 // This is the determinant of the matrix formed by the (x, y, w) components
2580 // of the vertices P0, P1, and P2. Its sign correctly indicates the winding order
2581 // in homogeneous space and its relationship to the projected 2D winding order,
2582 // even with negative w values
2583 // The determinant formula used here is:
2584 // h0.x*(h1.y*h2.w - h2.y*h1.w) +
2585 // h1.x*(h2.y*h0.w - h0.y*h2.w) +
2586 // h2.x*(h0.y*h1.w - h1.y*h0.w)
2587
2588 const float hSgnArea =
2589 h0[0]*(h1[1]*h2[3] - h2[1]*h1[3]) +
2590 h1[0]*(h2[1]*h0[3] - h0[1]*h2[3]) +
2591 h2[0]*(h0[1]*h1[3] - h1[1]*h0[3]);
2592
2593 // Perform face culling based on the winding order determined by the sign
2594 // of the homogeneous area/determinant of triangle P0 P1 P2
2595 // This test is robust for points with w > 0 or w < 0 within the triangle,
2596 // correctly capturing the change in orientation when crossing the w=0 plane
2597
2598 // A positive hSgnArea typically corresponds to a counter-clockwise
2599 // winding in the projected space when all w > 0
2600 // A value of 0 for hSgnArea means P0, P1, P2 are collinear in (x, y, w)
2601 // space, which corresponds to a degenerate triangle projection
2602 // Such quads might also be degenerate or non-planar. They are typically
2603 // not culled by this test (0 < 0 is false, 0 > 0 is false)
2604 // and should be handled by the clipper if necessary
2605
2606 return (RLSW.cullFace == SW_FRONT)? (hSgnArea < 0.0f) : (hSgnArea > 0.0f); // Cull if winding is "clockwise" : "counter-clockwise"
2607}
2608
2609static inline void sw_quad_clip_and_project(void)
2610{
2611 sw_vertex_t *polygon = RLSW.vertexBuffer;
2612 int *vertexCounter = &RLSW.vertexCounter;
2613
2614 if (sw_polygon_clip(polygon, vertexCounter))
2615 {
2616 // Transformation to screen space and normalization
2617 for (int i = 0; i < *vertexCounter; i++)
2618 {
2619 sw_vertex_t *v = &polygon[i];
2620
2621 // Calculation of the reciprocal of W for normalization
2622 // as well as perspective-correct attributes
2623 const float wRcp = 1.0f/v->homogeneous[3];
2624 v->homogeneous[3] = wRcp;
2625
2626 // Division of XYZ coordinates by weight
2627 v->homogeneous[0] *= wRcp;
2628 v->homogeneous[1] *= wRcp;
2629 v->homogeneous[2] *= wRcp;
2630
2631 // Division of texture coordinates (perspective-correct)
2632 v->texcoord[0] *= wRcp;
2633 v->texcoord[1] *= wRcp;
2634
2635 // Division of colors (perspective-correct)
2636 v->color[0] *= wRcp;
2637 v->color[1] *= wRcp;
2638 v->color[2] *= wRcp;
2639 v->color[3] *= wRcp;
2640
2641 // Transformation to screen space
2642 sw_project_ndc_to_screen(v->screen, v->homogeneous);
2643 }
2644 }
2645}
2646
2647static inline bool sw_quad_is_axis_aligned(void)
2648{
2649 // Reject quads with perspective projection
2650 // The fast path assumes affine (non-perspective) quads,
2651 // so we require all vertices to have homogeneous w = 1.0
2652 for (int i = 0; i < 4; i++)
2653 {
2654 if (RLSW.vertexBuffer[i].homogeneous[3] != 1.0f) return false;
2655 }
2656
2657 // Epsilon tolerance in screen space (pixels)
2658 const float epsilon = 0.5f;
2659
2660 // Fetch screen-space positions for the four quad vertices
2661 const float *p0 = RLSW.vertexBuffer[0].screen;
2662 const float *p1 = RLSW.vertexBuffer[1].screen;
2663 const float *p2 = RLSW.vertexBuffer[2].screen;
2664 const float *p3 = RLSW.vertexBuffer[3].screen;
2665
2666 // Compute edge vectors between consecutive vertices
2667 // These define the four sides of the quad in screen space
2668 float dx01 = p1[0] - p0[0], dy01 = p1[1] - p0[1];
2669 float dx12 = p2[0] - p1[0], dy12 = p2[1] - p1[1];
2670 float dx23 = p3[0] - p2[0], dy23 = p3[1] - p2[1];
2671 float dx30 = p0[0] - p3[0], dy30 = p0[1] - p3[1];
2672
2673 // Each edge must be either horizontal or vertical within epsilon tolerance
2674 // If any edge deviates significantly from either axis, the quad is not axis-aligned
2675 if (!((fabsf(dy01) < epsilon) || (fabsf(dx01) < epsilon))) return false;
2676 if (!((fabsf(dy12) < epsilon) || (fabsf(dx12) < epsilon))) return false;
2677 if (!((fabsf(dy23) < epsilon) || (fabsf(dx23) < epsilon))) return false;
2678 if (!((fabsf(dy30) < epsilon) || (fabsf(dx30) < epsilon))) return false;
2679
2680 return true;
2681}
2682
2683static inline void sw_quad_sort_cw(const sw_vertex_t* *output)
2684{
2685 const sw_vertex_t *input = RLSW.vertexBuffer;
2686
2687 // Calculate the centroid of the quad
2688 float cx = (input[0].screen[0] + input[1].screen[0] +
2689 input[2].screen[0] + input[3].screen[0])*0.25f;
2690 float cy = (input[0].screen[1] + input[1].screen[1] +
2691 input[2].screen[1] + input[3].screen[1])*0.25f;
2692
2693 // Calculate the angle of each vertex relative to the center
2694 // and assign them directly to their correct position
2695 const sw_vertex_t *corners[4] = { 0 };
2696
2697 for (int i = 0; i < 4; i++)
2698 {
2699 float dx = input[i].screen[0] - cx;
2700 float dy = input[i].screen[1] - cy;
2701
2702 // Determine the quadrant (clockwise from top-left)
2703 // top-left: dx < 0, dy < 0
2704 // top-right: dx >= 0, dy < 0
2705 // bottom-right: dx >= 0, dy >= 0
2706 // bottom-left: dx < 0, dy >= 0
2707
2708 int idx;
2709 if (dy < 0) idx = (dx < 0)? 0 : 1; // Top row
2710 else idx = (dx < 0)? 3 : 2; // Bottom row
2711
2712 corners[idx] = &input[i];
2713 }
2714
2715 output[0] = corners[0]; // top-left
2716 output[1] = corners[1]; // top-right
2717 output[2] = corners[2]; // bottom-right
2718 output[3] = corners[3]; // bottom-left
2719}
2720
2721// TODO: REVIEW: Could a perfectly aligned quad, where one of the four points has a different depth,
2722// still appear perfectly aligned from a certain point of view?
2723// Because in that case, we would still need to perform perspective division for textures and colors...
2724#define DEFINE_QUAD_RASTER_AXIS_ALIGNED(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \
2725static inline void FUNC_NAME(void) \
2726{ \
2727 const sw_vertex_t *sortedVerts[4]; \
2728 sw_quad_sort_cw(sortedVerts); \
2729 \
2730 const sw_vertex_t *v0 = sortedVerts[0]; \
2731 const sw_vertex_t *v1 = sortedVerts[1]; \
2732 const sw_vertex_t *v2 = sortedVerts[2]; \
2733 const sw_vertex_t *v3 = sortedVerts[3]; \
2734 \
2735 /* Screen bounds (axis-aligned) */ \
2736 int xMin = (int)v0->screen[0]; \
2737 int yMin = (int)v0->screen[1]; \
2738 int xMax = (int)v2->screen[0]; \
2739 int yMax = (int)v2->screen[1]; \
2740 \
2741 float w = v2->screen[0] - v0->screen[0]; \
2742 float h = v2->screen[1] - v0->screen[1]; \
2743 \
2744 if ((w == 0) || (h == 0)) return; \
2745 \
2746 float wRcp = (w > 0.0f)? 1.0f/w : 0.0f; \
2747 float hRcp = (h > 0.0f)? 1.0f/h : 0.0f; \
2748 \
2749 /* Subpixel corrections */ \
2750 float xSubstep = 1.0f - sw_fract(v0->screen[0]); \
2751 float ySubstep = 1.0f - sw_fract(v0->screen[1]); \
2752 \
2753 /* Calculation of vertex gradients in X and Y */ \
2754 float dUdx = 0.0f, dVdx = 0.0f; \
2755 float dUdy = 0.0f, dVdy = 0.0f; \
2756 if (ENABLE_TEXTURE) { \
2757 dUdx = (v1->texcoord[0] - v0->texcoord[0])*wRcp; \
2758 dVdx = (v1->texcoord[1] - v0->texcoord[1])*wRcp; \
2759 dUdy = (v3->texcoord[0] - v0->texcoord[0])*hRcp; \
2760 dVdy = (v3->texcoord[1] - v0->texcoord[1])*hRcp; \
2761 } \
2762 \
2763 float dCdx[4], dCdy[4]; \
2764 dCdx[0] = (v1->color[0] - v0->color[0])*wRcp; \
2765 dCdx[1] = (v1->color[1] - v0->color[1])*wRcp; \
2766 dCdx[2] = (v1->color[2] - v0->color[2])*wRcp; \
2767 dCdx[3] = (v1->color[3] - v0->color[3])*wRcp; \
2768 dCdy[0] = (v3->color[0] - v0->color[0])*hRcp; \
2769 dCdy[1] = (v3->color[1] - v0->color[1])*hRcp; \
2770 dCdy[2] = (v3->color[2] - v0->color[2])*hRcp; \
2771 dCdy[3] = (v3->color[3] - v0->color[3])*hRcp; \
2772 \
2773 float dZdx, dZdy; \
2774 dZdx = (v1->homogeneous[2] - v0->homogeneous[2])*wRcp; \
2775 dZdy = (v3->homogeneous[2] - v0->homogeneous[2])*hRcp; \
2776 \
2777 /* Start of quad rasterization */ \
2778 const sw_texture_t *tex; \
2779 if (ENABLE_TEXTURE) tex = &RLSW.loadedTextures[RLSW.currentTexture]; \
2780 \
2781 sw_pixel_t *pixels = RLSW.framebuffer.pixels; \
2782 int wDst = RLSW.framebuffer.width; \
2783 \
2784 float zScanline = v0->homogeneous[2] + dZdx*xSubstep + dZdy*ySubstep; \
2785 float uScanline = v0->texcoord[0] + dUdx*xSubstep + dUdy*ySubstep; \
2786 float vScanline = v0->texcoord[1] + dVdx*xSubstep + dVdy*ySubstep; \
2787 \
2788 float colorScanline[4] = { \
2789 v0->color[0] + dCdx[0]*xSubstep + dCdy[0]*ySubstep, \
2790 v0->color[1] + dCdx[1]*xSubstep + dCdy[1]*ySubstep, \
2791 v0->color[2] + dCdx[2]*xSubstep + dCdy[2]*ySubstep, \
2792 v0->color[3] + dCdx[3]*xSubstep + dCdy[3]*ySubstep \
2793 }; \
2794 \
2795 for (int y = yMin; y < yMax; y++) \
2796 { \
2797 sw_pixel_t *ptr = pixels + y*wDst + xMin; \
2798 \
2799 float z = zScanline; \
2800 float u = uScanline; \
2801 float v = vScanline; \
2802 \
2803 float color[4] = { \
2804 colorScanline[0], \
2805 colorScanline[1], \
2806 colorScanline[2], \
2807 colorScanline[3] \
2808 }; \
2809 \
2810 /* Scanline rasterization */ \
2811 for (int x = xMin; x < xMax; x++) \
2812 { \
2813 /* Pixel color computation */ \
2814 float srcColor[4] = { \
2815 color[0], \
2816 color[1], \
2817 color[2], \
2818 color[3] \
2819 }; \
2820 \
2821 /* Test and write depth */ \
2822 if (ENABLE_DEPTH_TEST) \
2823 { \
2824 /* TODO: Implement different depth funcs? */ \
2825 float depth = sw_framebuffer_read_depth(ptr); \
2826 if (z > depth) goto discard; \
2827 } \
2828 \
2829 /* TODO: Implement depth mask */ \
2830 sw_framebuffer_write_depth(ptr, z); \
2831 \
2832 if (ENABLE_TEXTURE) \
2833 { \
2834 float texColor[4]; \
2835 sw_texture_sample(texColor, tex, u, v, dUdx, dUdy, dVdx, dVdy); \
2836 srcColor[0] *= texColor[0]; \
2837 srcColor[1] *= texColor[1]; \
2838 srcColor[2] *= texColor[2]; \
2839 srcColor[3] *= texColor[3]; \
2840 } \
2841 \
2842 if (ENABLE_COLOR_BLEND) \
2843 { \
2844 float dstColor[4]; \
2845 sw_framebuffer_read_color(dstColor, ptr); \
2846 sw_blend_colors(dstColor, srcColor); \
2847 sw_framebuffer_write_color(ptr, dstColor); \
2848 } \
2849 else sw_framebuffer_write_color(ptr, srcColor); \
2850 \
2851 discard: \
2852 z += dZdx; \
2853 color[0] += dCdx[0]; \
2854 color[1] += dCdx[1]; \
2855 color[2] += dCdx[2]; \
2856 color[3] += dCdx[3]; \
2857 if (ENABLE_TEXTURE) \
2858 { \
2859 u += dUdx; \
2860 v += dVdx; \
2861 } \
2862 ++ptr; \
2863 } \
2864 \
2865 zScanline += dZdy; \
2866 colorScanline[0] += dCdy[0]; \
2867 colorScanline[1] += dCdy[1]; \
2868 colorScanline[2] += dCdy[2]; \
2869 colorScanline[3] += dCdy[3]; \
2870 \
2871 if (ENABLE_TEXTURE) \
2872 { \
2873 uScanline += dUdy; \
2874 vScanline += dVdy; \
2875 } \
2876 } \
2877}
2878
2879DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned, 0, 0, 0)
2880DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX, 1, 0, 0)
2881DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_DEPTH, 0, 1, 0)
2882DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_BLEND, 0, 0, 1)
2883DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_DEPTH, 1, 1, 0)
2884DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_BLEND, 1, 0, 1)
2885DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_DEPTH_BLEND, 0, 1, 1)
2886DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_DEPTH_BLEND, 1, 1, 1)
2887
2888static inline void sw_quad_render(void)
2889{
2890 if (RLSW.stateFlags & SW_STATE_CULL_FACE)
2891 {
2892 if (!sw_quad_face_culling()) return;
2893 }
2894
2895 sw_quad_clip_and_project();
2896
2897 if (RLSW.vertexCounter < 3) return;
2898
2899 uint32_t state = RLSW.stateFlags;
2900 if (RLSW.currentTexture == 0) state &= ~SW_STATE_TEXTURE_2D;
2901 if ((RLSW.srcFactor == SW_ONE) && (RLSW.dstFactor == SW_ZERO)) state &= ~SW_STATE_BLEND;
2902
2903 if ((RLSW.vertexCounter == 4) && sw_quad_is_axis_aligned())
2904 {
2905 if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_quad_raster_axis_aligned_TEX_DEPTH_BLEND();
2906 else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_quad_raster_axis_aligned_DEPTH_BLEND();
2907 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) sw_quad_raster_axis_aligned_TEX_BLEND();
2908 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) sw_quad_raster_axis_aligned_TEX_DEPTH();
2909 else if (SW_STATE_CHECK_EX(state, SW_STATE_BLEND)) sw_quad_raster_axis_aligned_BLEND();
2910 else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST)) sw_quad_raster_axis_aligned_DEPTH();
2911 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D)) sw_quad_raster_axis_aligned_TEX();
2912 else sw_quad_raster_axis_aligned();
2913 return;
2914 }
2915
2916 #define TRIANGLE_RASTER(RASTER_FUNC) \
2917 { \
2918 for (int i = 0; i < RLSW.vertexCounter - 2; i++) \
2919 { \
2920 RASTER_FUNC( \
2921 &RLSW.vertexBuffer[0], \
2922 &RLSW.vertexBuffer[i + 1], \
2923 &RLSW.vertexBuffer[i + 2], \
2924 &RLSW.loadedTextures[RLSW.currentTexture] \
2925 ); \
2926 } \
2927 }
2928
2929 if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND)
2930 else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND)
2931 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND)
2932 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH)
2933 else if (SW_STATE_CHECK_EX(state, SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_BLEND)
2934 else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH)
2935 else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D)) TRIANGLE_RASTER(sw_triangle_raster_TEX)
2936 else TRIANGLE_RASTER(sw_triangle_raster)
2937
2938 #undef TRIANGLE_RASTER
2939}
2940//-------------------------------------------------------------------------------------------
2941
2942// Line rendering logic
2943//-------------------------------------------------------------------------------------------
2944static inline bool sw_line_clip_coord(float q, float p, float *t0, float *t1)
2945{
2946 if (fabsf(p) < SW_CLIP_EPSILON)
2947 {
2948 // Check if the line is entirely outside the window
2949 if (q < -SW_CLIP_EPSILON) return 0; // Completely outside
2950 return 1; // Completely inside or on the edges
2951 }
2952
2953 const float r = q/p;
2954
2955 if (p < 0)
2956 {
2957 if (r > *t1) return 0;
2958 if (r > *t0) *t0 = r;
2959 }
2960 else
2961 {
2962 if (r < *t0) return 0;
2963 if (r < *t1) *t1 = r;
2964 }
2965
2966 return 1;
2967}
2968
2969static inline bool sw_line_clip(sw_vertex_t *v0, sw_vertex_t *v1)
2970{
2971 float t0 = 0.0f, t1 = 1.0f;
2972 float dH[4], dC[4];
2973
2974 for (int i = 0; i < 4; i++)
2975 {
2976 dH[i] = v1->homogeneous[i] - v0->homogeneous[i];
2977 dC[i] = v1->color[i] - v0->color[i];
2978 }
2979
2980 // Clipping Liang-Barsky
2981 if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[0], -dH[3] + dH[0], &t0, &t1)) return false;
2982 if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[0], -dH[3] - dH[0], &t0, &t1)) return false;
2983 if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[1], -dH[3] + dH[1], &t0, &t1)) return false;
2984 if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[1], -dH[3] - dH[1], &t0, &t1)) return false;
2985 if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[2], -dH[3] + dH[2], &t0, &t1)) return false;
2986 if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[2], -dH[3] - dH[2], &t0, &t1)) return false;
2987
2988 // Clipping Scissor
2989 if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST)
2990 {
2991 if (!sw_line_clip_coord(v0->homogeneous[0] - RLSW.scClipMin[0]*v0->homogeneous[3], RLSW.scClipMin[0]*dH[3] - dH[0], &t0, &t1)) return false;
2992 if (!sw_line_clip_coord(RLSW.scClipMax[0]*v0->homogeneous[3] - v0->homogeneous[0], dH[0] - RLSW.scClipMax[0]*dH[3], &t0, &t1)) return false;
2993 if (!sw_line_clip_coord(v0->homogeneous[1] - RLSW.scClipMin[1]*v0->homogeneous[3], RLSW.scClipMin[1]*dH[3] - dH[1], &t0, &t1)) return false;
2994 if (!sw_line_clip_coord(RLSW.scClipMax[1]*v0->homogeneous[3] - v0->homogeneous[1], dH[1] - RLSW.scClipMax[1]*dH[3], &t0, &t1)) return false;
2995 }
2996
2997 // Interpolation of new coordinates
2998 if (t1 < 1.0f)
2999 {
3000 for (int i = 0; i < 4; i++)
3001 {
3002 v1->homogeneous[i] = v0->homogeneous[i] + t1*dH[i];
3003 v1->color[i] = v0->color[i] + t1*dC[i];
3004 }
3005 }
3006
3007 if (t0 > 0.0f)
3008 {
3009 for (int i = 0; i < 4; i++)
3010 {
3011 v0->homogeneous[i] += t0*dH[i];
3012 v0->color[i] += t0*dC[i];
3013 }
3014 }
3015
3016 return true;
3017}
3018
3019static inline bool sw_line_clip_and_project(sw_vertex_t *v0, sw_vertex_t *v1)
3020{
3021 if (!sw_line_clip(v0, v1)) return false;
3022
3023 // Convert homogeneous coordinates to NDC
3024 v0->homogeneous[3] = 1.0f/v0->homogeneous[3];
3025 v1->homogeneous[3] = 1.0f/v1->homogeneous[3];
3026 for (int i = 0; i < 3; i++)
3027 {
3028 v0->homogeneous[i] *= v0->homogeneous[3];
3029 v1->homogeneous[i] *= v1->homogeneous[3];
3030 }
3031
3032 // Convert NDC coordinates to screen space
3033 sw_project_ndc_to_screen(v0->screen, v0->homogeneous);
3034 sw_project_ndc_to_screen(v1->screen, v1->homogeneous);
3035
3036 return true;
3037}
3038
// Generator for the one pixel wide line rasterizers
// FUNC_NAME walks from v0 to v1 in screen space taking one step per pixel along
// the dominant axis and interpolating depth and color linearly
//   ENABLE_DEPTH_TEST:  skip pixels whose depth is greater than the stored depth
//   ENABLE_COLOR_BLEND: blend with the framebuffer color instead of overwriting it
// NOTE: No bounds checks are performed here on the computed pixel position
#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \
static inline void FUNC_NAME(const sw_vertex_t *v0, const sw_vertex_t *v1) \
{ \
    const float x0 = v0->screen[0]; \
    const float y0 = v0->screen[1]; \
    const float dx = v1->screen[0] - x0; \
    const float dy = v1->screen[1] - y0; \
    \
    /* Dominant axis: X when |dx| > |dy|, Y otherwise */ \
    const bool xMajor = (fabsf(dx) > fabsf(dy)); \
    const float lead = xMajor? dx : dy; \
    const float origin = xMajor? x0 : y0; \
    \
    /* Lines shorter than one pixel along the dominant axis are skipped */ \
    const float steps = fabsf(lead); \
    if (steps < 1.0f) return; \
    \
    /* Subpixel offset along the dominant axis, applied to the start values */ \
    const float substep = (lead >= 0.0f)? (1.0f - sw_fract(origin)) : sw_fract(origin); \
    \
    /* Per pixel increments */ \
    const float xInc = dx/steps; \
    const float yInc = dy/steps; \
    const float stepRcp = 1.0f/steps; \
    const float zInc = (v1->homogeneous[2] - v0->homogeneous[2])*stepRcp; \
    \
    float cInc[4], cur[4]; \
    for (int k = 0; k < 4; k++) \
    { \
        cInc[k] = (v1->color[k] - v0->color[k])*stepRcp; \
        cur[k] = v0->color[k] + cInc[k]*substep; \
    } \
    \
    /* Interpolation starting values */ \
    float x = x0 + xInc*substep; \
    float y = y0 + yInc*substep; \
    float z = v0->homogeneous[2] + zInc*substep; \
    \
    const int fbWidth = RLSW.framebuffer.width; \
    sw_pixel_t *pixels = RLSW.framebuffer.pixels; \
    \
    const int numPixels = (int)(steps - substep) + 1; \
    \
    for (int i = 0; i < numPixels; i++) \
    { \
        /* REVIEW: May require reviewing projection details */ \
        const int px = (int)(x - 0.5f); \
        const int py = (int)(y - 0.5f); \
        \
        sw_pixel_t *dst = pixels + py*fbWidth + px; \
        \
        bool visible = true; \
        if (ENABLE_DEPTH_TEST) \
        { \
            const float depth = sw_framebuffer_read_depth(dst); \
            visible = !(z > depth); \
        } \
        \
        if (visible) \
        { \
            sw_framebuffer_write_depth(dst, z); \
            \
            float src[4] = { cur[0], cur[1], cur[2], cur[3] }; \
            \
            if (ENABLE_COLOR_BLEND) \
            { \
                float dstColor[4]; \
                sw_framebuffer_read_color(dstColor, dst); \
                sw_blend_colors(dstColor, src); \
                sw_framebuffer_write_color(dst, dstColor); \
            } \
            else sw_framebuffer_write_color(dst, src); \
        } \
        \
        /* Advance to the next pixel, whether or not this one was written */ \
        x += xInc; \
        y += yInc; \
        z += zInc; \
        for (int k = 0; k < 4; k++) cur[k] += cInc[k]; \
    } \
}
3122
3123#define DEFINE_LINE_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \
3124void FUNC_NAME(const sw_vertex_t *v1, const sw_vertex_t *v2) \
3125{ \
3126 sw_vertex_t tv1, tv2; \
3127 \
3128 int x1 = (int)v1->screen[0]; \
3129 int y1 = (int)v1->screen[1]; \
3130 int x2 = (int)v2->screen[0]; \
3131 int y2 = (int)v2->screen[1]; \
3132 \
3133 int dx = x2 - x1; \
3134 int dy = y2 - y1; \
3135 \
3136 RASTER_FUNC(v1, v2); \
3137 \
3138 if ((dx != 0) && (abs(dy/dx) < 1)) \
3139 { \
3140 int wy = (int)((RLSW.lineWidth - 1.0f)*abs(dx)/sqrtf(dx*dx + dy*dy)); \
3141 wy >>= 1; \
3142 for (int i = 1; i <= wy; i++) \
3143 { \
3144 tv1 = *v1, tv2 = *v2; \
3145 tv1.screen[1] -= i; \
3146 tv2.screen[1] -= i; \
3147 RASTER_FUNC(&tv1, &tv2); \
3148 tv1 = *v1, tv2 = *v2; \
3149 tv1.screen[1] += i; \
3150 tv2.screen[1] += i; \
3151 RASTER_FUNC(&tv1, &tv2); \
3152 } \
3153 } \
3154 else if (dy != 0) \
3155 { \
3156 int wx = (int)((RLSW.lineWidth - 1.0f)*abs(dy)/sqrtf(dx*dx + dy*dy)); \
3157 wx >>= 1; \
3158 for (int i = 1; i <= wx; i++) \
3159 { \
3160 tv1 = *v1, tv2 = *v2; \
3161 tv1.screen[0] -= i; \
3162 tv2.screen[0] -= i; \
3163 RASTER_FUNC(&tv1, &tv2); \
3164 tv1 = *v1, tv2 = *v2; \
3165 tv1.screen[0] += i; \
3166 tv2.screen[0] += i; \
3167 RASTER_FUNC(&tv1, &tv2); \
3168 } \
3169 } \
3170}
3171
3172DEFINE_LINE_RASTER(sw_line_raster, 0, 0)
3173DEFINE_LINE_RASTER(sw_line_raster_DEPTH, 1, 0)
3174DEFINE_LINE_RASTER(sw_line_raster_BLEND, 0, 1)
3175DEFINE_LINE_RASTER(sw_line_raster_DEPTH_BLEND, 1, 1)
3176
3177DEFINE_LINE_THICK_RASTER(sw_line_thick_raster, sw_line_raster)
3178DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH, sw_line_raster_DEPTH)
3179DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_BLEND, sw_line_raster_BLEND)
3180DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH_BLEND, sw_line_raster_DEPTH_BLEND)
3181
3182static inline void sw_line_render(sw_vertex_t *vertices)
3183{
3184 if (!sw_line_clip_and_project(&vertices[0], &vertices[1])) return;
3185
3186 if (RLSW.lineWidth >= 2.0f)
3187 {
3188 if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_line_thick_raster_DEPTH_BLEND(&vertices[0], &vertices[1]);
3189 else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_line_thick_raster_BLEND(&vertices[0], &vertices[1]);
3190 else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_line_thick_raster_DEPTH(&vertices[0], &vertices[1]);
3191 else sw_line_thick_raster(&vertices[0], &vertices[1]);
3192 }
3193 else
3194 {
3195 if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_line_raster_DEPTH_BLEND(&vertices[0], &vertices[1]);
3196 else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_line_raster_BLEND(&vertices[0], &vertices[1]);
3197 else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_line_raster_DEPTH(&vertices[0], &vertices[1]);
3198 else sw_line_raster(&vertices[0], &vertices[1]);
3199 }
3200}
3201//-------------------------------------------------------------------------------------------
3202
3203// Point rendering logic
3204//-------------------------------------------------------------------------------------------
3205static inline bool sw_point_clip_and_project(sw_vertex_t *v)
3206{
3207 if (v->homogeneous[3] != 1.0f)
3208 {
3209 for (int_fast8_t i = 0; i < 3; i++)
3210 {
3211 if ((v->homogeneous[i] < -v->homogeneous[3]) || (v->homogeneous[i] > v->homogeneous[3])) return false;
3212 }
3213
3214 v->homogeneous[3] = 1.0f/v->homogeneous[3];
3215 v->homogeneous[0] *= v->homogeneous[3];
3216 v->homogeneous[1] *= v->homogeneous[3];
3217 v->homogeneous[2] *= v->homogeneous[3];
3218 }
3219
3220 sw_project_ndc_to_screen(v->screen, v->homogeneous);
3221
3222 const int *min = NULL, *max = NULL;
3223
3224 if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST)
3225 {
3226 min = RLSW.scMin;
3227 max = RLSW.scMax;
3228 }
3229 else
3230 {
3231 min = RLSW.vpMin;
3232 max = RLSW.vpMax;
3233 }
3234
3235 bool insideX = (v->screen[0] - RLSW.pointRadius < max[0]) && (v->screen[0] + RLSW.pointRadius > min[0]);
3236 bool insideY = (v->screen[1] - RLSW.pointRadius < max[1]) && (v->screen[1] + RLSW.pointRadius > min[1]);
3237
3238 return (insideX && insideY);
3239}
3240
// Generator for the single pixel rasterizers
// FUNC_NAME writes one pixel at (x, y) with depth z and the given color
//   ENABLE_DEPTH_TEST:  skip the pixel when z is greater than the stored depth
//   ENABLE_COLOR_BLEND: blend with the framebuffer color instead of overwriting it
//   CHECK_BOUNDS:       1 rejects pixels outside [vpMin, vpMax),
//                       SW_SCISSOR_TEST rejects pixels outside [scMin, scMax),
//                       any other value performs no bounds check
#define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \
static inline void FUNC_NAME(int x, int y, float z, const float color[4]) \
{ \
    /* Select the rectangle to test against, if any */ \
    const int *lo = NULL; \
    const int *hi = NULL; \
    if (CHECK_BOUNDS == 1) \
    { \
        lo = RLSW.vpMin; \
        hi = RLSW.vpMax; \
    } \
    else if (CHECK_BOUNDS == SW_SCISSOR_TEST) \
    { \
        lo = RLSW.scMin; \
        hi = RLSW.scMax; \
    } \
    \
    if (lo != NULL) \
    { \
        if ((x < lo[0]) || (x >= hi[0])) return; \
        if ((y < lo[1]) || (y >= hi[1])) return; \
    } \
    \
    int offset = y*RLSW.framebuffer.width + x; \
    sw_pixel_t *dst = RLSW.framebuffer.pixels + offset; \
    \
    if (ENABLE_DEPTH_TEST) \
    { \
        const float depth = sw_framebuffer_read_depth(dst); \
        if (z > depth) return; \
    } \
    \
    sw_framebuffer_write_depth(dst, z); \
    \
    if (ENABLE_COLOR_BLEND) \
    { \
        float dstColor[4]; \
        sw_framebuffer_read_color(dstColor, dst); \
        sw_blend_colors(dstColor, color); \
        sw_framebuffer_write_color(dst, dstColor); \
    } \
    else sw_framebuffer_write_color(dst, color); \
}
3275
3276#define DEFINE_POINT_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \
3277static inline void FUNC_NAME(sw_vertex_t *v) \
3278{ \
3279 int cx = v->screen[0]; \
3280 int cy = v->screen[1]; \
3281 float cz = v->homogeneous[2]; \
3282 int radius = RLSW.pointRadius; \
3283 const float *color = v->color; \
3284 \
3285 int x = 0; \
3286 int y = radius; \
3287 int d = 3 - 2*radius; \
3288 \
3289 while (x <= y) \
3290 { \
3291 for (int i = -x; i <= x; i++) \
3292 { \
3293 RASTER_FUNC(cx + i, cy + y, cz, color); \
3294 RASTER_FUNC(cx + i, cy - y, cz, color); \
3295 } \
3296 for (int i = -y; i <= y; i++) \
3297 { \
3298 RASTER_FUNC(cx + i, cy + x, cz, color); \
3299 RASTER_FUNC(cx + i, cy - x, cz, color); \
3300 } \
3301 if (d > 0) \
3302 { \
3303 y--; \
3304 d = d + 4*(x - y) + 10; \
3305 } \
3306 else d = d + 4*x + 6; \
3307 x++; \
3308 } \
3309}
3310
3311DEFINE_POINT_RASTER(sw_point_raster, 0, 0, 0)
3312DEFINE_POINT_RASTER(sw_point_raster_DEPTH, 1, 0, 0)
3313DEFINE_POINT_RASTER(sw_point_raster_BLEND, 0, 1, 0)
3314DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND, 1, 1, 0)
3315
3316DEFINE_POINT_RASTER(sw_point_raster_CHECK, 0, 0, 1)
3317DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK, 1, 0, 1)
3318DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK, 0, 1, 1)
3319DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK, 1, 1, 1)
3320
3321DEFINE_POINT_RASTER(sw_point_raster_CHECK_SCISSOR, 0, 0, SW_SCISSOR_TEST)
3322DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK_SCISSOR, 1, 0, SW_SCISSOR_TEST)
3323DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK_SCISSOR, 0, 1, SW_SCISSOR_TEST)
3324DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR, 1, 1, SW_SCISSOR_TEST)
3325
3326DEFINE_POINT_THICK_RASTER(sw_point_thick_raster, sw_point_raster_CHECK)
3327DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH, sw_point_raster_DEPTH_CHECK)
3328DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND, sw_point_raster_BLEND_CHECK)
3329DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND, sw_point_raster_DEPTH_BLEND_CHECK)
3330
3331DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_SCISSOR, sw_point_raster_CHECK_SCISSOR)
3332DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_SCISSOR, sw_point_raster_DEPTH_CHECK_SCISSOR)
3333DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND_SCISSOR, sw_point_raster_BLEND_CHECK_SCISSOR)
3334DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND_SCISSOR, sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR)
3335
3336static inline void sw_point_render(sw_vertex_t *v)
3337{
3338 if (!sw_point_clip_and_project(v)) return;
3339
3340 if (RLSW.pointRadius >= 1.0f)
3341 {
3342 if (SW_STATE_CHECK(SW_STATE_SCISSOR_TEST))
3343 {
3344 if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_point_thick_raster_DEPTH_BLEND_SCISSOR(v);
3345 else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_point_thick_raster_BLEND_SCISSOR(v);
3346 else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_point_thick_raster_DEPTH_SCISSOR(v);
3347 else sw_point_thick_raster_SCISSOR(v);
3348 }
3349 else
3350 {
3351 if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_point_thick_raster_DEPTH_BLEND(v);
3352 else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_point_thick_raster_BLEND(v);
3353 else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_point_thick_raster_DEPTH(v);
3354 else sw_point_thick_raster(v);
3355 }
3356 }
3357 else
3358 {
3359 if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_point_raster_DEPTH_BLEND(v->screen[0], v->screen[1], v->homogeneous[2], v->color);
3360 else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_point_raster_BLEND(v->screen[0], v->screen[1], v->homogeneous[2], v->color);
3361 else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_point_raster_DEPTH(v->screen[0], v->screen[1], v->homogeneous[2], v->color);
3362 else sw_point_raster(v->screen[0], v->screen[1], v->homogeneous[2], v->color);
3363 }
3364}
3365//-------------------------------------------------------------------------------------------
3366
3367// Polygon modes rendering logic
3368//-------------------------------------------------------------------------------------------
3369static inline void sw_poly_point_render(void)
3370{
3371 for (int i = 0; i < RLSW.vertexCounter; i++) sw_point_render(&RLSW.vertexBuffer[i]);
3372}
3373
3374static inline void sw_poly_line_render(void)
3375{
3376 const sw_vertex_t *vertices = RLSW.vertexBuffer;
3377 int cm1 = RLSW.vertexCounter - 1;
3378
3379 for (int i = 0; i < cm1; i++)
3380 {
3381 sw_vertex_t verts[2] = { vertices[i], vertices[i + 1] };
3382 sw_line_render(verts);
3383 }
3384
3385 sw_vertex_t verts[2] = { vertices[cm1], vertices[0] };
3386 sw_line_render(verts);
3387}
3388
3389static inline void sw_poly_fill_render(void)
3390{
3391 switch (RLSW.drawMode)
3392 {
3393 case SW_POINTS: sw_point_render(&RLSW.vertexBuffer[0]); break;
3394 case SW_LINES: sw_line_render(RLSW.vertexBuffer); break;
3395 case SW_TRIANGLES: sw_triangle_render(); break;
3396 case SW_QUADS: sw_quad_render(); break;
3397 }
3398}
3399//-------------------------------------------------------------------------------------------
3400
3401// Immediate rendering logic
3402//-------------------------------------------------------------------------------------------
3403void sw_immediate_push_vertex(const float position[4], const float color[4], const float texcoord[2])
3404{
3405 // Copy the attributes in the current vertex
3406 sw_vertex_t *vertex = &RLSW.vertexBuffer[RLSW.vertexCounter++];
3407 for (int i = 0; i < 4; i++)
3408 {
3409 vertex->position[i] = position[i];
3410 if (i < 2) vertex->texcoord[i] = texcoord[i];
3411 vertex->color[i] = color[i];
3412 }
3413
3414 // Calculate homogeneous coordinates
3415 const float *m = RLSW.matMVP, *v = vertex->position;
3416 vertex->homogeneous[0] = m[0]*v[0] + m[4]*v[1] + m[8]*v[2] + m[12]*v[3];
3417 vertex->homogeneous[1] = m[1]*v[0] + m[5]*v[1] + m[9]*v[2] + m[13]*v[3];
3418 vertex->homogeneous[2] = m[2]*v[0] + m[6]*v[1] + m[10]*v[2] + m[14]*v[3];
3419 vertex->homogeneous[3] = m[3]*v[0] + m[7]*v[1] + m[11]*v[2] + m[15]*v[3];
3420
3421 // Immediate rendering of the primitive if the required number is reached
3422 if (RLSW.vertexCounter == RLSW.reqVertices)
3423 {
3424 switch (RLSW.polyMode)
3425 {
3426 case SW_FILL: sw_poly_fill_render(); break;
3427 case SW_LINE: sw_poly_line_render(); break;
3428 case SW_POINT: sw_poly_point_render(); break;
3429 default: break;
3430 }
3431
3432 RLSW.vertexCounter = 0;
3433 }
3434}
3435
3436//-------------------------------------------------------------------------------------------
3437
3438// Validity check helper functions
3439//-------------------------------------------------------------------------------------------
3440static inline bool sw_is_texture_valid(uint32_t id)
3441{
3442 bool valid = true;
3443
3444 if (id == 0) valid = false;
3445 else if (id >= SW_MAX_TEXTURES) valid = false;
3446 else if (RLSW.loadedTextures[id].pixels == NULL) valid = false;
3447
3448 return true;
3449}
3450
3451static inline bool sw_is_texture_filter_valid(int filter)
3452{
3453 return ((filter == SW_NEAREST) || (filter == SW_LINEAR));
3454}
3455
3456static inline bool sw_is_texture_wrap_valid(int wrap)
3457{
3458 return ((wrap == SW_REPEAT) || (wrap == SW_CLAMP));
3459}
3460
3461static inline bool sw_is_draw_mode_valid(int mode)
3462{
3463 bool result = false;
3464
3465 switch (mode)
3466 {
3467 case SW_POINTS:
3468 case SW_LINES:
3469 case SW_TRIANGLES:
3470 case SW_QUADS: result = true; break;
3471 default: break;
3472 }
3473
3474 return result;
3475}
3476
3477static inline bool sw_is_poly_mode_valid(int mode)
3478{
3479 bool result = false;
3480
3481 switch (mode)
3482 {
3483 case SW_POINT:
3484 case SW_LINE:
3485 case SW_FILL: result = true; break;
3486 default: break;
3487 }
3488
3489 return result;
3490}
3491
3492static inline bool sw_is_face_valid(int face)
3493{
3494 return (face == SW_FRONT || face == SW_BACK);
3495}
3496
3497static inline bool sw_is_blend_src_factor_valid(int blend)
3498{
3499 bool result = false;
3500
3501 switch (blend)
3502 {
3503 case SW_ZERO:
3504 case SW_ONE:
3505 case SW_SRC_COLOR:
3506 case SW_ONE_MINUS_SRC_COLOR:
3507 case SW_SRC_ALPHA:
3508 case SW_ONE_MINUS_SRC_ALPHA:
3509 case SW_DST_ALPHA:
3510 case SW_ONE_MINUS_DST_ALPHA:
3511 case SW_DST_COLOR:
3512 case SW_ONE_MINUS_DST_COLOR:
3513 case SW_SRC_ALPHA_SATURATE: result = true; break;
3514 default: break;
3515 }
3516
3517 return result;
3518}
3519
3520static inline bool sw_is_blend_dst_factor_valid(int blend)
3521{
3522 bool result = false;
3523
3524 switch (blend)
3525 {
3526 case SW_ZERO:
3527 case SW_ONE:
3528 case SW_SRC_COLOR:
3529 case SW_ONE_MINUS_SRC_COLOR:
3530 case SW_SRC_ALPHA:
3531 case SW_ONE_MINUS_SRC_ALPHA:
3532 case SW_DST_ALPHA:
3533 case SW_ONE_MINUS_DST_ALPHA:
3534 case SW_DST_COLOR:
3535 case SW_ONE_MINUS_DST_COLOR: result = true; break;
3536 default: break;
3537 }
3538
3539 return result;
3540}
3541//-------------------------------------------------------------------------------------------
3542
3543//----------------------------------------------------------------------------------
3544// Module Functions Definition
3545//----------------------------------------------------------------------------------
3546bool swInit(int w, int h)
3547{
3548 if (!sw_framebuffer_load(w, h)) { swClose(); return false; }
3549
3550 swViewport(0, 0, w, h);
3551 swScissor(0, 0, w, h);
3552
3553 RLSW.loadedTextures = (sw_texture_t *)SW_MALLOC(SW_MAX_TEXTURES*sizeof(sw_texture_t));
3554 if (RLSW.loadedTextures == NULL) { swClose(); return false; }
3555
3556 RLSW.freeTextureIds = (uint32_t *)SW_MALLOC(SW_MAX_TEXTURES*sizeof(uint32_t));
3557 if (RLSW.loadedTextures == NULL) { swClose(); return false; }
3558
3559 const float clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
3560 sw_framebuffer_write_color(&RLSW.clearValue, clearColor);
3561 sw_framebuffer_write_depth(&RLSW.clearValue, 1.0f);
3562
3563 RLSW.currentMatrixMode = SW_MODELVIEW;
3564 RLSW.currentMatrix = &RLSW.stackModelview[0];
3565
3566 sw_matrix_id(RLSW.stackProjection[0]);
3567 sw_matrix_id(RLSW.stackModelview[0]);
3568 sw_matrix_id(RLSW.stackTexture[0]);
3569 sw_matrix_id(RLSW.matMVP);
3570
3571 RLSW.stackProjectionCounter = 1;
3572 RLSW.stackModelviewCounter = 1;
3573 RLSW.stackTextureCounter = 1;
3574 RLSW.isDirtyMVP = false;
3575
3576 RLSW.current.texcoord[0] = 0.0f;
3577 RLSW.current.texcoord[1] = 0.0f;
3578
3579 RLSW.current.color[0] = 1.0f;
3580 RLSW.current.color[1] = 1.0f;
3581 RLSW.current.color[2] = 1.0f;
3582 RLSW.current.color[3] = 1.0f;
3583
3584 RLSW.srcFactor = SW_SRC_ALPHA;
3585 RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA;
3586
3587 RLSW.srcFactorFunc = sw_factor_src_alpha;
3588 RLSW.dstFactorFunc = sw_factor_one_minus_src_alpha;
3589
3590 RLSW.polyMode = SW_FILL;
3591 RLSW.cullFace = SW_BACK;
3592
3593 static uint32_t defaultTex[3*2*2] = {
3594 0xFFFFFFFF,
3595 0xFFFFFFFF,
3596 0xFFFFFFFF,
3597 0xFFFFFFFF
3598 };
3599
3600 RLSW.loadedTextures[0].pixels = (uint8_t*)defaultTex;
3601 RLSW.loadedTextures[0].width = 2;
3602 RLSW.loadedTextures[0].height = 2;
3603 RLSW.loadedTextures[0].wMinus1 = 1;
3604 RLSW.loadedTextures[0].hMinus1 = 1;
3605 RLSW.loadedTextures[0].minFilter = SW_NEAREST;
3606 RLSW.loadedTextures[0].magFilter = SW_NEAREST;
3607 RLSW.loadedTextures[0].sWrap = SW_REPEAT;
3608 RLSW.loadedTextures[0].tWrap = SW_REPEAT;
3609 RLSW.loadedTextures[0].tx = 0.5f;
3610 RLSW.loadedTextures[0].ty = 0.5f;
3611
3612 RLSW.loadedTextureCount = 1;
3613
3614 SW_LOG("INFO: RLSW: Software renderer initialized successfully\n");
3615#if defined(SW_HAS_FMA_AVX) && defined(SW_HAS_FMA_AVX2)
3616 SW_LOG("INFO: RLSW: Using SIMD instructions: FMA AVX\n");
3617#endif
3618#if defined(SW_HAS_AVX) || defined(SW_HAS_AVX2)
3619 SW_LOG("INFO: RLSW: Using SIMD instructions: AVX\n");
3620#endif
3621#if defined(SW_HAS_SSE) || defined(SW_HAS_SSE2) || defined(SW_HAS_SSE3) || defined(SW_HAS_SSSE3) || defined(SW_HAS_SSE41) || defined(SW_HAS_SSE42)
3622 SW_LOG("INFO: RLSW: Using SIMD instructions: SSE\n");
3623#endif
3624#if defined(SW_HAS_NEON_FMA) || defined(SW_HAS_NEON)
3625 SW_LOG("INFO: RLSW: Using SIMD instructions: NEON\n");
3626#endif
3627#if defined(SW_HAS_RVV)
3628 SW_LOG("INFO: RLSW: Using SIMD instructions: RVV\n");
3629#endif
3630
3631 return true;
3632}
3633
3634void swClose(void)
3635{
3636 // NOTE: Starts at texture 1, texture 0 does not have to be freed
3637 for (int i = 1; i < RLSW.loadedTextureCount; i++)
3638 {
3639 if (sw_is_texture_valid(i))
3640 {
3641 SW_FREE(RLSW.loadedTextures[i].pixels);
3642 }
3643 }
3644
3645 SW_FREE(RLSW.framebuffer.pixels);
3646 SW_FREE(RLSW.loadedTextures);
3647 SW_FREE(RLSW.freeTextureIds);
3648
3649 RLSW = SW_CURLY_INIT(sw_context_t) { 0 };
3650}
3651
// Resize the internal framebuffer to w x h
// Returns the result reported by sw_framebuffer_resize()
bool swResizeFramebuffer(int w, int h)
{
    const bool resized = sw_framebuffer_resize(w, h);

    return resized;
}
3656
3657void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void *pixels)
3658{
3659 sw_pixelformat_t pFormat = (sw_pixelformat_t)sw_get_pixel_format(format, type);
3660
3661 if (w <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; }
3662 if (h <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; }
3663
3664 if (w > RLSW.framebuffer.width) w = RLSW.framebuffer.width;
3665 if (h > RLSW.framebuffer.height) h = RLSW.framebuffer.height;
3666
3667 x = sw_clampi(x, 0, w);
3668 y = sw_clampi(y, 0, h);
3669
3670 if ((x >= w) || (y >= h)) return;
3671
3672 if ((x == 0) && (y == 0) && (w == RLSW.framebuffer.width) && (h == RLSW.framebuffer.height))
3673 {
3674 #if SW_COLOR_BUFFER_BITS == 32
3675 if (pFormat == SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8)
3676 {
3677 sw_framebuffer_copy_fast(pixels);
3678 return;
3679 }
3680 #elif SW_COLOR_BUFFER_BITS == 16
3681 if (pFormat == SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5)
3682 {
3683 sw_framebuffer_copy_fast(pixels);
3684 return;
3685 }
3686 #endif
3687 }
3688
3689 switch (pFormat)
3690 {
3691 case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, (uint8_t *)pixels); break;
3692 case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, (uint8_t *)pixels); break;
3693 case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: sw_framebuffer_copy_to_R5G6B5(x, y, w, h, (uint16_t *)pixels); break;
3694 case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: sw_framebuffer_copy_to_R8G8B8(x, y, w, h, (uint8_t *)pixels); break;
3695 case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: sw_framebuffer_copy_to_R5G5B5A1(x, y, w, h, (uint16_t *)pixels); break;
3696 case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: sw_framebuffer_copy_to_R4G4B4A4(x, y, w, h, (uint16_t *)pixels); break;
3697 //case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: sw_framebuffer_copy_to_R8G8B8A8(x, y, w, h, (uint8_t *)pixels); break;
3698 // Below: not implemented
3699 case SW_PIXELFORMAT_UNCOMPRESSED_R32:
3700 case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32:
3701 case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32:
3702 case SW_PIXELFORMAT_UNCOMPRESSED_R16:
3703 case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16:
3704 case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16:
3705 default: RLSW.errCode = SW_INVALID_ENUM; break;
3706 }
3707}
3708
3709void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void *pixels)
3710{
3711 sw_pixelformat_t pFormat = (sw_pixelformat_t)sw_get_pixel_format(format, type);
3712
3713 if (wSrc <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; }
3714 if (hSrc <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; }
3715
3716 if (wSrc > RLSW.framebuffer.width) wSrc = RLSW.framebuffer.width;
3717 if (hSrc > RLSW.framebuffer.height) hSrc = RLSW.framebuffer.height;
3718
3719 xSrc = sw_clampi(xSrc, 0, wSrc);
3720 ySrc = sw_clampi(ySrc, 0, hSrc);
3721
3722 // Check if the sizes are identical after clamping the source to avoid unexpected issues
3723 // REVIEW: This repeats the operations if true, so we could make a copy function without these checks
3724 if (xDst == xSrc && yDst == ySrc && wDst == wSrc && hDst == hSrc)
3725 {
3726 swCopyFramebuffer(xSrc, ySrc, wSrc, hSrc, format, type, pixels);
3727 }
3728
3729 switch (pFormat)
3730 {
3731 case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: sw_framebuffer_blit_to_GRAYALPHA(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break;
3732 case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: sw_framebuffer_blit_to_GRAYALPHA(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break;
3733 case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: sw_framebuffer_blit_to_R5G6B5(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint16_t *)pixels); break;
3734 case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: sw_framebuffer_blit_to_R8G8B8(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break;
3735 case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: sw_framebuffer_blit_to_R5G5B5A1(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint16_t *)pixels); break;
3736 case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: sw_framebuffer_blit_to_R4G4B4A4(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint16_t *)pixels); break;
3737 case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: sw_framebuffer_blit_to_R8G8B8A8(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break;
3738 // Below: not implemented
3739 case SW_PIXELFORMAT_UNCOMPRESSED_R32:
3740 case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32:
3741 case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32:
3742 case SW_PIXELFORMAT_UNCOMPRESSED_R16:
3743 case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16:
3744 case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16:
3745 default:
3746 RLSW.errCode = SW_INVALID_ENUM;
3747 break;
3748 }
3749}
3750
3751void swEnable(SWstate state)
3752{
3753 switch (state)
3754 {
3755 case SW_SCISSOR_TEST: RLSW.stateFlags |= SW_STATE_SCISSOR_TEST; break;
3756 case SW_TEXTURE_2D: RLSW.stateFlags |= SW_STATE_TEXTURE_2D; break;
3757 case SW_DEPTH_TEST: RLSW.stateFlags |= SW_STATE_DEPTH_TEST; break;
3758 case SW_CULL_FACE: RLSW.stateFlags |= SW_STATE_CULL_FACE; break;
3759 case SW_BLEND: RLSW.stateFlags |= SW_STATE_BLEND; break;
3760 default: RLSW.errCode = SW_INVALID_ENUM; break;
3761 }
3762}
3763
3764void swDisable(SWstate state)
3765{
3766 switch (state)
3767 {
3768 case SW_SCISSOR_TEST: RLSW.stateFlags &= ~SW_STATE_SCISSOR_TEST; break;
3769 case SW_TEXTURE_2D: RLSW.stateFlags &= ~SW_STATE_TEXTURE_2D; break;
3770 case SW_DEPTH_TEST: RLSW.stateFlags &= ~SW_STATE_DEPTH_TEST; break;
3771 case SW_CULL_FACE: RLSW.stateFlags &= ~SW_STATE_CULL_FACE; break;
3772 case SW_BLEND: RLSW.stateFlags &= ~SW_STATE_BLEND; break;
3773 default: RLSW.errCode = SW_INVALID_ENUM; break;
3774 }
3775}
3776
3777void swGetIntegerv(SWget name, int *v)
3778{
3779 switch (name)
3780 {
3781 case SW_MODELVIEW_STACK_DEPTH: *v = SW_MODELVIEW_STACK_DEPTH; break;
3782 case SW_PROJECTION_STACK_DEPTH: *v = SW_PROJECTION_STACK_DEPTH; break;
3783 case SW_TEXTURE_STACK_DEPTH: *v = SW_TEXTURE_STACK_DEPTH; break;
3784 default: RLSW.errCode = SW_INVALID_ENUM; break;
3785 }
3786}
3787
3788void swGetFloatv(SWget name, float *v)
3789{
3790 switch (name)
3791 {
3792 case SW_COLOR_CLEAR_VALUE:
3793 {
3794 sw_framebuffer_read_color(v, &RLSW.clearValue);
3795 } break;
3796 case SW_DEPTH_CLEAR_VALUE:
3797 {
3798 v[0] = sw_framebuffer_read_depth(&RLSW.clearValue);
3799 } break;
3800 case SW_CURRENT_COLOR:
3801 {
3802 v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[0];
3803 v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[1];
3804 v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[2];
3805 v[3] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[3];
3806 } break;
3807 case SW_CURRENT_TEXTURE_COORDS:
3808 {
3809 v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[0];
3810 v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[1];
3811 } break;
3812 case SW_POINT_SIZE:
3813 {
3814 v[0] = 2.0f*RLSW.pointRadius;
3815 } break;
3816 case SW_LINE_WIDTH:
3817 {
3818 v[0] = RLSW.lineWidth;
3819 } break;
3820 case SW_MODELVIEW_MATRIX:
3821 {
3822 for (int i = 0; i < 16; i++) v[i] = RLSW.stackModelview[RLSW.stackModelviewCounter - 1][i];
3823
3824 } break;
3825 case SW_PROJECTION_MATRIX:
3826 {
3827 for (int i = 0; i < 16; i++) v[i] = RLSW.stackProjection[RLSW.stackProjectionCounter - 1][i];
3828
3829 } break;
3830 case SW_TEXTURE_MATRIX:
3831 {
3832 for (int i = 0; i < 16; i++) v[i] = RLSW.stackTexture[RLSW.stackTextureCounter - 1][i];
3833
3834 } break;
3835 default: RLSW.errCode = SW_INVALID_ENUM; break;
3836 }
3837}
3838
3839const char *swGetString(SWget name)
3840{
3841 const char *result = NULL;
3842
3843 switch (name)
3844 {
3845 case SW_VENDOR: result = "RLSW Header"; break;
3846 case SW_RENDERER: result = "RLSW Software Renderer"; break;
3847 case SW_VERSION: result = "RLSW 1.0"; break;
3848 case SW_EXTENSIONS: result = "None"; break;
3849 default: RLSW.errCode = SW_INVALID_ENUM; break;
3850 }
3851
3852 return result;
3853}
3854
3855SWerrcode swGetError(void)
3856{
3857 SWerrcode ret = RLSW.errCode;
3858 RLSW.errCode = SW_NO_ERROR;
3859 return ret;
3860}
3861
3862void swViewport(int x, int y, int width, int height)
3863{
3864 if ((width < 0) || (height < 0))
3865 {
3866 RLSW.errCode = SW_INVALID_VALUE;
3867 return;
3868 }
3869
3870 RLSW.vpSize[0] = width;
3871 RLSW.vpSize[1] = height;
3872
3873 RLSW.vpHalf[0] = width/2.0f;
3874 RLSW.vpHalf[1] = height/2.0f;
3875
3876 RLSW.vpCenter[0] = (float)x + RLSW.vpHalf[0];
3877 RLSW.vpCenter[1] = (float)y + RLSW.vpHalf[1];
3878
3879 RLSW.vpMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1);
3880 RLSW.vpMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1);
3881 RLSW.vpMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1);
3882 RLSW.vpMax[1] = sw_clampi(y + height, 0, RLSW.framebuffer.height - 1);
3883}
3884
3885void swScissor(int x, int y, int width, int height)
3886{
3887 if ((width < 0) || (height < 0))
3888 {
3889 RLSW.errCode = SW_INVALID_VALUE;
3890 return;
3891 }
3892
3893 RLSW.scMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1);
3894 RLSW.scMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1);
3895 RLSW.scMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1);
3896 RLSW.scMax[1] = sw_clampi(y + height, 0, RLSW.framebuffer.height - 1);
3897
3898 RLSW.scClipMin[0] = (2.0f*(float)RLSW.scMin[0]/(float)RLSW.vpSize[0]) - 1.0f;
3899 RLSW.scClipMax[0] = (2.0f*(float)RLSW.scMax[0]/(float)RLSW.vpSize[0]) - 1.0f;
3900 RLSW.scClipMax[1] = 1.0f - (2.0f*(float)RLSW.scMin[1]/(float)RLSW.vpSize[1]);
3901 RLSW.scClipMin[1] = 1.0f - (2.0f*(float)RLSW.scMax[1]/(float)RLSW.vpSize[1]);
3902}
3903
3904void swClearColor(float r, float g, float b, float a)
3905{
3906 float v[4] = { r, g, b, a };
3907 sw_framebuffer_write_color(&RLSW.clearValue, v);
3908}
3909
3910void swClearDepth(float depth)
3911{
3912 sw_framebuffer_write_depth(&RLSW.clearValue, depth);
3913}
3914
3915void swClear(uint32_t bitmask)
3916{
3917 int size = RLSW.framebuffer.width*RLSW.framebuffer.height;
3918
3919 if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT))
3920 {
3921 sw_framebuffer_fill(RLSW.framebuffer.pixels, size, RLSW.clearValue);
3922 }
3923 else if (bitmask & (SW_COLOR_BUFFER_BIT))
3924 {
3925 sw_framebuffer_fill_color(RLSW.framebuffer.pixels, size, RLSW.clearValue.color);
3926 }
3927 else if (bitmask & SW_DEPTH_BUFFER_BIT)
3928 {
3929 sw_framebuffer_fill_depth(RLSW.framebuffer.pixels, size, RLSW.clearValue.depth);
3930 }
3931}
3932
3933void swBlendFunc(SWfactor sfactor, SWfactor dfactor)
3934{
3935 if (!sw_is_blend_src_factor_valid(sfactor) ||
3936 !sw_is_blend_dst_factor_valid(dfactor))
3937 {
3938 RLSW.errCode = SW_INVALID_ENUM;
3939 return;
3940 }
3941
3942 RLSW.srcFactor = sfactor;
3943 RLSW.dstFactor = dfactor;
3944
3945 switch (sfactor)
3946 {
3947 case SW_ZERO: RLSW.srcFactorFunc = sw_factor_zero; break;
3948 case SW_ONE: RLSW.srcFactorFunc = sw_factor_one; break;
3949 case SW_SRC_COLOR: RLSW.srcFactorFunc = sw_factor_src_color; break;
3950 case SW_ONE_MINUS_SRC_COLOR: RLSW.srcFactorFunc = sw_factor_one_minus_src_color; break;
3951 case SW_SRC_ALPHA: RLSW.srcFactorFunc = sw_factor_src_alpha; break;
3952 case SW_ONE_MINUS_SRC_ALPHA: RLSW.srcFactorFunc = sw_factor_one_minus_src_alpha; break;
3953 case SW_DST_ALPHA: RLSW.srcFactorFunc = sw_factor_dst_alpha; break;
3954 case SW_ONE_MINUS_DST_ALPHA: RLSW.srcFactorFunc = sw_factor_one_minus_dst_alpha; break;
3955 case SW_DST_COLOR: RLSW.srcFactorFunc = sw_factor_dst_color; break;
3956 case SW_ONE_MINUS_DST_COLOR: RLSW.srcFactorFunc = sw_factor_one_minus_dst_color; break;
3957 case SW_SRC_ALPHA_SATURATE: RLSW.srcFactorFunc = sw_factor_src_alpha_saturate; break;
3958 default: break;
3959 }
3960
3961 switch (dfactor)
3962 {
3963 case SW_ZERO: RLSW.dstFactorFunc = sw_factor_zero; break;
3964 case SW_ONE: RLSW.dstFactorFunc = sw_factor_one; break;
3965 case SW_SRC_COLOR: RLSW.dstFactorFunc = sw_factor_src_color; break;
3966 case SW_ONE_MINUS_SRC_COLOR: RLSW.dstFactorFunc = sw_factor_one_minus_src_color; break;
3967 case SW_SRC_ALPHA: RLSW.dstFactorFunc = sw_factor_src_alpha; break;
3968 case SW_ONE_MINUS_SRC_ALPHA: RLSW.dstFactorFunc = sw_factor_one_minus_src_alpha; break;
3969 case SW_DST_ALPHA: RLSW.dstFactorFunc = sw_factor_dst_alpha; break;
3970 case SW_ONE_MINUS_DST_ALPHA: RLSW.dstFactorFunc = sw_factor_one_minus_dst_alpha; break;
3971 case SW_DST_COLOR: RLSW.dstFactorFunc = sw_factor_dst_color; break;
3972 case SW_ONE_MINUS_DST_COLOR: RLSW.dstFactorFunc = sw_factor_one_minus_dst_color; break;
3973 case SW_SRC_ALPHA_SATURATE: break;
3974 default: break;
3975 }
3976}
3977
3978void swPolygonMode(SWpoly mode)
3979{
3980 if (!sw_is_poly_mode_valid(mode))
3981 {
3982 RLSW.errCode = SW_INVALID_ENUM;
3983 return;
3984 }
3985
3986 RLSW.polyMode = mode;
3987}
3988
3989void swCullFace(SWface face)
3990{
3991 if (!sw_is_face_valid(face))
3992 {
3993 RLSW.errCode = SW_INVALID_ENUM;
3994 return;
3995 }
3996
3997 RLSW.cullFace = face;
3998}
3999
4000void swPointSize(float size)
4001{
4002 RLSW.pointRadius = floorf(size*0.5f);
4003}
4004
4005void swLineWidth(float width)
4006{
4007 RLSW.lineWidth = roundf(width);
4008}
4009
4010void swMatrixMode(SWmatrix mode)
4011{
4012 switch (mode)
4013 {
4014 case SW_PROJECTION: RLSW.currentMatrix = &RLSW.stackProjection[RLSW.stackProjectionCounter - 1]; break;
4015 case SW_MODELVIEW: RLSW.currentMatrix = &RLSW.stackModelview[RLSW.stackModelviewCounter - 1]; break;
4016 case SW_TEXTURE: RLSW.currentMatrix = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; break;
4017 default: RLSW.errCode = SW_INVALID_ENUM; return;
4018 }
4019
4020 RLSW.currentMatrixMode = mode;
4021}
4022
4023void swPushMatrix(void)
4024{
4025 switch (RLSW.currentMatrixMode)
4026 {
4027 case SW_PROJECTION:
4028 {
4029 if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE)
4030 {
4031 RLSW.errCode = SW_STACK_OVERFLOW;
4032 return;
4033 }
4034
4035 int iOld = RLSW.stackProjectionCounter - 1;
4036 int iNew = RLSW.stackProjectionCounter++;
4037
4038 for (int i = 0; i < 16; i++)
4039 {
4040 RLSW.stackProjection[iNew][i] = RLSW.stackProjection[iOld][i];
4041 }
4042
4043 RLSW.currentMatrix = &RLSW.stackProjection[iNew];
4044 } break;
4045 case SW_MODELVIEW:
4046 {
4047 if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE)
4048 {
4049 RLSW.errCode = SW_STACK_OVERFLOW;
4050 return;
4051 }
4052
4053 int iOld = RLSW.stackModelviewCounter - 1;
4054 int iNew = RLSW.stackModelviewCounter++;
4055
4056 for (int i = 0; i < 16; i++)
4057 {
4058 RLSW.stackModelview[iNew][i] = RLSW.stackModelview[iOld][i];
4059 }
4060
4061 RLSW.currentMatrix = &RLSW.stackModelview[iNew];
4062 } break;
4063 case SW_TEXTURE:
4064 {
4065 if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE)
4066 {
4067 RLSW.errCode = SW_STACK_OVERFLOW;
4068 return;
4069 }
4070
4071 int iOld = RLSW.stackTextureCounter - 1;
4072 int iNew = RLSW.stackTextureCounter++;
4073
4074 for (int i = 0; i < 16; i++)
4075 {
4076 RLSW.stackTexture[iNew][i] = RLSW.stackTexture[iOld][i];
4077 }
4078
4079 RLSW.currentMatrix = &RLSW.stackTexture[iNew];
4080 } break;
4081 default: break;
4082 }
4083}
4084
4085void swPopMatrix(void)
4086{
4087 switch (RLSW.currentMatrixMode)
4088 {
4089 case SW_PROJECTION:
4090 {
4091 if (RLSW.stackProjectionCounter <= 0)
4092 {
4093 RLSW.errCode = SW_STACK_UNDERFLOW;
4094 return;
4095 }
4096
4097 RLSW.currentMatrix = &RLSW.stackProjection[--RLSW.stackProjectionCounter];
4098 RLSW.isDirtyMVP = true; //< The MVP is considered to have been changed
4099 } break;
4100 case SW_MODELVIEW:
4101 {
4102 if (RLSW.stackModelviewCounter <= 0)
4103 {
4104 RLSW.errCode = SW_STACK_UNDERFLOW;
4105 return;
4106 }
4107
4108 RLSW.currentMatrix = &RLSW.stackModelview[--RLSW.stackModelviewCounter];
4109 RLSW.isDirtyMVP = true; //< The MVP is considered to have been changed
4110 } break;
4111 case SW_TEXTURE:
4112 {
4113 if (RLSW.stackTextureCounter <= 0)
4114 {
4115 RLSW.errCode = SW_STACK_UNDERFLOW;
4116 return;
4117 }
4118
4119 RLSW.currentMatrix = &RLSW.stackTexture[--RLSW.stackTextureCounter];
4120 } break;
4121 default: break;
4122 }
4123}
4124
4125void swLoadIdentity(void)
4126{
4127 sw_matrix_id(*RLSW.currentMatrix);
4128 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4129}
4130
4131void swTranslatef(float x, float y, float z)
4132{
4133 sw_matrix_t mat;
4134 sw_matrix_id(mat);
4135
4136 mat[12] = x;
4137 mat[13] = y;
4138 mat[14] = z;
4139
4140 sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix);
4141
4142 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4143}
4144
4145void swRotatef(float angle, float x, float y, float z)
4146{
4147 angle *= SW_DEG2RAD;
4148
4149 float lengthSq = x*x + y*y + z*z;
4150
4151 if ((lengthSq != 1.0f) && (lengthSq != 0.0f))
4152 {
4153 float invLength = 1.0f/sqrtf(lengthSq);
4154 x *= invLength;
4155 y *= invLength;
4156 z *= invLength;
4157 }
4158
4159 float sinres = sinf(angle);
4160 float cosres = cosf(angle);
4161 float t = 1.0f - cosres;
4162
4163 sw_matrix_t mat;
4164
4165 mat[0] = x*x*t + cosres;
4166 mat[1] = y*x*t + z*sinres;
4167 mat[2] = z*x*t - y*sinres;
4168 mat[3] = 0.0f;
4169
4170 mat[4] = x*y*t - z*sinres;
4171 mat[5] = y*y*t + cosres;
4172 mat[6] = z*y*t + x*sinres;
4173 mat[7] = 0.0f;
4174
4175 mat[8] = x*z*t + y*sinres;
4176 mat[9] = y*z*t - x*sinres;
4177 mat[10] = z*z*t + cosres;
4178 mat[11] = 0.0f;
4179
4180 mat[12] = 0.0f;
4181 mat[13] = 0.0f;
4182 mat[14] = 0.0f;
4183 mat[15] = 1.0f;
4184
4185 sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix);
4186
4187 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4188}
4189
4190void swScalef(float x, float y, float z)
4191{
4192 sw_matrix_t mat;
4193
4194 mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0;
4195 mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0;
4196 mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0;
4197 mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1;
4198
4199 sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix);
4200
4201 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4202}
4203
4204void swMultMatrixf(const float *mat)
4205{
4206 sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix);
4207
4208 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4209}
4210
4211void swFrustum(double left, double right, double bottom, double top, double znear, double zfar)
4212{
4213 sw_matrix_t mat;
4214
4215 double rl = right - left;
4216 double tb = top - bottom;
4217 double fn = zfar - znear;
4218
4219 mat[0] = (float)(znear*2.0)/rl;
4220 mat[1] = 0.0f;
4221 mat[2] = 0.0f;
4222 mat[3] = 0.0f;
4223
4224 mat[4] = 0.0f;
4225 mat[5] = (float)(znear*2.0)/tb;
4226 mat[6] = 0.0f;
4227 mat[7] = 0.0f;
4228
4229 mat[8] = (float)(right + left)/rl;
4230 mat[9] = (float)(top + bottom)/tb;
4231 mat[10] = -(float)(zfar + znear)/fn;
4232 mat[11] = -1.0f;
4233
4234 mat[12] = 0.0f;
4235 mat[13] = 0.0f;
4236 mat[14] = -(zfar*znear*2.0)/fn;
4237 mat[15] = 0.0f;
4238
4239 sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat);
4240
4241 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4242}
4243
4244void swOrtho(double left, double right, double bottom, double top, double znear, double zfar)
4245{
4246 sw_matrix_t mat;
4247
4248 double rl = right - left;
4249 double tb = top - bottom;
4250 double fn = zfar - znear;
4251
4252 mat[0] = 2.0f/(float)rl;
4253 mat[1] = 0.0f;
4254 mat[2] = 0.0f;
4255 mat[3] = 0.0f;
4256
4257 mat[4] = 0.0f;
4258 mat[5] = 2.0f/(float)tb;
4259 mat[6] = 0.0f;
4260 mat[7] = 0.0f;
4261
4262 mat[8] = 0.0f;
4263 mat[9] = 0.0f;
4264 mat[10] = -2.0f/(float)fn;
4265 mat[11] = 0.0f;
4266
4267 mat[12] = -(float)(left + right)/rl;
4268 mat[13] = -(float)(top + bottom)/tb;
4269 mat[14] = -(float)(zfar + znear)/fn;
4270 mat[15] = 1.0f;
4271
4272 sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat);
4273
4274 if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true;
4275}
4276
4277void swBegin(SWdraw mode)
4278{
4279 // Check if the draw mode is valid
4280 if (!sw_is_draw_mode_valid(mode))
4281 {
4282 RLSW.errCode = SW_INVALID_ENUM;
4283 return;
4284 }
4285
4286 // Recalculate the MVP if this is needed
4287 if (RLSW.isDirtyMVP)
4288 {
4289 sw_matrix_mul_rst(RLSW.matMVP,
4290 RLSW.stackModelview[RLSW.stackModelviewCounter - 1],
4291 RLSW.stackProjection[RLSW.stackProjectionCounter - 1]);
4292
4293 RLSW.isDirtyMVP = false;
4294 }
4295
4296 // Obtain the number of vertices needed for this primitive
4297 switch (mode)
4298 {
4299 case SW_POINTS: RLSW.reqVertices = 1; break;
4300 case SW_LINES: RLSW.reqVertices = 2; break;
4301 case SW_TRIANGLES: RLSW.reqVertices = 3; break;
4302 case SW_QUADS: RLSW.reqVertices = 4; break;
4303 }
4304
4305 // Initialize required values
4306 RLSW.vertexCounter = 0;
4307 RLSW.drawMode = mode;
4308}
4309
4310void swEnd(void)
4311{
4312 RLSW.drawMode = (SWdraw)0;
4313}
4314
4315void swVertex2i(int x, int y)
4316{
4317 const float v[4] = { (float)x, (float)y, 0.0f, 1.0f };
4318 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4319}
4320
4321void swVertex2f(float x, float y)
4322{
4323 const float v[4] = { x, y, 0.0f, 1.0f };
4324 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4325}
4326
4327void swVertex2fv(const float *v)
4328{
4329 const float v4[4] = { v[0], v[1], 0.0f, 1.0f };
4330 sw_immediate_push_vertex(v4, RLSW.current.color, RLSW.current.texcoord);
4331}
4332
4333void swVertex3i(int x, int y, int z)
4334{
4335 const float v[4] = { (float)x, (float)y, (float)z, 1.0f };
4336 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4337}
4338
4339void swVertex3f(float x, float y, float z)
4340{
4341 const float v[4] = { x, y, z, 1.0f };
4342 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4343}
4344
4345void swVertex3fv(const float *v)
4346{
4347 const float v4[4] = { v[0], v[1], v[2], 1.0f };
4348 sw_immediate_push_vertex(v4, RLSW.current.color, RLSW.current.texcoord);
4349}
4350
4351void swVertex4i(int x, int y, int z, int w)
4352{
4353 const float v[4] = { (float)x, (float)y, (float)z, (float)w };
4354 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4355}
4356
4357void swVertex4f(float x, float y, float z, float w)
4358{
4359 const float v[4] = { x, y, z, w };
4360 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4361}
4362
4363void swVertex4fv(const float *v)
4364{
4365 sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord);
4366}
4367
4368void swColor3ub(uint8_t r, uint8_t g, uint8_t b)
4369{
4370 float cv[4];
4371 cv[0] = (float)r*SW_INV_255;
4372 cv[1] = (float)g*SW_INV_255;
4373 cv[2] = (float)b*SW_INV_255;
4374 cv[3] = 1.0f;
4375
4376 swColor4fv(cv);
4377}
4378
4379void swColor3ubv(const uint8_t *v)
4380{
4381 float cv[4];
4382 cv[0] = (float)v[0]*SW_INV_255;
4383 cv[1] = (float)v[1]*SW_INV_255;
4384 cv[2] = (float)v[2]*SW_INV_255;
4385 cv[3] = 1.0f;
4386
4387 swColor4fv(cv);
4388}
4389
// Set the current color from float RGB components, alpha is set to 1.0
void swColor3f(float r, float g, float b)
{
    const float rgba[4] = { r, g, b, 1.0f };

    swColor4fv(rgba);
}
4400
// Set the current color from an array of 3 float RGB components, alpha is set to 1.0
void swColor3fv(const float *v)
{
    const float rgba[4] = { v[0], v[1], v[2], 1.0f };

    swColor4fv(rgba);
}
4411
4412void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
4413{
4414 float cv[4];
4415 cv[0] = (float)r*SW_INV_255;
4416 cv[1] = (float)g*SW_INV_255;
4417 cv[2] = (float)b*SW_INV_255;
4418 cv[3] = (float)a*SW_INV_255;
4419
4420 swColor4fv(cv);
4421}
4422
4423void swColor4ubv(const uint8_t *v)
4424{
4425 float cv[4];
4426 cv[0] = (float)v[0]*SW_INV_255;
4427 cv[1] = (float)v[1]*SW_INV_255;
4428 cv[2] = (float)v[2]*SW_INV_255;
4429 cv[3] = (float)v[3]*SW_INV_255;
4430
4431 swColor4fv(cv);
4432}
4433
// Set the current color from float RGBA components
void swColor4f(float r, float g, float b, float a)
{
    const float rgba[4] = { r, g, b, a };

    swColor4fv(rgba);
}
4444
4445void swColor4fv(const float *v)
4446{
4447 for (int i = 0; i < 4; i++) RLSW.current.color[i] = v[i];
4448}
4449
4450void swTexCoord2f(float u, float v)
4451{
4452 const float *m = RLSW.stackTexture[RLSW.stackTextureCounter - 1];
4453
4454 RLSW.current.texcoord[0] = m[0]*u + m[4]*v + m[12];
4455 RLSW.current.texcoord[1] = m[1]*u + m[5]*v + m[13];
4456}
4457
4458void swTexCoord2fv(const float *v)
4459{
4460 const float *m = RLSW.stackTexture[RLSW.stackTextureCounter - 1];
4461
4462 RLSW.current.texcoord[0] = m[0]*v[0] + m[4]*v[1] + m[12];
4463 RLSW.current.texcoord[1] = m[1]*v[0] + m[5]*v[1] + m[13];
4464}
4465
4466void swBindArray(SWarray type, void *buffer)
4467{
4468 switch (type)
4469 {
4470 case SW_VERTEX_ARRAY: RLSW.array.positions = (float *)buffer; break;
4471 case SW_TEXTURE_COORD_ARRAY: RLSW.array.texcoords = (float *)buffer; break;
4472 case SW_COLOR_ARRAY: RLSW.array.colors = (uint8_t *)buffer; break;
4473 default: break;
4474 }
4475}
4476
4477void swDrawArrays(SWdraw mode, int offset, int count)
4478{
4479 if (RLSW.array.positions == 0)
4480 {
4481 RLSW.errCode = SW_INVALID_OPERATION;
4482 return;
4483 }
4484
4485 swBegin(mode);
4486 {
4487 const float *texMatrix = RLSW.stackTexture[RLSW.stackTextureCounter - 1];
4488 const float *defaultTexcoord = RLSW.current.texcoord;
4489 const float *defaultColor = RLSW.current.color;
4490
4491 const float *positions = RLSW.array.positions;
4492 const float *texcoords = RLSW.array.texcoords;
4493 const uint8_t *colors = RLSW.array.colors;
4494
4495 int end = offset + count;
4496
4497 for (int i = offset; i < end; i++)
4498 {
4499 float u, v;
4500 if (texcoords)
4501 {
4502 int idx = 2*i;
4503 u = texcoords[idx];
4504 v = texcoords[idx + 1];
4505 }
4506 else
4507 {
4508 u = defaultTexcoord[0];
4509 v = defaultTexcoord[1];
4510 }
4511
4512 float texcoord[2];
4513 texcoord[0] = texMatrix[0]*u + texMatrix[4]*v + texMatrix[12];
4514 texcoord[1] = texMatrix[1]*u + texMatrix[5]*v + texMatrix[13];
4515
4516 float color[4] = {
4517 defaultColor[0],
4518 defaultColor[1],
4519 defaultColor[2],
4520 defaultColor[3]
4521 };
4522
4523 if (colors)
4524 {
4525 int idx = 4*i;
4526 color[0] *= (float)colors[idx]*SW_INV_255;
4527 color[1] *= (float)colors[idx + 1]*SW_INV_255;
4528 color[2] *= (float)colors[idx + 2]*SW_INV_255;
4529 color[3] *= (float)colors[idx + 3]*SW_INV_255;
4530 }
4531
4532 int idx = 3*i;
4533 float position[4] = {
4534 positions[idx],
4535 positions[idx + 1],
4536 positions[idx + 2],
4537 1.0f
4538 };
4539
4540 sw_immediate_push_vertex(position, color, texcoord);
4541 }
4542 }
4543 swEnd();
4544}
4545
4546void swDrawElements(SWdraw mode, int count, int type, const void *indices)
4547{
4548 if (RLSW.array.positions == 0)
4549 {
4550 RLSW.errCode = SW_INVALID_OPERATION;
4551 return;
4552 }
4553
4554 if (count < 0)
4555 {
4556 RLSW.errCode = SW_INVALID_VALUE;
4557 return;
4558 }
4559
4560 const uint8_t *indicesUb = NULL;
4561 const uint16_t *indicesUs = NULL;
4562 const uint32_t *indicesUi = NULL;
4563
4564 switch (type)
4565 {
4566 case SW_UNSIGNED_BYTE:
4567 indicesUb = (const uint8_t *)indices;
4568 break;
4569 case SW_UNSIGNED_SHORT:
4570 indicesUs = (const uint16_t *)indices;
4571 break;
4572 case SW_UNSIGNED_INT:
4573 indicesUi = (const uint32_t *)indices;
4574 break;
4575 default:
4576 RLSW.errCode = SW_INVALID_ENUM;
4577 return;
4578 }
4579
4580 swBegin(mode);
4581 {
4582 const float *texMatrix = RLSW.stackTexture[RLSW.stackTextureCounter - 1];
4583 const float *defaultTexcoord = RLSW.current.texcoord;
4584 const float *defaultColor = RLSW.current.color;
4585
4586 const float *positions = RLSW.array.positions;
4587 const float *texcoords = RLSW.array.texcoords;
4588 const uint8_t *colors = RLSW.array.colors;
4589
4590 for (int i = 0; i < count; i++)
4591 {
4592 int index = indicesUb? indicesUb[i] :
4593 (indicesUs? indicesUs[i] : indicesUi[i]);
4594
4595 float u, v;
4596 if (texcoords)
4597 {
4598 int idx = 2*index;
4599 u = texcoords[idx];
4600 v = texcoords[idx + 1];
4601 }
4602 else
4603 {
4604 u = defaultTexcoord[0];
4605 v = defaultTexcoord[1];
4606 }
4607
4608 float texcoord[2];
4609 texcoord[0] = texMatrix[0]*u + texMatrix[4]*v + texMatrix[12];
4610 texcoord[1] = texMatrix[1]*u + texMatrix[5]*v + texMatrix[13];
4611
4612 float color[4] = {
4613 defaultColor[0],
4614 defaultColor[1],
4615 defaultColor[2],
4616 defaultColor[3]
4617 };
4618
4619 if (colors)
4620 {
4621 int idx = 4*index;
4622 color[0] *= (float)colors[idx]*SW_INV_255;
4623 color[1] *= (float)colors[idx + 1]*SW_INV_255;
4624 color[2] *= (float)colors[idx + 2]*SW_INV_255;
4625 color[3] *= (float)colors[idx + 3]*SW_INV_255;
4626 }
4627
4628 int idx = 3*index;
4629 float position[4] = {
4630 positions[idx],
4631 positions[idx + 1],
4632 positions[idx + 2],
4633 1.0f
4634 };
4635
4636 sw_immediate_push_vertex(position, color, texcoord);
4637 }
4638 }
4639 swEnd();
4640}
4641
4642void swGenTextures(int count, uint32_t *textures)
4643{
4644 if ((count == 0) || (textures == NULL)) return;
4645
4646 for (int i = 0; i < count; i++)
4647 {
4648 if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES)
4649 {
4650 RLSW.errCode = SW_STACK_OVERFLOW; // WARNING: Out of memory, not really stack overflow
4651 return;
4652 }
4653
4654 uint32_t id = 0;
4655 if (RLSW.freeTextureIdCount > 0) id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount];
4656 else id = RLSW.loadedTextureCount++;
4657
4658 RLSW.loadedTextures[id] = RLSW.loadedTextures[0];
4659 textures[i] = id;
4660 }
4661}
4662
4663void swDeleteTextures(int count, uint32_t *textures)
4664{
4665 if ((count == 0) || (textures == NULL)) return;
4666
4667 for (int i = 0; i < count; i++)
4668 {
4669 if (!sw_is_texture_valid(textures[i]))
4670 {
4671 RLSW.errCode = SW_INVALID_VALUE;
4672 continue;
4673 }
4674
4675 SW_FREE(RLSW.loadedTextures[textures[i]].pixels);
4676
4677 RLSW.loadedTextures[textures[i]].pixels = NULL;
4678 RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = textures[i];
4679 }
4680}
4681
4682void swTexImage2D(int width, int height, SWformat format, SWtype type, const void *data)
4683{
4684 uint32_t id = RLSW.currentTexture;
4685
4686 if (!sw_is_texture_valid(id))
4687 {
4688 RLSW.errCode = SW_INVALID_VALUE;
4689 return;
4690 }
4691
4692 int pixelFormat = sw_get_pixel_format(format, type);
4693
4694 if (pixelFormat <= SW_PIXELFORMAT_UNKNOWN)
4695 {
4696 RLSW.errCode = SW_INVALID_ENUM;
4697 return;
4698 }
4699
4700 sw_texture_t *texture = &RLSW.loadedTextures[id];
4701
4702 int size = width*height;
4703 texture->pixels = SW_MALLOC(4*size);
4704
4705 if (texture->pixels == NULL)
4706 {
4707 RLSW.errCode = SW_STACK_OVERFLOW; // WARNING: Out of memory...
4708 return;
4709 }
4710
4711 for (int i = 0; i < size; i++)
4712 {
4713 uint32_t *dst = &((uint32_t*)texture->pixels)[i];
4714 sw_get_pixel((uint8_t*)dst, data, i, pixelFormat);
4715 }
4716
4717 texture->width = width;
4718 texture->height = height;
4719 texture->wMinus1 = width - 1;
4720 texture->hMinus1 = height - 1;
4721 texture->tx = 1.0f/width;
4722 texture->ty = 1.0f/height;
4723}
4724
4725void swTexParameteri(int param, int value)
4726{
4727 uint32_t id = RLSW.currentTexture;
4728
4729 if (!sw_is_texture_valid(id))
4730 {
4731 RLSW.errCode = SW_INVALID_VALUE;
4732 return;
4733 }
4734
4735 sw_texture_t *texture = &RLSW.loadedTextures[id];
4736
4737 switch (param)
4738 {
4739 case SW_TEXTURE_MIN_FILTER:
4740 {
4741 if (!sw_is_texture_filter_valid(value))
4742 {
4743 RLSW.errCode = SW_INVALID_ENUM;
4744 return;
4745 }
4746
4747 texture->minFilter = (SWfilter)value;
4748 } break;
4749 case SW_TEXTURE_MAG_FILTER:
4750 {
4751 if (!sw_is_texture_filter_valid(value))
4752 {
4753 RLSW.errCode = SW_INVALID_ENUM;
4754 return;
4755 }
4756
4757 texture->magFilter = (SWfilter)value;
4758 } break;
4759 case SW_TEXTURE_WRAP_S:
4760 {
4761 if (!sw_is_texture_wrap_valid(value))
4762 {
4763 RLSW.errCode = SW_INVALID_ENUM;
4764 return;
4765 }
4766
4767 texture->sWrap = (SWwrap)value;
4768 } break;
4769 case SW_TEXTURE_WRAP_T:
4770 {
4771 if (!sw_is_texture_wrap_valid(value))
4772 {
4773 RLSW.errCode = SW_INVALID_ENUM;
4774 return;
4775 }
4776
4777 texture->tWrap = (SWwrap)value;
4778 } break;
4779 default: RLSW.errCode = SW_INVALID_ENUM; break;
4780 }
4781}
4782
4783void swBindTexture(uint32_t id)
4784{
4785 if (id >= SW_MAX_TEXTURES)
4786 {
4787 RLSW.errCode = SW_INVALID_VALUE;
4788 return;
4789 }
4790
4791 if (RLSW.loadedTextures[id].pixels == NULL)
4792 {
4793 RLSW.errCode = SW_INVALID_OPERATION;
4794 return;
4795 }
4796
4797 RLSW.currentTexture = id;
4798}
4799
4800#endif // RLSW_IMPLEMENTATION
4801
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit