From c1cddb1b0e82cf30bd8ea8418e42a0508b4c49b0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 11 Apr 2022 10:51:59 -0400 Subject: [PATCH 01/54] gpu: A first shot at defining an API through new headers. This has a long way to go yet. --- include/SDL_gpu.h | 637 +++++++++++++++++++++++++++++++++++++ include/SDL_gpu_compiler.h | 59 ++++ 2 files changed, 696 insertions(+) create mode 100644 include/SDL_gpu.h create mode 100644 include/SDL_gpu_compiler.h diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h new file mode 100644 index 0000000000000..184a6ef8df3cc --- /dev/null +++ b/include/SDL_gpu.h @@ -0,0 +1,637 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SDL_gpu_h_ +#define SDL_gpu_h_ + +/** + * \file SDL_gpu.h + * + * Header for the SDL GPU routines. + */ + +#include "SDL_stdinc.h" +#include "SDL_error.h" + +#include "begin_code.h" +/* Set up for C function definitions, even when using C++ */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef SDL_SUPPRESS_GPU_API_UNSTABLE_WARNING +#warning The SDL GPU API is still in development. Expect things to change! +#warning DO NOT SHIP BUILDS OF SDL TO THE PUBLIC WITH THIS CODE IN IT. +#warning DO NOT SHIP _ANYTHING_ THAT USES THIS API. +#warning This warning will be removed when the API stabilizes. +#endif + +/* !!! FIXME: this all needs formal (and significantly more robust) documentation. */ + +/* + * The basic sizzle reel: + * - You work in terms of modern GPU APIs without having to bog down + * in their specific minutiae. + * - It works on several APIs behind the scenes. + * - It's about _removing existing limitations_ without giving up + * most comfort, portability, or performance. + * - You write shaders in a simple language once, and either ship + * shader source code or bytecode. At runtime, we figure out how to + * make it work. + * - You work in one coordinate system and we deal with the + * differences for you. + * + * !!! FIXME: explain shader policy (it's all a simple C-like thing, + * !!! FIXME: meant to be easy-ish to parse and write in. It is not + * !!! FIXME: GLSL or HLSL or MSL, because while being able to reuse + * !!! FIXME: shaders would be nice, the language specs are huge and + * !!! FIXME: carry various API-specific quirks, making them bad fits + * !!! FIXME: for SDL. It's quite possible to build external tools + * !!! FIXME: that will convert from existing shader languages to + * !!! FIXME: SDL_gpu. The same ideas apply to shader bytecode.) + * + * !!! FIXME: document coordinate systems (it's the same as Direct3D 12 + * !!! FIXME: and Metal, which is what WebGPU landed on, too. We'll + * !!! 
FIXME: convert behind the scenes for OpenGL and Vulkan where + * !!! FIXME: appropriate). + * + * Some rules and limitations: + * - There is no software renderer, and this API will not make heroic + * efforts to work on ancient GPUs and APIs. + * - this doesn't expose all of Metal/Vulkan/DX12. We are trying to + * drastically improve on SDL's render API functionality while + * keeping it simple-ish. Modern APIs put most of the heavy lifting + * into shaders, command queues, and precooked state objects, and + * we are exposing that specific set, which is powerful enough for + * almost anything you want to build outside of the highest of + * high end triple-AAA titles. + * - This exposes a feature set that the underlying API probably can't + * entirely lift before OpenGL 4 or Direct3D 11. For example, it allows + * vertex shaders to use samplers, which wasn't available in + * Direct3D 10. D3D11 was available in the retail release of + * Windows 7, though--and backported to Vista!--which is probably + * reasonable. It also means ancient, now-garbage GPUs are not and + * will not be supported. Hypothetically we could make _most_ of this + * work on OpenGL 2 and Direct3D 9, but why bother in modern times? + * (then again: maybe we can support enough of this to make many + * reasonable apps run on older GL/D3D, and just fail in + * SDL_GpuLoadShader on unsupported stuff). + * - Modern GPUs expect you to draw triangles, lines, or points. + * There are no quads or complex polygons. You can build them out of + * triangles yourself when you need them. + * - Modern APIs expose an enormous amount of fine-grained resource + * management, but I've opted for something simpler: there are GPU + * buffers and CPU buffers, and you have to queue a blit command to + * transfer between them. All the other stuff about what type of + * memory a buffer should be in, or CPU cache modes, etc, is mostly + * hidden here. GPU does fast things with GPU buffers, CPU does fast + * things with CPU buffers, transferring between them is slow, done. + * - You are NOT allowed to call into the underlying API directly. + * You can not force this to use OpenGL so you can intermingle + * your own OpenGL calls, etc. There is no compatibility functions + * to pull lowlevel API handles out of this to use in your own app. + * If you want to do this: just copy the source code out of here + * into your app, do what you like with it, and don't file a bug report. + * - The shader compiler is meant to be fast and lightweight. It does + * not do heavy optimizations of your code. It's meant to let you + * deal with source code at runtime, if you need to generate it on + * the fly for various reasons. + * - The shader bytecode is also meant to be fast and lightweight. Its + * primary goal is to convert quickly to whatever the underlying API + * needs. It's possible the underlying API might do an optimization + * pass, though. + * - There's no reason an offline compiler can't optimize the bytecode + * passed in here, but this doesn't currently exist and will not + * be implemented as a standard piece of the runtime. + * + * + * some things that modern GPU APIs offer that we aren't (currently) exposing: + * + * - compute + * - geometry shaders + * - instancing + * - tesselation + * - ray tracing + * - multisample ( !!! 
FIXME: maybe add this) + * - device enumeration/selection + * - multiple command queues (you can encode multiple command buffers, from multiple threads, though) + * - compressed texture formats + * - Most of the wild list of uncompressed texture formats. + * - texture arrays + * - texture slices (with the exception of cubemap faces) + * + * !!! FIXME: enumerate lowlevel APIs? In theory a Windows machine + * could offer all of Direct3D 9-12, Vulkan, OpenGL, GLES, etc... + */ + +/* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ + +typedef struct SDL_GpuDevice *SDL_GpuDevice; +SDL_GpuDevice *SDL_GpuCreateDevice(const char *name); /* `name` is for debugging, not a specific device name to access. */ +void SDL_GpuDestroyDevice(SDL_GpuDevice *device); + +/* CPU buffers live in RAM and can be accessed by the CPU. */ +SDL_GpuBuffer *SDL_GpuCreateCPUBuffer(SDL_GpuDevice *device, const Uint32 buflen); +void *SDL_GpuLockCPUBuffer(SDL_GpuBuffer *buffer, Uint32 *_buflen); +void SDL_GpuUnlockCPUBuffer(SDL_GpuBuffer *buffer); + +/* + * GPU buffers live in GPU-specific memory and can not be accessed by the CPU. + * If you need to get data to/from a GPU buffer, encode a blit operation + * to move it from/to a CPU buffer. Once in a CPU buffer, you can lock it to access data in your code. + */ +SDL_GpuBuffer *SDL_GpuCreateBuffer(SDL_GpuDevice *device, const Uint32 length); +void SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer); + + +typedef enum SDL_GpuTextureType +{ + SDL_GPUTEXTYPE_1D, + SDL_GPUTEXTYPE_2D, + SDL_GPUTEXTYPE_CUBE, + SDL_GPUTEXTYPE_3D +} SDL_GpuTextureType; + +typedef enum SDL_GpuPixelFormat +{ + SDL_GPUPIXELFMT_B5G6R5, + SDL_GPUPIXELFMT_BGR5A1, + SDL_GPUPIXELFMT_RGBA8, + SDL_GPUPIXELFMT_RGBA8_sRGB, + SDL_GPUPIXELFMT_BGRA8, + SDL_GPUPIXELFMT_BGRA8_sRGB, + SDL_GPUPIXELFMT_Depth24_Stencil8 + /* !!! FIXME: s3tc? pvrtc? */ +} SDL_GpuPixelFormat; + +typedef enum SDL_GpuTextureUsage +{ + SDL_GPUTEXUSAGE_SHADERREAD, + SDL_GPUTEXUSAGE_SHADERWRITE, + SDL_GPUTEXUSAGE_RENDERTARGET +} SDL_GpuTextureUsage; + +typedef struct SDL_GpuTextureDescription +{ + const char *name; + SDL_GpuTextureType texture_type; + SDL_GpuPixelFormat pixel_format; + SDL_GpuTextureUsage usage; + Uint32 width; + Uint32 height; + Uint32 depth; + Uint32 mipmap_levels; +} SDL_GpuTextureDescription; + +SDL_GpuTexture *SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc); +void SDL_GpuDestroyTexture(SDL_GpuTexture *texture); + +/* compiling shaders is a different (and optional at runtime) piece, in SDL_gpu_compiler.h */ +SDL_GpuShader *SDL_GpuLoadShader(SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); +void SDL_GpuDestroyShader(SDL_GpuShader *shader); + + +/* !!! FIXME: I don't know what this is going to look like yet, this is a placeholder. */ +SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device); + + +/* PRECOOKED STATE OBJECTS... 
*/ + +typedef enum SDL_GpuBlendOperation +{ + SDL_GPUBLENDOP_ADD, + SDL_GPUBLENDOP_SUBTRACT, + SDL_GPUBLENDOP_REVERSESUBTRACT, + SDL_GPUBLENDOP_MIN, + SDL_GPUBLENDOP_MAX +} SDL_GpuBlendOperation; + +typedef enum SDL_GpuBlendFactor +{ + SDL_GPUBLENDFACTOR_ZERO, + SDL_GPUBLENDFACTOR_ONE, + SDL_GPUBLENDFACTOR_SOURCECOLOR, + SDL_GPUBLENDFACTOR_ONEMINUSSOURCECOLOR, + SDL_GPUBLENDFACTOR_SOURCEALPHA, + SDL_GPUBLENDFACTOR_ONEMINUSSOURCEALPHA, + SDL_GPUBLENDFACTOR_DESTINATIONCOLOR, + SDL_GPUBLENDFACTOR_ONEMINUSDESTINATIONCOLOR, + SDL_GPUBLENDFACTOR_DESTINATIONALPHA, + SDL_GPUBLENDFACTOR_ONEMINUSDESTINATIONALPHA, + SDL_GPUBLENDFACTOR_SOURCEALPHASATURATED, + SDL_GPUBLENDFACTOR_BLENDCOLOR, + SDL_GPUBLENDFACTOR_ONEMINUSBLENDCOLOR, + SDL_GPUBLENDFACTOR_BLENDALPHA, + SDL_GPUBLENDFACTOR_ONEMINUSBLENDALPHA, + SDL_GPUBLENDFACTOR_SOURCE1COLOR, + SDL_GPUBLENDFACTOR_ONEMINUSSOURCE1COLOR, + SDL_GPUBLENDFACTOR_SOURCE1ALPHA, + SDL_GPUBLENDFACTOR_ONEMINUSSOURCE1ALPHA +} SDL_GpuBlendFactor; + +typedef struct SDL_GpuColorAttachmentDescription +{ + SDL_GpuPixelFormat pixel_format; + SDL_bool writemask_enabled_red; + SDL_bool writemask_enabled_blue; + SDL_bool writemask_enabled_green; + SDL_bool writemask_enabled_alpha; + SDL_bool blending_enabled; + SDL_GpuBlendOperation alpha_blend_op; + SDL_GpuBlendFactor alpha_src_blend_factor; + SDL_GpuBlendFactor alpha_dst_blend_factor; + SDL_GpuBlendOperation rgb_blend_op; + SDL_GpuBlendFactor rgb_src_blend_factor; + SDL_GpuBlendFactor rgb_dst_blend_factor; +} SDL_GpuColorAttachmentDescription; + +typedef enum SDL_GpuVertexFormat +{ + SDL_GPUVERTFMT_UCHAR, + SDL_GPUVERTFMT_UCHAR2, + SDL_GPUVERTFMT_UCHAR3, + SDL_GPUVERTFMT_UCHAR4, + SDL_GPUVERTFMT_CHAR, + SDL_GPUVERTFMT_CHAR2, + SDL_GPUVERTFMT_CHAR3, + SDL_GPUVERTFMT_CHAR4, + SDL_GPUVERTFMT_UCHAR_NORMALIZED, + SDL_GPUVERTFMT_UCHAR2_NORMALIZED, + SDL_GPUVERTFMT_UCHAR3_NORMALIZED, + SDL_GPUVERTFMT_UCHAR4_NORMALIZED, + SDL_GPUVERTFMT_CHAR_NORMALIZED, + SDL_GPUVERTFMT_CHAR2_NORMALIZED, + SDL_GPUVERTFMT_CHAR3_NORMALIZED, + SDL_GPUVERTFMT_CHAR4_NORMALIZED, + SDL_GPUVERTFMT_USHORT, + SDL_GPUVERTFMT_USHORT2, + SDL_GPUVERTFMT_USHORT3, + SDL_GPUVERTFMT_USHORT4, + SDL_GPUVERTFMT_SHORT, + SDL_GPUVERTFMT_SHORT2, + SDL_GPUVERTFMT_SHORT3, + SDL_GPUVERTFMT_SHORT4, + SDL_GPUVERTFMT_USHORT_NORMALIZED, + SDL_GPUVERTFMT_USHORT2_NORMALIZED, + SDL_GPUVERTFMT_USHORT3_NORMALIZED, + SDL_GPUVERTFMT_USHORT4_NORMALIZED, + SDL_GPUVERTFMT_SHORT_NORMALIZED, + SDL_GPUVERTFMT_SHORT2_NORMALIZED, + SDL_GPUVERTFMT_SHORT3_NORMALIZED, + SDL_GPUVERTFMT_SHORT4_NORMALIZED, + SDL_GPUVERTFMT_HALF, + SDL_GPUVERTFMT_HALF2, + SDL_GPUVERTFMT_HALF3, + SDL_GPUVERTFMT_HALF4, + SDL_GPUVERTFMT_FLOAT, + SDL_GPUVERTFMT_FLOAT2, + SDL_GPUVERTFMT_FLOAT3, + SDL_GPUVERTFMT_FLOAT4, + SDL_GPUVERTFMT_UINT, + SDL_GPUVERTFMT_UINT2, + SDL_GPUVERTFMT_UINT3, + SDL_GPUVERTFMT_UINT4, + SDL_GPUVERTFMT_INT, + SDL_GPUVERTFMT_INT2, + SDL_GPUVERTFMT_INT3, + SDL_GPUVERTFMT_INT4 +} SDL_GpuVertexFormat; + +typedef struct SDL_GpuVertexAttributeDescription +{ + SDL_GpuVertexFormat format; + Uint32 offset; + Uint32 stride; + Uint32 index; +} SDL_GpuVertexAttributeDescription; + +#define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ +#define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? 
*/ +typedef struct SDL_GpuPipelineDescription +{ + const char *name; + SDL_GpuShader *vertex_shader; + SDL_GpuShader *fragment_shader; + Uint32 num_vertex_attributes; + SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; + Uint32 num_color_attachments; + SDL_GpuColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; + SDL_GpuPixelFormat depth_format; + SDL_GpuPixelFormat stencil_format; +} SDL_GpuPipelineDescription; + +typedef struct SDL_GpuPipeline SDL_GpuPipeline; +SDL_GpuPipeline *SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc); +void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); + +/* DepthStencil (or rather, state relating to depth and stencil, which get clumped together) is something + you cook once into an object and reuse over and over). */ +typedef enum SDL_GpuCompareFunction +{ + SDL_GPUCMPFUNC_NEVER, + SDL_GPUCMPFUNC_LESS, + SDL_GPUCMPFUNC_EQUAL, + SDL_GPUCMPFUNC_LESSEQUAL, + SDL_GPUCMPFUNC_GREATER, + SDL_GPUCMPFUNC_NOTEQUAL, + SDL_GPUCMPFUNC_GREATEREQUAL, + SDL_GPUCMPFUNC_ALWAYS +} SDL_GpuCompareFunction; + +typedef enum SDL_GpuStencilOperation +{ + SDL_GPUSTENCILOP_KEEP, + SDL_GPUSTENCILOP_ZERO, + SDL_GPUSTENCILOP_REPLACE, + SDL_GPUSTENCILOP_INCREMENTCLAMP, + SDL_GPUSTENCILOP_DECREMENTCLAMP, + SDL_GPUSTENCILOP_INVERT, + SDL_GPUSTENCILOP_INCREMENTWRAP, + SDL_GPUSTENCILOP_DECREMENTWRAP +} SDL_GpuStencilOperation; + +typedef struct SDL_GpuDepthStencilDescription +{ + const char *name; + SDL_bool depth_write_enabled; + Uint32 stencil_read_mask; + Uint32 stencil_write_mask; + SDL_GpuCompareFunction depth_function; + SDL_GpuCompareFunction stencil_function; + SDL_GpuStencilOperation stencil_fail; + SDL_GpuStencilOperation depth_fail; + SDL_GpuStencilOperation depth_and_stencil_pass; +} SDL_GpuDepthStencilDescription; + +typedef struct SDL_GpuDepthStencil SDL_GpuDepthStencil; +SDL_GpuDepthStencil *SDL_GpuCreateDepthStencil(SDL_GpuDevice *device, const SDL_GpuDepthStencilDescription *desc); +void SDL_GpuDestroyDepthStencil(SDL_GpuDepthStencil *depthstencil); + + +typedef enum SDL_GpuSamplerAddressMode +{ + SDL_GPUSAMPADDR_CLAMPTOEDGE, + SDL_GPUSAMPADDR_MIRRORCLAMPTOEDGE, + SDL_GPUSAMPADDR_REPEAT, + SDL_GPUSAMPADDR_MIRRORREPEAT, + SDL_GPUSAMPADDR_CLAMPTOZERO, + SDL_GPUSAMPADDR_CLAMPTOBORDERCOLOR +} SDL_GpuSamplerAddressMode; + +typedef enum SDL_GpuSamplerBorderColor +{ + SDL_GPUSAMPBORDER_TRANSPARENT_BLACK, + SDL_GPUSAMPBORDER_OPAQUE_BLACK, + SDL_GPUSAMPBORDER_OPAQUE_WHITE +} SDL_GpuSamplerBorderColor; + +typedef enum SDL_GpuSamplerMinMagFilter +{ + SDL_GPUMINMAGFILTER_NEAREST, + SDL_GPUMINMAGFILTER_LINEAR +} SDL_GpuSamplerMinMagFilter; + +typedef enum SDL_GpuSamplerMipFilter +{ + SDL_GPUMIPFILTER_NOTMIPMAPPED, + SDL_GPUMIPFILTER_NEAREST, + SDL_GPUMIPFILTER_LINEAR +} SDL_GpuSamplerMipFilter; + +typedef struct SDL_GpuSamplerDescription +{ + const char *name; + SDL_GpuSamplerAddressMode addrmode_u; + SDL_GpuSamplerAddressMode addrmode_v; + SDL_GpuSamplerAddressMode addrmode_r; + SDL_GpuSamplerBorderColor border_color; + SDL_GpuSamplerMinMagFilter min_filter; + SDL_GpuSamplerMinMagFilter mag_filter; + SDL_GpuSamplerMipFilter mip_filter; +} SDL_GpuSamplerDescription; + +typedef struct SDL_GpuSampler SDL_GpuSampler; +SDL_GpuSampler *SDL_GpuCreateSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc); +void SDL_GpuDestroySampler(SDL_GpuSampler *sampler); + + + +/* + * STATE CACHE CONVENIENCE FUNCTIONS... 
+ * + * If you have only a few pipeline/etc states, or you have a system to manage + * them already, you don't need to use a StateCache. But if you're + * planning to make a bunch of states, we can manage them for you so you + * just tell us what you need and we either give you a previously-made object + * or create/cache a new one as needed. You can have multiple caches, so as + * to group related states together. You can then dump all the states at + * once, perhaps on level load, by deleting a specific cache. + */ +typedef struct SDL_GpuStateCache SDL_GpuStateCache; +SDL_GpuStateCache *SDL_GpuCreateStateCache(const char *name, SDL_GpuDevice *device); +SDL_GpuPipeline *SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc); +SDL_GpuDepthStencil *SDL_GpuGetCachedDepthStencil(SDL_GpuStateCache *cache, const SDL_GpuDepthStencilDescription *desc); +SDL_GpuSampler *SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc); +void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); + + +/* + * COMMAND BUFFERS... + * + * Commands to send to the GPU are encoded into command buffers. + * You make a command buffer, you encode commands into it, then you + * commit the buffer. Once committed, the buffer is sent to the GPU. + * The GPU processes these buffers in the order they were committed, + * running the buffer's commands in the order they were encoded. + * Command buffers may be encoded and committed from any thread, but + * only one thread may encode to a given command buffer at a time. + * You can only have one encoder for a command buffer at a time, but + * you can encode different types of commands (rendering and blitting, + * etc) into the same command buffer. + */ +typedef struct SDL_GpuCommandBuffer SDL_GpuCommandBuffer; +SDL_GpuCommandBuffer *SDL_GpuCreateCommandBuffer(const char *name, SDL_GpuDevice *device); + + +/* RENDERING PASSES... */ + +typedef enum SDL_GpuPassInit +{ + SDL_GPUPASSINIT_UNDEFINED, + SDL_GPUPASSINIT_LOAD, + SDL_GPUPASSINIT_CLEAR +} SDL_GpuPassInit; + +typedef enum SDL_GpuCullFace +{ + SDL_GPUCULLFACE_BACK, + SDL_GPUCULLFACE_FRONT, + SDL_GPUCULLFACE_NONE +} SDL_GpuCullFace; + +typedef struct SDL_GpuColorAttachmentDescription +{ + SDL_GpuTexture *texture; /* MUST be created with render target support! */ + SDL_GpuPassInit color_init; + double clear_red; + double clear_green; + double clear_blue; + double clear_alpha; +} SDL_GpuColorAttachmentDescription; + +typedef struct SDL_GpuDepthAttachmentDescription +{ + SDL_GpuTexture *texture; /* MUST be created with render target support! */ + SDL_GpuPassInit depth_init; + double clear_depth; +} SDL_GpuDepthAttachmentDescription; + +typedef struct SDL_GpuStencilAttachmentDescription +{ + SDL_GpuTexture *texture; /* MUST be created with render target support! */ + SDL_GpuPassInit stencil_init; + Uint32 clear_stencil; +} SDL_GpuDepthAttachmentDescription; + +/* start encoding a render pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. */ +SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *name, SDL_GpuCommandBuffer *cmdbuf, + Uint32 num_color_attachments, + const SDL_GpuColorAttachmentDescription *color_attachments, + const SDL_GpuDepthAttachmentDescription *depth_attachment, + const SDL_GpuStencilAttachmentDescription *stencil_attachment); + + +/* + * These functions encode commands into the render pass... 
+ * + * New states can be encoded into the render pass with these function and future encoded + * commands will use it. Previously encoded commands use whatever the current state + * was set to at the time. Try not to encode redundant state changes into a render pass + * as they will take resources to do nothing. + */ +void SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); +void SDL_GpuSetRenderPassDepthStencil(SDL_GpuRenderPass *pass, SDL_GpuDepthStencil *depthstencil); + +/* non-zero to fill triangles, SDL_FALSE to just draw lines (wireframe). If never set, the render pass defaults to SDL_TRUE. */ +void SDL_GpuSetRenderPassFillMode(SDL_GpuRenderPass *pass, const SDL_bool filled); + +/* non-zero to treak clockwise winding as front-facing, SDL_FALSE for counter-clockwise. If never set, the render pass defaults to SDL_TRUE. */ +void SDL_GpuSetRenderPassWinding(SDL_GpuRenderPass *pass, const SDL_bool clockwise); + +/* If never set, the render pass defaults to SDL_GPUCULLFACE_BACK. */ +void SDL_GpuSetRenderPassCullMode(SDL_GpuRenderPass *pass, const SDL_GpuCullFace cullfront); + +void SDL_GpuSetRenderPassDepthBias(SDL_GpuRenderPass *pass, const float bias, const float scale, const float clamp); + +/* If never set, defaults to zero for both front and back. */ +void SDL_GpuSetRenderPassStencilReferenceValues(SDL_GpuRenderPass *pass, const Uint32 front, const Uint32 back); + +void SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar); +void SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height); +void SDL_GpuSetRenderBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha); + +void SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); +void SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); +void SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); + +void SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); +void SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); +void SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); + +typedef enum SDL_GpuPrimitive +{ + SDL_GPUPRIM_POINT, + SDL_GPUPRIM_LINE, + SDL_GPUPRIM_LINESTRIP, + SDL_GPUPRIM_TRIANGLE, + SDL_GPUPRIM_TRIANGLESTRIP +} SDL_GpuPrimitive; + +typedef enum SDL_GpuIndexType +{ + SDL_GPUINDEXTYPE_UINT16, + SDL_GPUINDEXTYPE_UINT32 +} SDL_GpuIndexType; + +void SDL_GpuDrawPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 vertex_start, Uint32 vertex_count); +void SDL_GpuDrawIndexedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); + +/* Done encoding this render pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ +void SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); + +/* start encoding a blit pass to a command buffer. You can only encode one type of pass to a command buffer at a time. 
End this pass to start encoding another. */ +SDL_GpuBlitPass *SDL_GpuStartBlitPass(const char *name, SDL_GpuCommandBuffer *cmdbuf); +void SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, + Uint32 srcx, Uint32 srcy, Uint32 srcz, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, + Uint32 dstx, Uint32 dsty, Uint32 dstz); + +void SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, unsigned char value); + +void SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); + +void SDL_GpuCopyBetweenBuffers(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); + +void SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, + Uint32 srcpitch, Uint32 srcimgpitch, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, + Uint32 dstx, Uint32 dsty, Uint32 dstz); + +void SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, + Uint32 srcx, Uint32 srcy, Uint32 srcz, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); + +/* Done encoding this blit pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ +void SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass); + + +/* + * COMMAND BUFFER SUBMISSION ... + * + * When submitting command buffers, you can optionally specify a fence. This fence object is used to tell you when + * the GPU has completed the work submitted in this batch, so your program can tell when it's completed some effort + * and if it's safe to touch resources that are no longer in-flight. + */ +typedef struct SDL_GpuFence SDL_GpuFence; +SDL_GpuFence *SDL_GpuCreateFence(SDL_GpuDevice *device); +void SDL_GpuDestroyFence(SDL_GpuFence *fence); +int SDL_GpuQueryFence(SDL_GpuFence *fence); +int SDL_GpuResetFence(SDL_GpuFence *fence); +int SDL_GpuWaitFence(SDL_GpuFence *fence); + +/* + * Once you've encoded your command buffer(s), you can submit them to the GPU for executing. + * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. + */ +void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, const SDL_bool also_present, SDL_GpuFence *fence); + +/* Ends C function definitions when using C++ */ +#ifdef __cplusplus +} +#endif +#include "close_code.h" + +#endif /* SDL_gpu_h_ */ + +/* vi: set ts=4 sw=4 expandtab: */ diff --git a/include/SDL_gpu_compiler.h b/include/SDL_gpu_compiler.h new file mode 100644 index 0000000000000..9ae957782cea2 --- /dev/null +++ b/include/SDL_gpu_compiler.h @@ -0,0 +1,59 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. 
The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SDL_gpu_compiler_h_ +#define SDL_gpu_compiler_h_ + +/** + * \file SDL_gpu_compiler.h + * + * Header for the SDL GPU compiler routines. + */ + +#include "SDL_gpu.h" + +#include "begin_code.h" +/* Set up for C function definitions, even when using C++ */ +#ifdef __cplusplus +extern "C" { +#endif + +/* !!! FIXME: this all needs formal (and significantly more robust) documentation. */ + +/* + * This builds shader source code into bytecode. One could use this to cook + * shaders offline, or pass dynamic strings at runtime. This is meant to favor + * speed over optimization. If one really wants a strong optimizing compiler, + * one should build an external tool. :) + */ +int MOJOGPU_CompileShader(const char *src, const Uint32 srclen, const char *type, const char *mainfn, Uint8 **result, Uint32 *resultlen); + +/* !!! FIXME: There's probably a lot of other stuff we want to put in here. */ + +/* Ends C function definitions when using C++ */ +#ifdef __cplusplus +} +#endif +#include "close_code.h" + +#endif /* SDL_gpu_compiler_h_ */ + +/* vi: set ts=4 sw=4 expandtab: */ From 250bb0dd14a4a5015f3ba6579a80df86fdf9b814 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 11 Apr 2022 17:00:26 -0400 Subject: [PATCH 02/54] gpu: Fixed namespace muscle memory. :) --- include/SDL_gpu_compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/SDL_gpu_compiler.h b/include/SDL_gpu_compiler.h index 9ae957782cea2..6c58a85ea2a06 100644 --- a/include/SDL_gpu_compiler.h +++ b/include/SDL_gpu_compiler.h @@ -44,7 +44,7 @@ extern "C" { * speed over optimization. If one really wants a strong optimizing compiler, * one should build an external tool. :) */ -int MOJOGPU_CompileShader(const char *src, const Uint32 srclen, const char *type, const char *mainfn, Uint8 **result, Uint32 *resultlen); +int SDL_GpuCompileShader(const char *src, const Uint32 srclen, const char *type, const char *mainfn, Uint8 **result, Uint32 *resultlen); /* !!! FIXME: There's probably a lot of other stuff we want to put in here. */ From 1398624768fc1a89f12a76aadb71ddfec273ab43 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 11 Apr 2022 23:51:42 -0400 Subject: [PATCH 03/54] gpu: Remove some vertex formats. SDL_GPUVERTFMT_CHAR SDL_GPUVERTFMT_UCHAR SDL_GPUVERTFMT_UCHAR3 SDL_GPUVERTFMT_CHAR3 SDL_GPUVERTFMT_USHORT3 SDL_GPUVERTFMT_SHORT3 SDL_GPUVERTFMT_HALF3 (plus normalized versions of these.) Mentioned by @darksylinc: "Get rid of them (including the NORMALIZED variants). GPU support is flaky for them (or non-existent) and requires expensive emulation, which is hard to get right due to alignment issues." 
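For example, client code that previously wanted a 3-component byte attribute can pad out to the
surviving 4-component formats instead. An illustrative sketch (the struct and attribute index are
made up, not something this patch adds):

    typedef struct MyVertex
    {
        float pos[3];      /* SDL_GPUVERTFMT_FLOAT3 is still available */
        Uint8 color[4];    /* RGB plus one pad byte -> SDL_GPUVERTFMT_UCHAR4_NORMALIZED */
    } MyVertex;

    const SDL_GpuVertexAttributeDescription color_attr = {
        SDL_GPUVERTFMT_UCHAR4_NORMALIZED,  /* format */
        offsetof(MyVertex, color),         /* offset */
        sizeof (MyVertex),                 /* stride */
        1                                  /* attribute index */
    };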
--- include/SDL_gpu.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 184a6ef8df3cc..c82b985c7f916 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -265,41 +265,28 @@ typedef struct SDL_GpuColorAttachmentDescription typedef enum SDL_GpuVertexFormat { - SDL_GPUVERTFMT_UCHAR, SDL_GPUVERTFMT_UCHAR2, - SDL_GPUVERTFMT_UCHAR3, SDL_GPUVERTFMT_UCHAR4, - SDL_GPUVERTFMT_CHAR, SDL_GPUVERTFMT_CHAR2, - SDL_GPUVERTFMT_CHAR3, SDL_GPUVERTFMT_CHAR4, - SDL_GPUVERTFMT_UCHAR_NORMALIZED, SDL_GPUVERTFMT_UCHAR2_NORMALIZED, - SDL_GPUVERTFMT_UCHAR3_NORMALIZED, SDL_GPUVERTFMT_UCHAR4_NORMALIZED, - SDL_GPUVERTFMT_CHAR_NORMALIZED, SDL_GPUVERTFMT_CHAR2_NORMALIZED, - SDL_GPUVERTFMT_CHAR3_NORMALIZED, SDL_GPUVERTFMT_CHAR4_NORMALIZED, SDL_GPUVERTFMT_USHORT, SDL_GPUVERTFMT_USHORT2, - SDL_GPUVERTFMT_USHORT3, SDL_GPUVERTFMT_USHORT4, SDL_GPUVERTFMT_SHORT, SDL_GPUVERTFMT_SHORT2, - SDL_GPUVERTFMT_SHORT3, SDL_GPUVERTFMT_SHORT4, SDL_GPUVERTFMT_USHORT_NORMALIZED, SDL_GPUVERTFMT_USHORT2_NORMALIZED, - SDL_GPUVERTFMT_USHORT3_NORMALIZED, SDL_GPUVERTFMT_USHORT4_NORMALIZED, SDL_GPUVERTFMT_SHORT_NORMALIZED, SDL_GPUVERTFMT_SHORT2_NORMALIZED, - SDL_GPUVERTFMT_SHORT3_NORMALIZED, SDL_GPUVERTFMT_SHORT4_NORMALIZED, SDL_GPUVERTFMT_HALF, SDL_GPUVERTFMT_HALF2, - SDL_GPUVERTFMT_HALF3, SDL_GPUVERTFMT_HALF4, SDL_GPUVERTFMT_FLOAT, SDL_GPUVERTFMT_FLOAT2, From a8c72d87accf45b0ae3bfc5b8b46813da26eda2e Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 13 Apr 2022 15:36:05 -0400 Subject: [PATCH 04/54] gpu: Cook depth stencil state into SDL_GpuPipeline. Metal keeps it separate, but D3D12 and Vulkan do not, so it makes sense to manage a small cache of MTLDepthStencilState objects it for Metal and not have to cache extra PSOs for everyone else. --- include/SDL_gpu.h | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index c82b985c7f916..2185972c79060 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -310,27 +310,6 @@ typedef struct SDL_GpuVertexAttributeDescription Uint32 index; } SDL_GpuVertexAttributeDescription; -#define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ -#define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? */ -typedef struct SDL_GpuPipelineDescription -{ - const char *name; - SDL_GpuShader *vertex_shader; - SDL_GpuShader *fragment_shader; - Uint32 num_vertex_attributes; - SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; - Uint32 num_color_attachments; - SDL_GpuColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; - SDL_GpuPixelFormat depth_format; - SDL_GpuPixelFormat stencil_format; -} SDL_GpuPipelineDescription; - -typedef struct SDL_GpuPipeline SDL_GpuPipeline; -SDL_GpuPipeline *SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc); -void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); - -/* DepthStencil (or rather, state relating to depth and stencil, which get clumped together) is something - you cook once into an object and reuse over and over). */ typedef enum SDL_GpuCompareFunction { SDL_GPUCMPFUNC_NEVER, @@ -355,9 +334,19 @@ typedef enum SDL_GpuStencilOperation SDL_GPUSTENCILOP_DECREMENTWRAP } SDL_GpuStencilOperation; -typedef struct SDL_GpuDepthStencilDescription +#define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ +#define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? 
*/ +typedef struct SDL_GpuPipelineDescription { const char *name; + SDL_GpuShader *vertex_shader; + SDL_GpuShader *fragment_shader; + Uint32 num_vertex_attributes; + SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; + Uint32 num_color_attachments; + SDL_GpuColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; + SDL_GpuPixelFormat depth_format; + SDL_GpuPixelFormat stencil_format; SDL_bool depth_write_enabled; Uint32 stencil_read_mask; Uint32 stencil_write_mask; @@ -366,11 +355,11 @@ typedef struct SDL_GpuDepthStencilDescription SDL_GpuStencilOperation stencil_fail; SDL_GpuStencilOperation depth_fail; SDL_GpuStencilOperation depth_and_stencil_pass; -} SDL_GpuDepthStencilDescription; +} SDL_GpuPipelineDescription; -typedef struct SDL_GpuDepthStencil SDL_GpuDepthStencil; -SDL_GpuDepthStencil *SDL_GpuCreateDepthStencil(SDL_GpuDevice *device, const SDL_GpuDepthStencilDescription *desc); -void SDL_GpuDestroyDepthStencil(SDL_GpuDepthStencil *depthstencil); +typedef struct SDL_GpuPipeline SDL_GpuPipeline; +SDL_GpuPipeline *SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc); +void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); typedef enum SDL_GpuSamplerAddressMode @@ -435,7 +424,6 @@ void SDL_GpuDestroySampler(SDL_GpuSampler *sampler); typedef struct SDL_GpuStateCache SDL_GpuStateCache; SDL_GpuStateCache *SDL_GpuCreateStateCache(const char *name, SDL_GpuDevice *device); SDL_GpuPipeline *SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc); -SDL_GpuDepthStencil *SDL_GpuGetCachedDepthStencil(SDL_GpuStateCache *cache, const SDL_GpuDepthStencilDescription *desc); SDL_GpuSampler *SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc); void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); @@ -515,7 +503,6 @@ SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *name, SDL_GpuCommandBuffer * as they will take resources to do nothing. */ void SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); -void SDL_GpuSetRenderPassDepthStencil(SDL_GpuRenderPass *pass, SDL_GpuDepthStencil *depthstencil); /* non-zero to fill triangles, SDL_FALSE to just draw lines (wireframe). If never set, the render pass defaults to SDL_TRUE. */ void SDL_GpuSetRenderPassFillMode(SDL_GpuRenderPass *pass, const SDL_bool filled); From be2442fb8fe0c5ac95c6d0e497089ffca1fc4971 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 13 Apr 2022 15:39:32 -0400 Subject: [PATCH 05/54] gpu: Specify primitive topology as part of SDL_GpuPipelineDescription. --- include/SDL_gpu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 2185972c79060..1ba761f7faa84 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -334,11 +334,19 @@ typedef enum SDL_GpuStencilOperation SDL_GPUSTENCILOP_DECREMENTWRAP } SDL_GpuStencilOperation; +typedef enum SDL_GpuTopology +{ + SDL_GPUTOPOLOGY_POINT, + SDL_GPUTOPOLOGY_LINE, + SDL_GPUTOPOLOGY_TRIANGLE +} SDL_GpuTopology; + #define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ #define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? 
*/ typedef struct SDL_GpuPipelineDescription { const char *name; + SDL_GpuTopology topology; SDL_GpuShader *vertex_shader; SDL_GpuShader *fragment_shader; Uint32 num_vertex_attributes; @@ -530,6 +538,10 @@ void SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer * void SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); void SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); + +/* You need to have a SDL_GpuPipeline with a matching SDL_GpuTopology when you draw + (so if the topology is SDL_GPUTOPOLOGY_TRIANGLE, you can use SDL_GPUPRIM_TRIANGLE or + SDL_GPUPRIM_TRIANGLESTRIP but not SDL_GPUPRIM_LINE, etc) */ typedef enum SDL_GpuPrimitive { SDL_GPUPRIM_POINT, From 95af012ae7ee1e2c4828c9387ca6d06d332a2bb1 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 13 Apr 2022 15:42:31 -0400 Subject: [PATCH 06/54] gpu: Two unrelated structs were both named SDL_GpuColorAttachmentDescription. --- include/SDL_gpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 1ba761f7faa84..5136d52fb4052 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -247,7 +247,7 @@ typedef enum SDL_GpuBlendFactor SDL_GPUBLENDFACTOR_ONEMINUSSOURCE1ALPHA } SDL_GpuBlendFactor; -typedef struct SDL_GpuColorAttachmentDescription +typedef struct SDL_GpuPipelineColorAttachmentDescription { SDL_GpuPixelFormat pixel_format; SDL_bool writemask_enabled_red; @@ -352,7 +352,7 @@ typedef struct SDL_GpuPipelineDescription Uint32 num_vertex_attributes; SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; Uint32 num_color_attachments; - SDL_GpuColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; + SDL_GpuPipelineColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; SDL_GpuPixelFormat depth_format; SDL_GpuPixelFormat stencil_format; SDL_bool depth_write_enabled; From 37081ef906f230d9545b29c015199828a66f129b Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Fri, 15 Apr 2022 09:25:53 -0400 Subject: [PATCH 07/54] gpu: Some changes and notes as the design evolves. --- include/SDL_gpu.h | 55 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 5136d52fb4052..da0330f142e45 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -48,6 +48,7 @@ extern "C" { /* * The basic sizzle reel: + * * - You work in terms of modern GPU APIs without having to bog down * in their specific minutiae. * - It works on several APIs behind the scenes. @@ -76,7 +77,7 @@ extern "C" { * Some rules and limitations: * - There is no software renderer, and this API will not make heroic * efforts to work on ancient GPUs and APIs. - * - this doesn't expose all of Metal/Vulkan/DX12. We are trying to + * - This doesn't expose all of Metal/Vulkan/DX12. We are trying to * drastically improve on SDL's render API functionality while * keeping it simple-ish. Modern APIs put most of the heavy lifting * into shaders, command queues, and precooked state objects, and @@ -110,6 +111,9 @@ extern "C" { * to pull lowlevel API handles out of this to use in your own app. * If you want to do this: just copy the source code out of here * into your app, do what you like with it, and don't file a bug report. + * (!!! 
FIXME: it's been pointed out to me that there's a value in + * getting the lowlevel handles so you can plug them into OpenXR for + * rendering in a VR headset, and this seems worthwhile.) * - The shader compiler is meant to be fast and lightweight. It does * not do heavy optimizations of your code. It's meant to let you * deal with source code at runtime, if you need to generate it on @@ -123,21 +127,27 @@ extern "C" { * be implemented as a standard piece of the runtime. * * - * some things that modern GPU APIs offer that we aren't (currently) exposing: + * some things that modern GPU APIs offer that we aren't exposing + * (but that does not mean we will _never_ expose)... * * - compute * - geometry shaders - * - instancing * - tesselation * - ray tracing - * - multisample ( !!! FIXME: maybe add this) * - device enumeration/selection * - multiple command queues (you can encode multiple command buffers, from multiple threads, though) - * - compressed texture formats * - Most of the wild list of uncompressed texture formats. - * - texture arrays * - texture slices (with the exception of cubemap faces) * + * some things I said no to originally that I was later convinced to support: + * + * - multisample + * - texture arrays + * - compressed texture formats + * - instancing + */ + +/* * !!! FIXME: enumerate lowlevel APIs? In theory a Windows machine * could offer all of Direct3D 9-12, Vulkan, OpenGL, GLES, etc... */ @@ -167,7 +177,9 @@ typedef enum SDL_GpuTextureType SDL_GPUTEXTYPE_1D, SDL_GPUTEXTYPE_2D, SDL_GPUTEXTYPE_CUBE, - SDL_GPUTEXTYPE_3D + SDL_GPUTEXTYPE_3D, + SDL_GPUTEXTYPE_2D_ARRAY, + SDL_GPUTEXTYPE_CUBE_ARRAY } SDL_GpuTextureType; typedef enum SDL_GpuPixelFormat @@ -179,14 +191,18 @@ typedef enum SDL_GpuPixelFormat SDL_GPUPIXELFMT_BGRA8, SDL_GPUPIXELFMT_BGRA8_sRGB, SDL_GPUPIXELFMT_Depth24_Stencil8 - /* !!! FIXME: s3tc? pvrtc? */ + /* !!! FIXME: some sort of YUV format to let movies stream efficiently? */ +} /* !!! FIXME: s3tc? pvrtc? other compressed formats? We'll need a query for what's supported, and/or guarantee it with a software fallback...? */ } SDL_GpuPixelFormat; +/* you can specify multiple values OR'd together for texture usage, for example if you are going to render to it and then later + sample the rendered-to texture's contents in a shader, you'd want RENDER_TARGET|SHADER_READ */ typedef enum SDL_GpuTextureUsage { - SDL_GPUTEXUSAGE_SHADERREAD, - SDL_GPUTEXUSAGE_SHADERWRITE, - SDL_GPUTEXUSAGE_RENDERTARGET + SDL_GPUTEXUSAGE_SHADER_READ = (1 << 0), /* If you sample from a texture, you need this flag. */ + SDL_GPUTEXUSAGE_SHADER_WRITE = (1 << 1), + SDL_GPUTEXUSAGE_RENDER_TARGET = (1 << 2), /* Draw to this texture! You don't need to set SHADER_WRITE to use this flag! */ + SDL_GPUTEXUSAGE_NO_SAMPLE = (1 << 3) /* You won't sample from this texture at all, just read or write it. */ } SDL_GpuTextureUsage; typedef struct SDL_GpuTextureDescription @@ -194,10 +210,10 @@ typedef struct SDL_GpuTextureDescription const char *name; SDL_GpuTextureType texture_type; SDL_GpuPixelFormat pixel_format; - SDL_GpuTextureUsage usage; + SDL_GpuTextureUsage usage; /* OR SDL_GpuTextureUsage values together */ Uint32 width; Uint32 height; - Uint32 depth; + Uint32 depth_or_slices; Uint32 mipmap_levels; } SDL_GpuTextureDescription; @@ -334,6 +350,10 @@ typedef enum SDL_GpuStencilOperation SDL_GPUSTENCILOP_DECREMENTWRAP } SDL_GpuStencilOperation; +/* !!! FIXME: is there ever a time you're going to want to change a given pipeline + !!! FIXME: from TRIANGLE to TRIANGLESTRIP? 
Maybe we should just put the + !!! FIXME: specific primitive type in the pipeline and take the subtype out of + !!! FIXME: the draw call, to simplify the API. */ typedef enum SDL_GpuTopology { SDL_GPUTOPOLOGY_POINT, @@ -350,9 +370,9 @@ typedef struct SDL_GpuPipelineDescription SDL_GpuShader *vertex_shader; SDL_GpuShader *fragment_shader; Uint32 num_vertex_attributes; - SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; + SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; /* !!! FIXME: maybe don't hardcode a static array? */ Uint32 num_color_attachments; - SDL_GpuPipelineColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; + SDL_GpuPipelineColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; /* !!! FIXME: maybe don't hardcode a static array? */ SDL_GpuPixelFormat depth_format; SDL_GpuPixelFormat stencil_format; SDL_bool depth_write_enabled; @@ -559,6 +579,8 @@ typedef enum SDL_GpuIndexType void SDL_GpuDrawPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 vertex_start, Uint32 vertex_count); void SDL_GpuDrawIndexedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); +void SDL_GpuDrawInstancedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); +void SDL_GpuDrawInstancedIndexedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); /* Done encoding this render pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ void SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); @@ -609,9 +631,12 @@ int SDL_GpuWaitFence(SDL_GpuFence *fence); /* * Once you've encoded your command buffer(s), you can submit them to the GPU for executing. * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. + * Once a command buffer is submitted, its pointer becomes invalid. Create a new one for the next set of commands. */ void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, const SDL_bool also_present, SDL_GpuFence *fence); +/* !!! FIXME: add a SDL_GpuAbandonCommandBuffer() function for freeing a buffer without submitting it? */ + /* Ends C function definitions when using C++ */ #ifdef __cplusplus } From ea7d2ae138f20dc877259acfbdd09dd7051ba436 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sat, 16 Apr 2022 11:51:09 -0400 Subject: [PATCH 08/54] gpu: Move primitive type into the PSO, remove topology. It seems silly to specify this separately, and it's only going to confuse things when an app tries to make a PSO for lines and then make a draw call with triangles. Also "topology" feels like a really esoteric word to the uninitiated, even if isn't really. --- include/SDL_gpu.h | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index da0330f142e45..e3ca9bc624ddc 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -350,23 +350,21 @@ typedef enum SDL_GpuStencilOperation SDL_GPUSTENCILOP_DECREMENTWRAP } SDL_GpuStencilOperation; -/* !!! 
FIXME: is there ever a time you're going to want to change a given pipeline - !!! FIXME: from TRIANGLE to TRIANGLESTRIP? Maybe we should just put the - !!! FIXME: specific primitive type in the pipeline and take the subtype out of - !!! FIXME: the draw call, to simplify the API. */ -typedef enum SDL_GpuTopology +typedef enum SDL_GpuPrimitive { - SDL_GPUTOPOLOGY_POINT, - SDL_GPUTOPOLOGY_LINE, - SDL_GPUTOPOLOGY_TRIANGLE -} SDL_GpuTopology; + SDL_GPUPRIM_POINT, + SDL_GPUPRIM_LINE, + SDL_GPUPRIM_LINESTRIP, + SDL_GPUPRIM_TRIANGLE, + SDL_GPUPRIM_TRIANGLESTRIP +} SDL_GpuPrimitive; #define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ #define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? */ typedef struct SDL_GpuPipelineDescription { const char *name; - SDL_GpuTopology topology; + SDL_GpuPrimitive primitive; SDL_GpuShader *vertex_shader; SDL_GpuShader *fragment_shader; Uint32 num_vertex_attributes; @@ -559,17 +557,7 @@ void SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler void SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); -/* You need to have a SDL_GpuPipeline with a matching SDL_GpuTopology when you draw - (so if the topology is SDL_GPUTOPOLOGY_TRIANGLE, you can use SDL_GPUPRIM_TRIANGLE or - SDL_GPUPRIM_TRIANGLESTRIP but not SDL_GPUPRIM_LINE, etc) */ -typedef enum SDL_GpuPrimitive -{ - SDL_GPUPRIM_POINT, - SDL_GPUPRIM_LINE, - SDL_GPUPRIM_LINESTRIP, - SDL_GPUPRIM_TRIANGLE, - SDL_GPUPRIM_TRIANGLESTRIP -} SDL_GpuPrimitive; +/* Drawing! */ typedef enum SDL_GpuIndexType { @@ -577,10 +565,10 @@ typedef enum SDL_GpuIndexType SDL_GPUINDEXTYPE_UINT32 } SDL_GpuIndexType; -void SDL_GpuDrawPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 vertex_start, Uint32 vertex_count); -void SDL_GpuDrawIndexedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); -void SDL_GpuDrawInstancedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); -void SDL_GpuDrawInstancedIndexedPrimitives(SDL_GpuRenderPass *pass, const SDL_GpuPrimitive primitive, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); +void SDL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count); +void SDL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); +void SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); +void SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); /* Done encoding this render pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ void SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); From ebb616ae655ec8a2f8f0d3716fd653c63700e5cb Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sun, 17 Apr 2022 01:33:07 -0400 Subject: [PATCH 09/54] gpu: Move the fixed-function render pass stuff into the Pipeline. 
WebGPU does this too, and it makes sense since there's so little of it, and we're already going to have a cache that maps SDL pipelines to native pipelines anyhow, since different APIs store different things in their PSOs (D3D12 stores topology, Metal doesn't store depth stencil stuff in there, etc). --- include/SDL_gpu.h | 58 ++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index e3ca9bc624ddc..07b45cf1f9629 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -359,6 +359,27 @@ typedef enum SDL_GpuPrimitive SDL_GPUPRIM_TRIANGLESTRIP } SDL_GpuPrimitive; +typedef enum SDL_GpuFillMode +{ + SDL_GPUFILL_FILL, /* fill polygons */ + SDL_GPUFILL_LINE /* wireframe mode */ + /* !!! FIXME: Vulkan has POINT and FILL_RECTANGLE_NV here, but Metal and D3D12 do not. */ +} SDL_GpuFillMode; + +typedef enum SDL_GpuFrontFace +{ + SDL_GPUFRONTFACE_COUNTER_CLOCKWISE, + SDL_GPUFRONTFACE_CLOCKWISE +} SDL_GpuFrontFace; + +typedef enum SDL_GpuCullFace +{ + SDL_GPUCULLFACE_BACK, + SDL_GPUCULLFACE_FRONT, + SDL_GPUCULLFACE_NONE + /* !!! FIXME: Vulkan lets you cull front-and-back (i.e. - everything) */ +} SDL_GpuCullFace; + #define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ #define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? */ typedef struct SDL_GpuPipelineDescription @@ -376,17 +397,31 @@ typedef struct SDL_GpuPipelineDescription SDL_bool depth_write_enabled; Uint32 stencil_read_mask; Uint32 stencil_write_mask; + Uint32 stencil_reference_front; + Uint32 stencil_reference_back; SDL_GpuCompareFunction depth_function; SDL_GpuCompareFunction stencil_function; SDL_GpuStencilOperation stencil_fail; SDL_GpuStencilOperation depth_fail; SDL_GpuStencilOperation depth_and_stencil_pass; + SDL_GpuFillMode fill_mode; + SDL_GpuFrontFace front_face; + SDL_GpuCullFace cull_face; + float depth_bias; + float depth_bias_scale; + float depth_bias_clamp; } SDL_GpuPipelineDescription; typedef struct SDL_GpuPipeline SDL_GpuPipeline; SDL_GpuPipeline *SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc); void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); +/* these make it easier to set up a Pipeline description; set the defaults (or + start with an existing pipeline's state) then change what you like. */ +void SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc); +void SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); + + typedef enum SDL_GpuSamplerAddressMode { @@ -453,6 +488,8 @@ SDL_GpuPipeline *SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_Gp SDL_GpuSampler *SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc); void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); +// !!! FIXME: read/write state caches to disk? + /* * COMMAND BUFFERS... @@ -481,13 +518,6 @@ typedef enum SDL_GpuPassInit SDL_GPUPASSINIT_CLEAR } SDL_GpuPassInit; -typedef enum SDL_GpuCullFace -{ - SDL_GPUCULLFACE_BACK, - SDL_GPUCULLFACE_FRONT, - SDL_GPUCULLFACE_NONE -} SDL_GpuCullFace; - typedef struct SDL_GpuColorAttachmentDescription { SDL_GpuTexture *texture; /* MUST be created with render target support! 
*/ @@ -530,20 +560,6 @@ SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *name, SDL_GpuCommandBuffer */ void SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); -/* non-zero to fill triangles, SDL_FALSE to just draw lines (wireframe). If never set, the render pass defaults to SDL_TRUE. */ -void SDL_GpuSetRenderPassFillMode(SDL_GpuRenderPass *pass, const SDL_bool filled); - -/* non-zero to treak clockwise winding as front-facing, SDL_FALSE for counter-clockwise. If never set, the render pass defaults to SDL_TRUE. */ -void SDL_GpuSetRenderPassWinding(SDL_GpuRenderPass *pass, const SDL_bool clockwise); - -/* If never set, the render pass defaults to SDL_GPUCULLFACE_BACK. */ -void SDL_GpuSetRenderPassCullMode(SDL_GpuRenderPass *pass, const SDL_GpuCullFace cullfront); - -void SDL_GpuSetRenderPassDepthBias(SDL_GpuRenderPass *pass, const float bias, const float scale, const float clamp); - -/* If never set, defaults to zero for both front and back. */ -void SDL_GpuSetRenderPassStencilReferenceValues(SDL_GpuRenderPass *pass, const Uint32 front, const Uint32 back); - void SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar); void SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height); void SDL_GpuSetRenderBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha); From 3697ef7182f920c5bd77f0cb90d056eeaad0014f Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sun, 17 Apr 2022 01:42:11 -0400 Subject: [PATCH 10/54] gpu: Have app specify a pipeline when starting a render pass. They're just going to have to do this seperately otherwise. --- include/SDL_gpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 07b45cf1f9629..8987ad2ba2773 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -544,6 +544,7 @@ typedef struct SDL_GpuStencilAttachmentDescription /* start encoding a render pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. */ SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *name, SDL_GpuCommandBuffer *cmdbuf, + SDL_GpuPipeline *initial_pipeline, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, From 8c757ec088c721fcfbbb75aa99c77ab27dbd08ad Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 20 Apr 2022 23:16:54 -0400 Subject: [PATCH 11/54] gpu: Patched header to actually compile. --- include/SDL_gpu.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 8987ad2ba2773..bd06bcfa427da 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -154,11 +154,12 @@ extern "C" { /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ -typedef struct SDL_GpuDevice *SDL_GpuDevice; +typedef struct SDL_GpuDevice SDL_GpuDevice; SDL_GpuDevice *SDL_GpuCreateDevice(const char *name); /* `name` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); /* CPU buffers live in RAM and can be accessed by the CPU. 
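   A rough sketch of filling one with the app's own data (`staging` is assumed to come from
   SDL_GpuCreateCPUBuffer() and `pixels`/`pixels_len` are assumed to be the app's own allocation):

       Uint32 buflen = 0;
       void *ptr = SDL_GpuLockCPUBuffer(staging, &buflen);
       SDL_memcpy(ptr, pixels, SDL_min(pixels_len, buflen));
       SDL_GpuUnlockCPUBuffer(staging);
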
*/ +typedef struct SDL_GpuBuffer SDL_GpuBuffer; SDL_GpuBuffer *SDL_GpuCreateCPUBuffer(SDL_GpuDevice *device, const Uint32 buflen); void *SDL_GpuLockCPUBuffer(SDL_GpuBuffer *buffer, Uint32 *_buflen); void SDL_GpuUnlockCPUBuffer(SDL_GpuBuffer *buffer); @@ -192,7 +193,7 @@ typedef enum SDL_GpuPixelFormat SDL_GPUPIXELFMT_BGRA8_sRGB, SDL_GPUPIXELFMT_Depth24_Stencil8 /* !!! FIXME: some sort of YUV format to let movies stream efficiently? */ -} /* !!! FIXME: s3tc? pvrtc? other compressed formats? We'll need a query for what's supported, and/or guarantee it with a software fallback...? */ + /* !!! FIXME: s3tc? pvrtc? other compressed formats? We'll need a query for what's supported, and/or guarantee it with a software fallback...? */ } SDL_GpuPixelFormat; /* you can specify multiple values OR'd together for texture usage, for example if you are going to render to it and then later @@ -217,10 +218,12 @@ typedef struct SDL_GpuTextureDescription Uint32 mipmap_levels; } SDL_GpuTextureDescription; +typedef struct SDL_GpuTexture SDL_GpuTexture; SDL_GpuTexture *SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc); void SDL_GpuDestroyTexture(SDL_GpuTexture *texture); /* compiling shaders is a different (and optional at runtime) piece, in SDL_gpu_compiler.h */ +typedef struct SDL_GpuShader SDL_GpuShader; SDL_GpuShader *SDL_GpuLoadShader(SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); void SDL_GpuDestroyShader(SDL_GpuShader *shader); @@ -263,6 +266,7 @@ typedef enum SDL_GpuBlendFactor SDL_GPUBLENDFACTOR_ONEMINUSSOURCE1ALPHA } SDL_GpuBlendFactor; +/* !!! FIXME: let's call this something else. */ typedef struct SDL_GpuPipelineColorAttachmentDescription { SDL_GpuPixelFormat pixel_format; @@ -277,7 +281,7 @@ typedef struct SDL_GpuPipelineColorAttachmentDescription SDL_GpuBlendOperation rgb_blend_op; SDL_GpuBlendFactor rgb_src_blend_factor; SDL_GpuBlendFactor rgb_dst_blend_factor; -} SDL_GpuColorAttachmentDescription; +} SDL_GpuPipelineColorAttachmentDescription; typedef enum SDL_GpuVertexFormat { @@ -389,9 +393,9 @@ typedef struct SDL_GpuPipelineDescription SDL_GpuShader *vertex_shader; SDL_GpuShader *fragment_shader; Uint32 num_vertex_attributes; - SDL_GpuVertexAttributeDescription[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; /* !!! FIXME: maybe don't hardcode a static array? */ + SDL_GpuVertexAttributeDescription vertices[SDL_GPU_MAX_VERTEX_ATTRIBUTES]; /* !!! FIXME: maybe don't hardcode a static array? */ Uint32 num_color_attachments; - SDL_GpuPipelineColorAttachmentDescription[SDL_GPU_MAX_COLOR_ATTACHMENTS]; /* !!! FIXME: maybe don't hardcode a static array? */ + SDL_GpuPipelineColorAttachmentDescription color_attachments[SDL_GPU_MAX_COLOR_ATTACHMENTS]; /* !!! FIXME: maybe don't hardcode a static array? */ SDL_GpuPixelFormat depth_format; SDL_GpuPixelFormat stencil_format; SDL_bool depth_write_enabled; @@ -417,7 +421,8 @@ SDL_GpuPipeline *SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipel void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); /* these make it easier to set up a Pipeline description; set the defaults (or - start with an existing pipeline's state) then change what you like. */ + start with an existing pipeline's state) then change what you like. + Note that the `name` and shader fields are read-only; do not modify or free them! 
*/ void SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc); void SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); @@ -481,6 +486,14 @@ void SDL_GpuDestroySampler(SDL_GpuSampler *sampler); * or create/cache a new one as needed. You can have multiple caches, so as * to group related states together. You can then dump all the states at * once, perhaps on level load, by deleting a specific cache. + * + * You do not own objects in these caches; do not destroy them directly. They + * will be destroyed when their owning cache is destroyed. + * + * Thread safety: each type of cache (pipeline, sampler) has its own internal + * mutex, which it locks during SDL_GpuGetCached* calls. It is not safe to + * call SDL_GpuDestroyStateCache while that cache is being used by another + * thread. */ typedef struct SDL_GpuStateCache SDL_GpuStateCache; SDL_GpuStateCache *SDL_GpuCreateStateCache(const char *name, SDL_GpuDevice *device); @@ -540,11 +553,11 @@ typedef struct SDL_GpuStencilAttachmentDescription SDL_GpuTexture *texture; /* MUST be created with render target support! */ SDL_GpuPassInit stencil_init; Uint32 clear_stencil; -} SDL_GpuDepthAttachmentDescription; +} SDL_GpuStencilAttachmentDescription; /* start encoding a render pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. */ +typedef struct SDL_GpuRenderPass SDL_GpuRenderPass; SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *name, SDL_GpuCommandBuffer *cmdbuf, - SDL_GpuPipeline *initial_pipeline, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, @@ -591,6 +604,7 @@ void SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SD void SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); /* start encoding a blit pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. */ +typedef struct SDL_GpuBlitPass SDL_GpuBlitPass; SDL_GpuBlitPass *SDL_GpuStartBlitPass(const char *name, SDL_GpuCommandBuffer *cmdbuf); void SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, From 72c7d16d05a287204af1ca64f785b2675c9cd64d Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 20 Apr 2022 23:21:20 -0400 Subject: [PATCH 12/54] gpu: Change "name" to "label" for (maybe more?) clarity Metal and WebGPU both use "label" ... this is just an optional name that might show up in debugging info. --- include/SDL_gpu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index bd06bcfa427da..9fbb46ebcea36 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -155,7 +155,7 @@ extern "C" { /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ typedef struct SDL_GpuDevice SDL_GpuDevice; -SDL_GpuDevice *SDL_GpuCreateDevice(const char *name); /* `name` is for debugging, not a specific device name to access. */ +SDL_GpuDevice *SDL_GpuCreateDevice(const char *label); /* `label` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); /* CPU buffers live in RAM and can be accessed by the CPU. 
*/ @@ -208,7 +208,7 @@ typedef enum SDL_GpuTextureUsage typedef struct SDL_GpuTextureDescription { - const char *name; + const char *label; SDL_GpuTextureType texture_type; SDL_GpuPixelFormat pixel_format; SDL_GpuTextureUsage usage; /* OR SDL_GpuTextureUsage values together */ @@ -388,7 +388,7 @@ typedef enum SDL_GpuCullFace #define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? */ typedef struct SDL_GpuPipelineDescription { - const char *name; + const char *label; SDL_GpuPrimitive primitive; SDL_GpuShader *vertex_shader; SDL_GpuShader *fragment_shader; @@ -422,7 +422,7 @@ void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); /* these make it easier to set up a Pipeline description; set the defaults (or start with an existing pipeline's state) then change what you like. - Note that the `name` and shader fields are read-only; do not modify or free them! */ + Note that the `label` and shader fields are read-only; do not modify or free them! */ void SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc); void SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); @@ -460,7 +460,7 @@ typedef enum SDL_GpuSamplerMipFilter typedef struct SDL_GpuSamplerDescription { - const char *name; + const char *label; SDL_GpuSamplerAddressMode addrmode_u; SDL_GpuSamplerAddressMode addrmode_v; SDL_GpuSamplerAddressMode addrmode_r; @@ -496,7 +496,7 @@ void SDL_GpuDestroySampler(SDL_GpuSampler *sampler); * thread. */ typedef struct SDL_GpuStateCache SDL_GpuStateCache; -SDL_GpuStateCache *SDL_GpuCreateStateCache(const char *name, SDL_GpuDevice *device); +SDL_GpuStateCache *SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device); SDL_GpuPipeline *SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc); SDL_GpuSampler *SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc); void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); @@ -519,7 +519,7 @@ void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); * etc) into the same command buffer. */ typedef struct SDL_GpuCommandBuffer SDL_GpuCommandBuffer; -SDL_GpuCommandBuffer *SDL_GpuCreateCommandBuffer(const char *name, SDL_GpuDevice *device); +SDL_GpuCommandBuffer *SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device); /* RENDERING PASSES... */ @@ -557,7 +557,7 @@ typedef struct SDL_GpuStencilAttachmentDescription /* start encoding a render pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. */ typedef struct SDL_GpuRenderPass SDL_GpuRenderPass; -SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *name, SDL_GpuCommandBuffer *cmdbuf, +SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, @@ -605,7 +605,7 @@ void SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); /* start encoding a blit pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. 
*/ typedef struct SDL_GpuBlitPass SDL_GpuBlitPass; -SDL_GpuBlitPass *SDL_GpuStartBlitPass(const char *name, SDL_GpuCommandBuffer *cmdbuf); +SDL_GpuBlitPass *SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf); void SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, From 84845226bde0428aa462af411004bb13cd5e3f58 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 20 Apr 2022 23:22:24 -0400 Subject: [PATCH 13/54] gpu: First shot at some extremely initial code. This is just the state cache stuff, since it is one small piece that operates totally independent of the lower level backends that don't exist yet. Long road ahead still. --- src/gpu/SDL_gpu.c | 593 +++++++++++++++++++++++++++++++++++++++++++ src/gpu/SDL_sysgpu.h | 61 +++++ 2 files changed, 654 insertions(+) create mode 100644 src/gpu/SDL_gpu.c create mode 100644 src/gpu/SDL_sysgpu.h diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c new file mode 100644 index 0000000000000..c2252d6b62419 --- /dev/null +++ b/src/gpu/SDL_gpu.c @@ -0,0 +1,593 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+*/ +#include "../SDL_internal.h" + +/* The high-level gpu subsystem */ + +#include "SDL.h" +#include "SDL_sysgpu.h" + +SDL_GpuDevice * +SDL_GpuCreateDevice(const char *label) +{ +} + +void +SDL_GpuDestroyDevice(SDL_GpuDevice *device) +{ +} + +SDL_GpuBuffer * +SDL_GpuCreateCPUBuffer(SDL_GpuDevice *device, const Uint32 buflen) +{ +} + +void * +SDL_GpuLockCPUBuffer(SDL_GpuBuffer *buffer, Uint32 *_buflen) +{ +} + +void +SDL_GpuUnlockCPUBuffer(SDL_GpuBuffer *buffer) +{ +} + +SDL_GpuBuffer * +SDL_GpuCreateBuffer(SDL_GpuDevice *device, const Uint32 length) +{ +} + +void +SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer) +{ +} + + +SDL_GpuTexture * +SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc) +{ +} + +void +SDL_GpuDestroyTexture(SDL_GpuTexture *texture) +{ +} + +SDL_GpuShader * +SDL_GpuLoadShader(SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen) +{ +} + +void +SDL_GpuDestroyShader(SDL_GpuShader *shader) +{ +} + +SDL_GpuTexture * +SDL_GpuGetBackbuffer(SDL_GpuDevice *device) +{ +} + +SDL_GpuPipeline * +SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc) +{ +} + +void +SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) +{ +} + +void +SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc) +{ +} + +void +SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc) +{ +} + +SDL_GpuSampler * +SDL_GpuCreateSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc) +{ +} + +void +SDL_GpuDestroySampler(SDL_GpuSampler *sampler) +{ +} + + +/* GpuStateCache hashtable implementations... */ + +/* !!! FIXME: crc32 algorithm is probably overkill here. */ +#define CRC32_INIT_VALUE 0xFFFFFFFF +#define CRC32_APPEND_VAR(crc, var) crc = crc32_append(crc, &var, sizeof (var)) +#define CRC32_FINISH(crc) crc ^= 0xFFFFFFFF + +static Uint32 +crc32_append(Uint32 crc, const void *_buf, const size_t buflen) +{ + const Uint8 *buf = (const Uint8 *) _buf; + size_t i; + for (i = 0; i < buflen; i++) { + Uint32 xorval = (Uint32) ((crc ^ *(buf++)) & 0xFF); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + xorval = ((xorval & 1) ? (0xEDB88320 ^ (xorval >> 1)) : (xorval >> 1)); + crc = xorval ^ (crc >> 8); + } // for + + return crc; +} + +static Uint32 hash_pipeline(const void *key, void *data) +{ + /* this hashes most pointers; this hash is meant to be unique and contained in this process. As such, it also doesn't care about enum size or byte order. */ + /* However, it _does_ care about uninitialized packing bytes, so it doesn't just hash the sizeof (object). */ + const SDL_GpuPipelineDescription *desc = (const SDL_GpuPipelineDescription *) key; + Uint32 crc = CRC32_INIT_VALUE; + Uint32 i; + + if (desc->label) { crc = crc32_append(crc, desc->label, SDL_strlen(desc->label)); } /* NULL means less bytes hashed to keep it unique vs "". 
*/ + + CRC32_APPEND_VAR(crc, desc->primitive); + CRC32_APPEND_VAR(crc, desc->vertex_shader); + CRC32_APPEND_VAR(crc, desc->fragment_shader); + + CRC32_APPEND_VAR(crc, desc->num_vertex_attributes); + for (i = 0; i < desc->num_vertex_attributes; i++) { + CRC32_APPEND_VAR(crc, desc->vertices[i].format); + CRC32_APPEND_VAR(crc, desc->vertices[i].offset); + CRC32_APPEND_VAR(crc, desc->vertices[i].stride); + CRC32_APPEND_VAR(crc, desc->vertices[i].index); + } + + CRC32_APPEND_VAR(crc, desc->num_color_attachments); + for (i = 0; i < desc->num_color_attachments; i++) { + CRC32_APPEND_VAR(crc, desc->color_attachments[i].pixel_format); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].writemask_enabled_red); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].writemask_enabled_blue); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].writemask_enabled_green); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].writemask_enabled_alpha); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].blending_enabled); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].alpha_blend_op); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].alpha_src_blend_factor); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].alpha_dst_blend_factor); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].rgb_blend_op); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].rgb_src_blend_factor); + CRC32_APPEND_VAR(crc, desc->color_attachments[i].rgb_dst_blend_factor); + } + + CRC32_APPEND_VAR(crc, desc->depth_format); + CRC32_APPEND_VAR(crc, desc->stencil_format); + CRC32_APPEND_VAR(crc, desc->depth_write_enabled); + CRC32_APPEND_VAR(crc, desc->stencil_read_mask); + CRC32_APPEND_VAR(crc, desc->stencil_write_mask); + CRC32_APPEND_VAR(crc, desc->stencil_reference_front); + CRC32_APPEND_VAR(crc, desc->stencil_reference_back); + CRC32_APPEND_VAR(crc, desc->depth_function); + CRC32_APPEND_VAR(crc, desc->stencil_function); + CRC32_APPEND_VAR(crc, desc->stencil_fail); + CRC32_APPEND_VAR(crc, desc->depth_fail); + CRC32_APPEND_VAR(crc, desc->depth_and_stencil_pass); + CRC32_APPEND_VAR(crc, desc->fill_mode); + CRC32_APPEND_VAR(crc, desc->front_face); + CRC32_APPEND_VAR(crc, desc->cull_face); + CRC32_APPEND_VAR(crc, desc->depth_bias); + CRC32_APPEND_VAR(crc, desc->depth_bias_scale); + CRC32_APPEND_VAR(crc, desc->depth_bias_clamp); + + return CRC32_FINISH(crc); +} + +static SDL_bool keymatch_pipeline(const void *_a, const void *_b, void *data) +{ + const SDL_GpuPipelineDescription *a = (const SDL_GpuPipelineDescription *) _a; + const SDL_GpuPipelineDescription *b = (const SDL_GpuPipelineDescription *) _b; + Uint32 i; + + if ( (!SDL_KeyMatchString(a->label, b->label, NULL)) || + (a->primitive != b->primitive) || + (a->vertex_shader != b->vertex_shader) || + (a->fragment_shader != b->fragment_shader) || + (a->num_vertex_attributes != b->num_vertex_attributes) || + (a->num_color_attachments != b->num_color_attachments) || + (a->depth_format != b->depth_format) || + (a->stencil_format != b->stencil_format) || + (a->depth_write_enabled != b->depth_write_enabled) || + (a->stencil_read_mask != b->stencil_read_mask) || + (a->stencil_write_mask != b->stencil_write_mask) || + (a->stencil_reference_front != b->stencil_reference_front) || + (a->stencil_reference_back != b->stencil_reference_back) || + (a->depth_function != b->depth_function) || + (a->stencil_function != b->stencil_function) || + (a->stencil_fail != b->stencil_fail) || + (a->depth_fail != b->depth_fail) || + (a->depth_and_stencil_pass != b->depth_and_stencil_pass) || + (a->fill_mode != 
b->fill_mode) || + (a->front_face != b->front_face) || + (a->cull_face != b->cull_face) || + (a->depth_bias != b->depth_bias) || + (a->depth_bias_scale != b->depth_bias_scale) || + (a->depth_bias_clamp != b->depth_bias_clamp) ) { + return SDL_FALSE; + } + + /* still here? Compare the arrays */ + for (i = 0; i < a->num_vertex_attributes; i++) { + const SDL_GpuVertexAttributeDescription *av = &a->vertices[i]; + const SDL_GpuVertexAttributeDescription *bv = &b->vertices[i]; + if ( (av->format != bv->format) || + (av->offset != bv->offset) || + (av->stride != bv->stride) || + (av->index != bv->index) ) { + return SDL_FALSE; + } + } + + for (i = 0; i < a->num_color_attachments; i++) { + const SDL_GpuPipelineColorAttachmentDescription *ac = &a->color_attachments[i]; + const SDL_GpuPipelineColorAttachmentDescription *bc = &b->color_attachments[i]; + if ( (ac->pixel_format != bc->pixel_format) || + (ac->writemask_enabled_red != bc->writemask_enabled_red) || + (ac->writemask_enabled_blue != bc->writemask_enabled_blue) || + (ac->writemask_enabled_green != bc->writemask_enabled_green) || + (ac->writemask_enabled_alpha != bc->writemask_enabled_alpha) || + (ac->blending_enabled != bc->blending_enabled) || + (ac->alpha_blend_op != bc->alpha_blend_op) || + (ac->alpha_src_blend_factor != bc->alpha_src_blend_factor) || + (ac->alpha_dst_blend_factor != bc->alpha_dst_blend_factor) || + (ac->rgb_blend_op != bc->rgb_blend_op) || + (ac->rgb_src_blend_factor != bc->rgb_src_blend_factor) || + (ac->rgb_dst_blend_factor != bc->rgb_dst_blend_factor) ) { + return SDL_FALSE; + } + } + + return SDL_TRUE; +} + +void nuke_pipeline(const void *key, const void *value, void *data) +{ + SDL_GpuPipelineDescription *desc = (SDL_GpuPipelineDescription *) key; + SDL_free((void *) desc->label); + SDL_free(desc); + SDL_GpuDestroyPipeline((SDL_GpuPipeline *) value); +} + + +static Uint32 hash_sampler(const void *key, void *data) +{ + /* this hashes most pointers; this hash is meant to be unique and contained in this process. As such, it also doesn't care about enum size or byte order. */ + /* However, it _does_ care about uninitialized packing bytes, so it doesn't just hash the sizeof (object). */ + const SDL_GpuSamplerDescription *desc = (const SDL_GpuSamplerDescription *) key; + Uint32 crc = CRC32_INIT_VALUE; + + if (desc->label) { crc = crc32_append(crc, desc->label, SDL_strlen(desc->label)); } /* NULL means less bytes hashed to keep it unique vs "". */ + CRC32_APPEND_VAR(crc, desc->addrmode_u); + CRC32_APPEND_VAR(crc, desc->addrmode_v); + CRC32_APPEND_VAR(crc, desc->addrmode_r); + CRC32_APPEND_VAR(crc, desc->border_color); + CRC32_APPEND_VAR(crc, desc->min_filter); + CRC32_APPEND_VAR(crc, desc->mag_filter); + CRC32_APPEND_VAR(crc, desc->mip_filter); + return CRC32_FINISH(crc); +} + +static SDL_bool keymatch_sampler(const void *_a, const void *_b, void *data) +{ + const SDL_GpuSamplerDescription *a = (const SDL_GpuSamplerDescription *) _a; + const SDL_GpuSamplerDescription *b = (const SDL_GpuSamplerDescription *) _b; + return ( (SDL_KeyMatchString(a->label, b->label, NULL)) && + (a->addrmode_u == b->addrmode_u) && + (a->addrmode_v == b->addrmode_v) && + (a->addrmode_r == b->addrmode_r) && + (a->min_filter == b->min_filter) && + (a->mag_filter == b->mag_filter) && + (a->mip_filter == b->mip_filter) ) ? 
SDL_TRUE : SDL_FALSE; +} + +void nuke_sampler(const void *key, const void *value, void *data) +{ + SDL_GpuSamplerDescription *desc = (SDL_GpuSamplerDescription *) key; + SDL_free((void *) desc->label); + SDL_free(desc); + SDL_GpuDestroySampler((SDL_GpuSampler *) value); +} + +SDL_GpuStateCache * +SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device) +{ + SDL_GpuStateCache *cache = (SDL_GpuStateCache *) SDL_calloc(1, sizeof (SDL_GpuStateCache)); + if (!cache) { + SDL_OutOfMemory(); + return NULL; + } + + cache->pipeline_mutex = SDL_CreateMutex(); + if (!cache->pipeline_mutex) { + goto failed; + } + + cache->sampler_mutex = SDL_CreateMutex(); + if (!cache->sampler_mutex) { + goto failed; + } + + if (label) { + cache->label = SDL_strdup(label); + if (!cache->label) { + SDL_OutOfMemory(); + goto failed; + } + } + + /* !!! FIXME: adjust hash table bucket counts? */ + + cache->pipeline_cache = SDL_NewHashTable(NULL, 128, hash_pipeline, keymatch_pipeline, nuke_pipeline, SDL_FALSE); + if (!cache->pipeline_cache) { + goto failed; + } + + cache->sampler_cache = SDL_NewHashTable(NULL, 16, hash_sampler, keymatch_sampler, nuke_sampler, SDL_FALSE); + if (!cache->sampler_cache) { + goto failed; + } + + cache->device = device; + return cache; + +failed: + SDL_GpuDestroyStateCache(cache); /* can clean up half-created objects. */ + return NULL; +} + +#define GETCACHEDOBJIMPL(ctyp, typ) \ + SDL_Gpu##ctyp *retval; \ + const void *val; \ + \ + if (!cache) { \ + SDL_InvalidParamError("cache"); \ + return NULL; \ + } \ + \ + SDL_LockMutex(cache->typ##_mutex); \ + if (SDL_FindInHashTable(cache->typ##_cache, desc, &val)) { \ + retval = (SDL_Gpu##ctyp *) val; \ + } else { /* not cached yet, make a new one and cache it. */ \ + retval = SDL_GpuCreate##ctyp(cache->device, desc); \ + if (retval) { \ + if (!SDL_InsertIntoHashTable(cache->typ##_cache, &retval->desc, retval)) { \ + SDL_GpuDestroy##ctyp(retval); \ + retval = NULL; \ + } \ + } \ + } \ + SDL_UnlockMutex(cache->typ##_mutex); \ + return retval + +SDL_GpuPipeline * +SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc) +{ + GETCACHEDOBJIMPL(Pipeline, pipeline); +} + +SDL_GpuSampler * +SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc) +{ + GETCACHEDOBJIMPL(Sampler, sampler); +} + +void +SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache) +{ + if (cache) { + SDL_DestroyMutex(cache->pipeline_mutex); + SDL_FreeHashTable(cache->pipeline_cache); + SDL_DestroyMutex(cache->sampler_mutex); + SDL_FreeHashTable(cache->sampler_cache); + SDL_free((void *) cache->label); + SDL_free(cache); + } +} + +SDL_GpuCommandBuffer * +SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device) +{ +} + + +SDL_GpuRenderPass * +SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, + Uint32 num_color_attachments, + const SDL_GpuColorAttachmentDescription *color_attachments, + const SDL_GpuDepthAttachmentDescription *depth_attachment, + const SDL_GpuStencilAttachmentDescription *stencil_attachment) +{ +} + + +void +SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) +{ +} + +void +SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar) +{ +} + +void +SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height) +{ +} + +void +SDL_GpuSetRenderBlendConstant(SDL_GpuRenderPass *pass, 
const double red, const double green, const double blue, const double alpha) +{ +} + +void +SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) +{ +} + +void +SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) +{ +} + +void +SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) +{ +} + +void +SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) +{ +} + +void +SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) +{ +} + +void +SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) +{ +} + +void +SDL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count) +{ +} + +void +SDL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset) +{ +} + +void +SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance) +{ +} + +void +SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance) +{ +} + + +void +SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass) +{ +} + + +SDL_GpuBlitPass * +SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) +{ +} + +void +SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, + Uint32 srcx, Uint32 srcy, Uint32 srcz, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, + Uint32 dstx, Uint32 dsty, Uint32 dstz) +{ +} + +void +SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, unsigned char value) +{ +} + +void +SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) +{ +} + +void +SDL_GpuCopyBetweenBuffers(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +{ +} + +void +SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, + Uint32 srcpitch, Uint32 srcimgpitch, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, + Uint32 dstx, Uint32 dsty, Uint32 dstz) +{ +} + +void +SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, + Uint32 srcx, Uint32 srcy, Uint32 srcz, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) +{ +} + +void +SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass) +{ +} + +SDL_GpuFence * +SDL_GpuCreateFence(SDL_GpuDevice *device) +{ +} + +void +SDL_GpuDestroyFence(SDL_GpuFence *fence) +{ +} + +int +SDL_GpuQueryFence(SDL_GpuFence *fence) +{ +} + +int +SDL_GpuResetFence(SDL_GpuFence *fence) +{ +} + +int +SDL_GpuWaitFence(SDL_GpuFence *fence) +{ +} + +void +SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, const SDL_bool also_present, SDL_GpuFence *fence) +{ +} + +/* vi: set ts=4 sw=4 expandtab: */ diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h new file mode 100644 index 0000000000000..a226a24a12e33 --- /dev/null +++ 
b/src/gpu/SDL_sysgpu.h @@ -0,0 +1,61 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ +#include "../SDL_internal.h" + +#ifndef SDL_sysgpu_h_ +#define SDL_sysgpu_h_ + +#define SDL_SUPPRESS_GPU_API_UNSTABLE_WARNING 1 /* !!! FIXME: remove later */ +#include "SDL_gpu.h" +#include "../SDL_hashtable.h" + +struct SDL_GpuDevice +{ +// SDL_GpuBuffer *(*CreateCPUBuffer)(SDL_GpuDevice *_this, const Uint32 buflen); + +}; + +struct SDL_GpuPipeline +{ + SDL_GpuPipelineDescription desc; +}; + +struct SDL_GpuSampler +{ + SDL_GpuSamplerDescription desc; +}; + +/* Multiple mutexes might be overkill, but there's no reason to + block all caches when one is being accessed. */ +struct SDL_GpuStateCache +{ + const char *label; + SDL_GpuDevice *device; + SDL_mutex *pipeline_mutex; + SDL_HashTable *pipeline_cache; + SDL_mutex *sampler_mutex; + SDL_HashTable *sampler_cache; +}; + + +#endif /* SDL_sysgpu_h_ */ + +/* vi: set ts=4 sw=4 expandtab: */ From 082d22257fdfdf8cf879823786b0deeee6a42ca3 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 20 Apr 2022 23:24:16 -0400 Subject: [PATCH 14/54] gpu: Wire up the basic bits to the CMake project. --- CMakeLists.txt | 1 + include/build_config/SDL_build_config.h.cmake | 1 + 2 files changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0baea19031b73..47ae5b7f06c29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,6 +471,7 @@ sdl_glob_sources( "${SDL3_SOURCE_DIR}/src/events/*.c" "${SDL3_SOURCE_DIR}/src/file/*.c" "${SDL3_SOURCE_DIR}/src/joystick/*.c" + "${SDL3_SOURCE_DIR}/src/gpu/*.c" "${SDL3_SOURCE_DIR}/src/haptic/*.c" "${SDL3_SOURCE_DIR}/src/hidapi/*.c" "${SDL3_SOURCE_DIR}/src/libm/*.c" diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake index 017e06b06410d..0e5ccf6e18a55 100644 --- a/include/build_config/SDL_build_config.h.cmake +++ b/include/build_config/SDL_build_config.h.cmake @@ -257,6 +257,7 @@ #cmakedefine SDL_CPUINFO_DISABLED @SDL_CPUINFO_DISABLED@ #cmakedefine SDL_EVENTS_DISABLED @SDL_EVENTS_DISABLED@ #cmakedefine SDL_FILE_DISABLED @SDL_FILE_DISABLED@ +#cmakedefine SDL_GPU_DISABLED @SDL_GPU_DISABLED@ #cmakedefine SDL_JOYSTICK_DISABLED @SDL_JOYSTICK_DISABLED@ #cmakedefine SDL_HAPTIC_DISABLED @SDL_HAPTIC_DISABLED@ #cmakedefine SDL_HIDAPI_DISABLED @SDL_HIDAPI_DISABLED@ From 7595693257f616e3f6f6f719491a9001d1a6b21b Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sat, 23 Apr 2022 10:29:01 -0400 Subject: [PATCH 15/54] gpu: Associate a GpuDevice with an SDL_Window at creation time. 
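
With this change a window has to exist before the device does, and the device stays tied to it.
A rough sketch of the new call (the `window` here is whatever SDL_Window the app already created;
the label, as before, is only a debug string, not a device selector):

    SDL_GpuDevice *device = SDL_GpuCreateDevice("my device", window);
    if (!device) {
        SDL_Log("Failed to create GPU device: %s", SDL_GetError());
    }

    /* ... encode and submit command buffers ... */

    SDL_GpuDestroyDevice(device);
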
--- include/SDL_gpu.h | 2 +- src/gpu/SDL_gpu.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 9fbb46ebcea36..df36a4a3c596b 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -155,7 +155,7 @@ extern "C" { /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ typedef struct SDL_GpuDevice SDL_GpuDevice; -SDL_GpuDevice *SDL_GpuCreateDevice(const char *label); /* `label` is for debugging, not a specific device name to access. */ +SDL_GpuDevice *SDL_GpuCreateDevice(const char *label, SDL_Window *window); /* `label` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); /* CPU buffers live in RAM and can be accessed by the CPU. */ diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index c2252d6b62419..c7639fce56ca6 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -25,8 +25,9 @@ #include "SDL.h" #include "SDL_sysgpu.h" +/* !!! FIXME: change this API to allow selection of a specific GPU? */ SDL_GpuDevice * -SDL_GpuCreateDevice(const char *label) +SDL_GpuCreateDevice(const char *label, SDL_Window *window) { } From 76964411aea4da215aa9eb51fbc1f032d43f389a Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sat, 23 Apr 2022 10:31:13 -0400 Subject: [PATCH 16/54] gpu: Add SDL_GpuGetTextureDescription. Will probably do this for other opaque objects, too. --- include/SDL_gpu.h | 1 + src/gpu/SDL_gpu.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index df36a4a3c596b..4d16a3a947834 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -221,6 +221,7 @@ typedef struct SDL_GpuTextureDescription typedef struct SDL_GpuTexture SDL_GpuTexture; SDL_GpuTexture *SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc); void SDL_GpuDestroyTexture(SDL_GpuTexture *texture); +void SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc); /* compiling shaders is a different (and optional at runtime) piece, in SDL_gpu_compiler.h */ typedef struct SDL_GpuShader SDL_GpuShader; diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index c7639fce56ca6..b7d5bafa4a562 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -67,6 +67,11 @@ SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *des { } +void +SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc) +{ +} + void SDL_GpuDestroyTexture(SDL_GpuTexture *texture) { From ac1b9ba19dd97d0b73af82c7b09b7db3c1c85ceb Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sat, 23 Apr 2022 10:32:09 -0400 Subject: [PATCH 17/54] gpu: Added test/testgpu_simple_clear.c This is the equivalent of test/testvulkan.c (just clear the framebuffer), but it shows the complexity level of SDL_Gpu* vs Vulkan. This is untested, as none of the API is implemented yet, so this might change. Work in progress! 
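
Once the device exists, the whole frame in this test boils down to three steps: create a command
buffer, start a render pass whose single color attachment is the backbuffer with a clear color,
and submit (which also presents). Condensed from the file added below, assuming `device` came
from SDL_GpuCreateDevice() (the real code animates the clear color and checks every return value):

    SDL_GpuColorAttachmentDescription color_desc;
    SDL_GpuCommandBuffer *cmd = SDL_GpuCreateCommandBuffer("empty command buffer", device);

    SDL_zero(color_desc);
    color_desc.texture = SDL_GpuGetBackbuffer(device);  /* the window's backbuffer */
    color_desc.color_init = SDL_GPUPASSINIT_CLEAR;
    color_desc.clear_red = 1.0f;
    color_desc.clear_green = 0.0f;
    color_desc.clear_blue = 0.0f;
    color_desc.clear_alpha = 1.0f;

    SDL_GpuStartRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL);

    /* nothing to draw; submitting with also_present=SDL_TRUE clears and presents. */
    SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_TRUE, NULL);
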
--- test/CMakeLists.txt | 1 + test/testgpu_simple_clear.c | 178 ++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 test/testgpu_simple_clear.c diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4ce431e73feeb..80c6a6280fb63 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -360,6 +360,7 @@ add_sdl_test_executable(testcustomcursor SOURCES testcustomcursor.c) add_sdl_test_executable(testvulkan NO_C90 SOURCES testvulkan.c) add_sdl_test_executable(testoffscreen SOURCES testoffscreen.c) add_sdl_test_executable(testpopup SOURCES testpopup.c) +add_sdl_test_executable(testgpu_simple_clear TESTUTILS SOURCES testgpu_simple_clear.c) check_c_compiler_flag(-Wformat-overflow HAVE_WFORMAT_OVERFLOW) if(HAVE_WFORMAT_OVERFLOW) diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c new file mode 100644 index 0000000000000..c5a0beaa48396 --- /dev/null +++ b/test/testgpu_simple_clear.c @@ -0,0 +1,178 @@ +/* + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely. +*/ + +/* This is the equivalent of testvulkan.c (just clears the screen, fading colors). */ + +#include +#include +#include + +#include "SDL_test_common.h" +#include "SDL_gpu.h" + +typedef struct GpuContext +{ + SDL_Window *window; + SDL_GpuDevice *device; +} GpuContext; + +static SDLTest_CommonState *state; +static GpuContext *gpuContexts = NULL; // an array of state->num_windows items +static GpuContext *gpuContext = NULL; // for the currently-rendering window + +static void shutdownGpu(void); + +/* Call this instead of exit(), so we can clean up SDL: atexit() is evil. 
*/ +static void quit(int rc) +{ + shutdownGpu(); + SDLTest_CommonQuit(state); + exit(rc); +} + +static void initGpu(void) +{ + int i; + + gpuContexts = (GpuContext *) SDL_calloc(state->num_windows, sizeof (GpuContext)); + if (!gpuContexts) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Out of memory!"); + quit(2); + } + + for (i = 0; i < state->num_windows; ++i) { + char label[64]; + gpuContext = &gpuContexts[i]; + gpuContext->window = state->windows[i]; + SDL_snprintf(label, sizeof (label), "Window #%d", i); + gpuContext->device = SDL_GpuCreateDevice(label, state->windows[i]); + } +} + +static void shutdownGpu(void) +{ + if (gpuContexts) { + int i; + for (i = 0; i < state->num_windows; ++i) { + gpuContext = &gpuContexts[i]; + SDL_GpuDestroyDevice(gpuContext->device); + } + SDL_free(gpuContexts); + gpuContexts = NULL; + } +} + +static SDL_bool render(void) +{ + double currentTime; + SDL_GpuColorAttachmentDescription color_desc; + SDL_GpuCommandBuffer *cmd; + SDL_GpuRenderPass *pass; + + cmd = SDL_GpuCreateCommandBuffer("empty command buffer", gpuContext->device); + if (!cmd) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_GpuCreateCommandBuffer(): %s\n", SDL_GetError()); + quit(2); + } + + currentTime = (double)SDL_GetPerformanceCounter() / SDL_GetPerformanceFrequency(); + + SDL_zero(color_desc); + color_desc.texture = SDL_GpuGetBackbuffer(gpuContext->device); + color_desc.color_init = SDL_GPUPASSINIT_CLEAR; + color_desc.clear_red = (float)(0.5 + 0.5 * SDL_sin(currentTime)); + color_desc.clear_green = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 2 / 3)); + color_desc.clear_blue = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 4 / 3)); + color_desc.clear_alpha = 1.0f; + + pass = SDL_GpuStartRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL); + if (!cmd) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_GpuStartRenderPass(): %s\n", SDL_GetError()); + quit(2); + } + + /* literally nothing to do, we just start a pass to say "clear the framebuffer to this color," present, and we're done. 
*/ + SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_TRUE, NULL); + + return SDL_TRUE; +} + +int main(int argc, char **argv) +{ + int done; + SDL_DisplayMode mode; + SDL_Event event; + Uint32 then, now, frames; + SDL_GpuTextureDescription texdesc; + int dw, dh; + + /* Enable standard application logging */ + SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO); + + /* Initialize test framework */ + state = SDLTest_CommonCreateState(argv, SDL_INIT_VIDEO); + if(!state) { + return 1; + } + + state->skip_renderer = 1; + + if (!SDLTest_CommonDefaultArgs(state, argc, argv) || !SDLTest_CommonInit(state)) { + SDLTest_CommonQuit(state); + return 1; + } + + initGpu(); + + SDL_GetCurrentDisplayMode(0, &mode); + SDL_Log("Screen BPP : %d\n", SDL_BITSPERPIXEL(mode.format)); + SDL_GetWindowSize(state->windows[0], &dw, &dh); + SDL_Log("Window Size : %d,%d\n", dw, dh); + SDL_GpuGetTextureDescription(SDL_GpuGetBackbuffer(gpuContext->device), &texdesc); + SDL_Log("Draw Size : %d,%d\n", (int) texdesc.width, (int) texdesc.height); + SDL_Log("\n"); + + /* Main render loop */ + frames = 0; + then = SDL_GetTicks(); + done = 0; + while (!done) { + /* Check for events */ + frames++; + while(SDL_PollEvent(&event)) { + SDLTest_CommonEvent(state, &event, &done); + } + + if (!done) { + int i; + for (i = 0; i < state->num_windows; ++i) { + if (state->windows[i]) { + gpuContext = &gpuContexts[i]; + render(); + } + } + } + } + + /* Print out some timing information */ + now = SDL_GetTicks(); + if (now > then) { + SDL_Log("%2.2f frames per second\n", ((double)frames * 1000) / (now - then)); + } + + shutdownGpu(); + SDLTest_CommonQuit(state); + return 0; +} + +/* vi: set ts=4 sw=4 expandtab: */ + From 6126e1762a817196c13d897e45badd7b629f72a0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sat, 23 Apr 2022 12:50:17 -0400 Subject: [PATCH 18/54] testgpu_simple_clear: Fixed copy/paste bug. --- test/testgpu_simple_clear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index c5a0beaa48396..e4f1593d2f98a 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -95,7 +95,7 @@ static SDL_bool render(void) color_desc.clear_alpha = 1.0f; pass = SDL_GpuStartRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL); - if (!cmd) { + if (!pass) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_GpuStartRenderPass(): %s\n", SDL_GetError()); quit(2); } From 1a08eef24cb263da1ad1a2d0c24df9da9b92d51f Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 28 Apr 2022 09:57:28 -0400 Subject: [PATCH 19/54] gpu: Added some FIXMEs. --- include/SDL_gpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 4d16a3a947834..9d077b1552bec 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -154,6 +154,9 @@ extern "C" { /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ +/* !!! FIXME: Allow windows to share an SDL_GpuDevice */ +/* !!! FIXME: uh, we need a vsync API. */ + typedef struct SDL_GpuDevice SDL_GpuDevice; SDL_GpuDevice *SDL_GpuCreateDevice(const char *label, SDL_Window *window); /* `label` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); From b8fcd5ce9bb5cbfa6b1f32d3afd6238adbac0af4 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Fri, 29 Apr 2022 10:09:49 -0400 Subject: [PATCH 20/54] gpu: Disassociate GPU devices from a specific window. 
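
The upshot is that an SDL_GpuDevice is no longer bound to a single window; the window only gets
named when command buffers are submitted. Per the new signature in the diff below, a submit that
presents to a window at vsync would look roughly like this (`cmd` and `window` being whatever
command buffer and SDL_Window the app has in hand):

    /* non-NULL present_window + swapinterval 1 == present this work to `window` during vsync */
    SDL_GpuSubmitCommandBuffers(&cmd, 1, window, 1, NULL);

Passing a NULL present_window just executes the commands without presenting (swapinterval is
ignored in that case).
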
--- include/SDL_gpu.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 9d077b1552bec..0ff0811ee38f2 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -154,11 +154,8 @@ extern "C" { /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ -/* !!! FIXME: Allow windows to share an SDL_GpuDevice */ -/* !!! FIXME: uh, we need a vsync API. */ - typedef struct SDL_GpuDevice SDL_GpuDevice; -SDL_GpuDevice *SDL_GpuCreateDevice(const char *label, SDL_Window *window); /* `label` is for debugging, not a specific device name to access. */ +SDL_GpuDevice *SDL_GpuCreateDevice(const char *label); /* `label` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); /* CPU buffers live in RAM and can be accessed by the CPU. */ @@ -655,8 +652,13 @@ int SDL_GpuWaitFence(SDL_GpuFence *fence); * Once you've encoded your command buffer(s), you can submit them to the GPU for executing. * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. * Once a command buffer is submitted, its pointer becomes invalid. Create a new one for the next set of commands. + * + * If this command buffer is to present to a window, specify a non-NULL present_window. The swapinterval should be 0 (present immediately), 1 (present during vsync), + * or -1 (present during vsync unless we've missed vsync, in which case present immediately). swapinterval is ignored if present_window is NULL. The window + * may be destroyed and recreated on first use if incompatible with the SDL_GpuDevice; as such, it does not need to be created with SDL_WINDOW_OPENGL or _VULKAN, + * etc, as this API will take care of it. */ -void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, const SDL_bool also_present, SDL_GpuFence *fence); +void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_Window *present_window, const int swapinterval, SDL_GpuFence *fence); /* !!! FIXME: add a SDL_GpuAbandonCommandBuffer() function for freeing a buffer without submitting it? */ From ac4fccc7e7e6d680a1aeaeeed16ff607f65b1c32 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Fri, 29 Apr 2022 16:09:38 -0400 Subject: [PATCH 21/54] gpu: Connect to SDL_Window in SDL_GpuGetBackbuffer. This makes more sense, since this is the actual part we care about with windows, and we need to know it when making the render pass, not when submitting the command buffer. Updated the testgpu_simple_clear.c code to match the recent API changes. --- include/SDL_gpu.h | 32 +++++++++++++---- src/gpu/SDL_gpu.c | 6 ++-- test/testgpu_simple_clear.c | 70 +++++++++++-------------------------- 3 files changed, 48 insertions(+), 60 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 0ff0811ee38f2..a4b41b2441bf7 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -229,8 +229,19 @@ SDL_GpuShader *SDL_GpuLoadShader(SDL_GpuDevice *device, const Uint8 *bytecode, c void SDL_GpuDestroyShader(SDL_GpuShader *shader); -/* !!! FIXME: I don't know what this is going to look like yet, this is a placeholder. */ -SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device); +/* + * Get a texture that can be used for rendering to an SDL window. The window + * may be destroyed and recreated internally on first use if incompatible with the SDL_GpuDevice! 
+ * As such, it does not need to be created with SDL_WINDOW_OPENGL or _VULKAN, + * etc, as this API will take care of it. + * Do not save this texture beyond using it for a render pass color attachment! It's likely that + * a window has multiple textures that it cycles through (and even those might get replaced if + * the window is resized or hidden or if the OS just feels like it moment by moment). + * + * This call may block if you've got every backbuffer from the window in flight, rendering other + * frames that haven't completed yet. Use fences if you need to avoid this. + */ +SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); /* PRECOOKED STATE OBJECTS... */ @@ -648,17 +659,24 @@ int SDL_GpuQueryFence(SDL_GpuFence *fence); int SDL_GpuResetFence(SDL_GpuFence *fence); int SDL_GpuWaitFence(SDL_GpuFence *fence); + +typedef enum SDL_GpuPresentType +{ + SDL_GPUPRESENT_NONE, /* don't present (not rendering to a window or more command buffers to queue first) */ + SDL_GPUPRESENT_IMMEDIATE, /* present immediately, don't wait for vsync */ + SDL_GPUPRESENT_VSYNC, /* present synced to vertical retrace */ + SDL_GPUPRESENT_ADAPTIVE_VSYNC /* vsync if we're running fast enough, immediate if we've missed vsync. If unsupported, this waits for vsync. */ +} SDL_GpuPresentType; + /* * Once you've encoded your command buffer(s), you can submit them to the GPU for executing. * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. * Once a command buffer is submitted, its pointer becomes invalid. Create a new one for the next set of commands. * - * If this command buffer is to present to a window, specify a non-NULL present_window. The swapinterval should be 0 (present immediately), 1 (present during vsync), - * or -1 (present during vsync unless we've missed vsync, in which case present immediately). swapinterval is ignored if present_window is NULL. The window - * may be destroyed and recreated on first use if incompatible with the SDL_GpuDevice; as such, it does not need to be created with SDL_WINDOW_OPENGL or _VULKAN, - * etc, as this API will take care of it. + * If this command buffer is to present to a window, specify a non-NULL present_window. + * presenttype is ignored if this isn't a render pass using a window's backbuffer. */ -void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_Window *present_window, const int swapinterval, SDL_GpuFence *fence); +void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); /* !!! FIXME: add a SDL_GpuAbandonCommandBuffer() function for freeing a buffer without submitting it? */ diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index b7d5bafa4a562..f7be4f4940f05 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -27,7 +27,7 @@ /* !!! FIXME: change this API to allow selection of a specific GPU? 
*/ SDL_GpuDevice * -SDL_GpuCreateDevice(const char *label, SDL_Window *window) +SDL_GpuCreateDevice(const char *label) { } @@ -88,7 +88,7 @@ SDL_GpuDestroyShader(SDL_GpuShader *shader) } SDL_GpuTexture * -SDL_GpuGetBackbuffer(SDL_GpuDevice *device) +SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) { } @@ -592,7 +592,7 @@ SDL_GpuWaitFence(SDL_GpuFence *fence) } void -SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, const SDL_bool also_present, SDL_GpuFence *fence) +SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence) { } diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index e4f1593d2f98a..2d0075468a587 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -19,17 +19,14 @@ #include "SDL_test_common.h" #include "SDL_gpu.h" -typedef struct GpuContext -{ - SDL_Window *window; - SDL_GpuDevice *device; -} GpuContext; - static SDLTest_CommonState *state; -static GpuContext *gpuContexts = NULL; // an array of state->num_windows items -static GpuContext *gpuContext = NULL; // for the currently-rendering window +SDL_GpuDevice *gpuDevice = NULL; -static void shutdownGpu(void); +static void shutdownGpu(void) +{ + SDL_GpuDestroyDevice(gpuDevice); + gpuDevice = NULL; +} /* Call this instead of exit(), so we can clean up SDL: atexit() is evil. */ static void quit(int rc) @@ -41,44 +38,25 @@ static void quit(int rc) static void initGpu(void) { - int i; - - gpuContexts = (GpuContext *) SDL_calloc(state->num_windows, sizeof (GpuContext)); - if (!gpuContexts) { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Out of memory!"); + gpuDevice = SDL_GpuCreateDevice("The GPU device"); + if (!gpuDevice) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to create GPU device: %s", SDL_GetError()); quit(2); } - - for (i = 0; i < state->num_windows; ++i) { - char label[64]; - gpuContext = &gpuContexts[i]; - gpuContext->window = state->windows[i]; - SDL_snprintf(label, sizeof (label), "Window #%d", i); - gpuContext->device = SDL_GpuCreateDevice(label, state->windows[i]); - } -} - -static void shutdownGpu(void) -{ - if (gpuContexts) { - int i; - for (i = 0; i < state->num_windows; ++i) { - gpuContext = &gpuContexts[i]; - SDL_GpuDestroyDevice(gpuContext->device); - } - SDL_free(gpuContexts); - gpuContexts = NULL; - } } -static SDL_bool render(void) +static void render(SDL_Window *window) { double currentTime; SDL_GpuColorAttachmentDescription color_desc; SDL_GpuCommandBuffer *cmd; SDL_GpuRenderPass *pass; - cmd = SDL_GpuCreateCommandBuffer("empty command buffer", gpuContext->device); + if (!window) { + return; + } + + cmd = SDL_GpuCreateCommandBuffer("empty command buffer", gpuDevice); if (!cmd) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_GpuCreateCommandBuffer(): %s\n", SDL_GetError()); quit(2); @@ -87,7 +65,7 @@ static SDL_bool render(void) currentTime = (double)SDL_GetPerformanceCounter() / SDL_GetPerformanceFrequency(); SDL_zero(color_desc); - color_desc.texture = SDL_GpuGetBackbuffer(gpuContext->device); + color_desc.texture = SDL_GpuGetBackbuffer(gpuDevice, window); color_desc.color_init = SDL_GPUPASSINIT_CLEAR; color_desc.clear_red = (float)(0.5 + 0.5 * SDL_sin(currentTime)); color_desc.clear_green = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 2 / 3)); @@ -101,9 +79,7 @@ static SDL_bool render(void) } /* literally nothing to do, we just start a pass to say "clear the framebuffer to this color," present, and we're done. 
*/ - SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_TRUE, NULL); - - return SDL_TRUE; + SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_GPUPRESENT_VSYNC, NULL); } int main(int argc, char **argv) @@ -137,9 +113,8 @@ int main(int argc, char **argv) SDL_Log("Screen BPP : %d\n", SDL_BITSPERPIXEL(mode.format)); SDL_GetWindowSize(state->windows[0], &dw, &dh); SDL_Log("Window Size : %d,%d\n", dw, dh); - SDL_GpuGetTextureDescription(SDL_GpuGetBackbuffer(gpuContext->device), &texdesc); + SDL_GpuGetTextureDescription(SDL_GpuGetBackbuffer(gpuDevice, state->windows[0]), &texdesc); /* !!! FIXME: probably shouldn't do this. */ SDL_Log("Draw Size : %d,%d\n", (int) texdesc.width, (int) texdesc.height); - SDL_Log("\n"); /* Main render loop */ frames = 0; @@ -155,10 +130,7 @@ int main(int argc, char **argv) if (!done) { int i; for (i = 0; i < state->num_windows; ++i) { - if (state->windows[i]) { - gpuContext = &gpuContexts[i]; - render(); - } + render(state->windows[i]); } } } @@ -169,10 +141,8 @@ int main(int argc, char **argv) SDL_Log("%2.2f frames per second\n", ((double)frames * 1000) / (now - then)); } - shutdownGpu(); - SDLTest_CommonQuit(state); + quit(0); return 0; } /* vi: set ts=4 sw=4 expandtab: */ - From 5573ad2101befda57073d61574547de771c54a62 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 3 May 2022 00:22:24 -0400 Subject: [PATCH 22/54] gpu: Make CPU and GPU buffers separate objects. Trying to make this generic is silly, because almost every API that takes a "buffer" needs specifically one kind or the other, except for the blit pass operation to copy between CPU and GPU, so that's now separated into two functions that specify direction. Also CPU buffer creation now takes an optional data pointer, which saves you the trouble of lock/copy/unlock if you already have data to store at creation time. This can be NULL to create without initializing. --- include/SDL_gpu.h | 19 +++++++++++++------ src/gpu/SDL_gpu.c | 22 ++++++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index a4b41b2441bf7..1de5d30baca6f 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -159,17 +159,23 @@ SDL_GpuDevice *SDL_GpuCreateDevice(const char *label); /* `label` is for debugg void SDL_GpuDestroyDevice(SDL_GpuDevice *device); /* CPU buffers live in RAM and can be accessed by the CPU. */ -typedef struct SDL_GpuBuffer SDL_GpuBuffer; -SDL_GpuBuffer *SDL_GpuCreateCPUBuffer(SDL_GpuDevice *device, const Uint32 buflen); -void *SDL_GpuLockCPUBuffer(SDL_GpuBuffer *buffer, Uint32 *_buflen); -void SDL_GpuUnlockCPUBuffer(SDL_GpuBuffer *buffer); +typedef struct SDL_GpuCpuBuffer SDL_GpuCpuBuffer; +SDL_GpuCpuBuffer *SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); +void SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer); +void *SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen); +void SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer); /* * GPU buffers live in GPU-specific memory and can not be accessed by the CPU. * If you need to get data to/from a GPU buffer, encode a blit operation * to move it from/to a CPU buffer. Once in a CPU buffer, you can lock it to access data in your code. + * There is no initial `data` pointer here, like CPU buffers have, since there's significantly more + * work to initialize them. 
If you plan to do an upload to the GPU buffer at the same time as + * you create it, there's a convenience function that will do it at the cost of blocking until the + * upload is complete: SDL_GpuCreateAndInitBuffer */ -SDL_GpuBuffer *SDL_GpuCreateBuffer(SDL_GpuDevice *device, const Uint32 length); +typedef struct SDL_GpuBuffer SDL_GpuBuffer; +SDL_GpuBuffer *SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 length); void SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer); @@ -628,7 +634,8 @@ void SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offs void SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); -void SDL_GpuCopyBetweenBuffers(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +void SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +void SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); void SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index f7be4f4940f05..be117129a6b29 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -36,23 +36,28 @@ SDL_GpuDestroyDevice(SDL_GpuDevice *device) { } -SDL_GpuBuffer * -SDL_GpuCreateCPUBuffer(SDL_GpuDevice *device, const Uint32 buflen) +SDL_GpuCpuBuffer * +SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) +{ +} + +void +SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) { } void * -SDL_GpuLockCPUBuffer(SDL_GpuBuffer *buffer, Uint32 *_buflen) +SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen) { } void -SDL_GpuUnlockCPUBuffer(SDL_GpuBuffer *buffer) +SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) { } SDL_GpuBuffer * -SDL_GpuCreateBuffer(SDL_GpuDevice *device, const Uint32 length) +SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 length) { } @@ -540,7 +545,12 @@ SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) } void -SDL_GpuCopyBetweenBuffers(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +{ +} + +void +SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { } From 1765f016313629936971504ac5def276f73f5149 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 3 May 2022 00:28:03 -0400 Subject: [PATCH 23/54] gpu: SDL_GpuCreateFence should take a label, too. --- include/SDL_gpu.h | 2 +- src/gpu/SDL_gpu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 1de5d30baca6f..736d05c594514 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -660,7 +660,7 @@ void SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass); * and if it's safe to touch resources that are no longer in-flight. 
*/ typedef struct SDL_GpuFence SDL_GpuFence; -SDL_GpuFence *SDL_GpuCreateFence(SDL_GpuDevice *device); +SDL_GpuFence *SDL_GpuCreateFence(const char *label, SDL_GpuDevice *device); void SDL_GpuDestroyFence(SDL_GpuFence *fence); int SDL_GpuQueryFence(SDL_GpuFence *fence); int SDL_GpuResetFence(SDL_GpuFence *fence); diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index be117129a6b29..10b863ba241c7 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -577,7 +577,7 @@ SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass) } SDL_GpuFence * -SDL_GpuCreateFence(SDL_GpuDevice *device) +SDL_GpuCreateFence(const char *label, SDL_GpuDevice *device) { } From 0f419e73242782d3ffc824f2f491558beeb6d776 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 3 May 2022 00:30:56 -0400 Subject: [PATCH 24/54] gpu: Added SDL_GpuAbandonCommandBuffers. This is to free a command buffer without submitting it to the GPU. --- include/SDL_gpu.h | 4 ++++ src/gpu/SDL_gpu.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 736d05c594514..e15a91d668f11 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -685,6 +685,10 @@ typedef enum SDL_GpuPresentType */ void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); +/* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can + abandon them, freeing their resources. This can be useful if something unrelated fails halfway through buffer encoding. */ +void SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs); + /* !!! FIXME: add a SDL_GpuAbandonCommandBuffer() function for freeing a buffer without submitting it? */ /* Ends C function definitions when using C++ */ diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 10b863ba241c7..576a32a302dfa 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -606,4 +606,9 @@ SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdb { } +void +SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs) +{ +} + /* vi: set ts=4 sw=4 expandtab: */ From e22a0abad18152ca83ec646aab2fdceb2e3c408f Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 3 May 2022 00:31:47 -0400 Subject: [PATCH 25/54] gpu: Added some optional helper functions to reduce boilerplate. --- include/SDL_gpu.h | 47 +++++++++++++ include/SDL_gpu_compiler.h | 3 +- src/gpu/SDL_gpu.c | 118 +++++++++++++++++++++++++++++++++ src/gpu/SDL_gpu_cycle_impl.h | 124 +++++++++++++++++++++++++++++++++++ 4 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 src/gpu/SDL_gpu_cycle_impl.h diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index e15a91d668f11..df2e026e74fe8 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -691,6 +691,53 @@ void SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 n /* !!! FIXME: add a SDL_GpuAbandonCommandBuffer() function for freeing a buffer without submitting it? */ + +/* Helper functions. These are optional and built on top of the public API to remove boilerplate from your code. */ + +/* This makes a GPU buffer, and uploads data to it. This is not a fast call! But it removes a bunch of boilerplate code if you + just want to blast data to a GPU buffer. This will submit a command buffer with a blit pass to the device and wait for + it to complete. Returns NULL on error, the new GPU buffer otherwise. 
*/ +SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); + +/* Make sure your depth texture matches the window's backbuffer dimensions, if you don't care about managing the depth buffer yourself. + * This assumes the depth texture is not still in-flight from a previous frame! If the depth texture needs to be resized, previous contents + * will be lost. + */ +SDL_GpuTexture *SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depth); + +/* Since you need to leave a buffer untouched until the GPU is done with it, you often need to keep several buffers of uniforms + that you cycle through as the GPU processes prior frames. If you don't want to manage this yourself, you can use a buffer cycle + to do it for you. This will cycle through a list of buffers, each new request from the app returning the least-recently-used + item, under the presumption that when you get back to that item again it'll be available for reuse. + The "Ptr" version gives you the address of the item in the cycle, in case you need to rebuild it: for example, if you have a + cycle of depth textures and the window gets resized, you'd use the Ptr version to destroy and recreate the object in the cycle. + In normal use, you want the non-Ptr version, though. */ +typedef struct SDL_GpuCpuBufferCycle SDL_GpuCpuBufferCycle; +SDL_GpuCpuBufferCycle *SDL_GpuCreateCpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const void *data, const Uint32 numbuffers); +SDL_GpuCpuBuffer *SDL_GpuNextCpuBufferCycle(SDL_GpuCpuBufferCycle *cycle); +SDL_GpuCpuBuffer **SDL_GpuNextCpuBufferPtrCycle(SDL_GpuCpuBufferCycle *cycle); +void SDL_GpuDestroyCpuBufferCycle(SDL_GpuCpuBufferCycle *cycle); + +typedef struct SDL_GpuBufferCycle SDL_GpuBufferCycle; +SDL_GpuBufferCycle *SDL_GpuCreateBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const Uint32 numbuffers); +SDL_GpuBuffer *SDL_GpuNextBufferCycle(SDL_GpuBufferCycle *cycle); +SDL_GpuBuffer **SDL_GpuNextBufferPtrCycle(SDL_GpuBufferCycle *cycle); +void SDL_GpuDestroyBufferCycle(SDL_GpuBufferCycle *cycle); + +/* if the texdesc is NULL, you will get a cycle of NULL textures that you can create later with SDL_GpuNextTexturePtrCycle */ +typedef struct SDL_GpuTextureCycle SDL_GpuTextureCycle; +SDL_GpuTextureCycle *SDL_GpuCreateTextureCycle(const char *label, SDL_GpuDevice *device, const SDL_GpuTextureDescription *texdesc, const Uint32 numtextures); +SDL_GpuTexture *SDL_GpuNextTextureCycle(SDL_GpuTextureCycle *cycle); +SDL_GpuTexture **SDL_GpuNextTexturePtrCycle(SDL_GpuTextureCycle *cycle); +void SDL_GpuDestroyTextureCycle(SDL_GpuTextureCycle *cycle); + +typedef struct SDL_GpuFenceCycle SDL_GpuFenceCycle; +SDL_GpuFenceCycle *SDL_GpuCreateFenceCycle(const char *label, SDL_GpuDevice *device, const Uint32 numfences); +SDL_GpuFence *SDL_GpuNextFenceCycle(SDL_GpuFenceCycle *cycle); +SDL_GpuFence **SDL_GpuNextFencePtrCycle(SDL_GpuFenceCycle *cycle); +void SDL_GpuDestroyFenceCycle(SDL_GpuFenceCycle *cycle); + + /* Ends C function definitions when using C++ */ #ifdef __cplusplus } diff --git a/include/SDL_gpu_compiler.h b/include/SDL_gpu_compiler.h index 6c58a85ea2a06..5b0fdba8d73cb 100644 --- a/include/SDL_gpu_compiler.h +++ b/include/SDL_gpu_compiler.h @@ -44,7 +44,8 @@ extern "C" { * speed over optimization. If one really wants a strong optimizing compiler, * one should build an external tool. 
:) */ -int SDL_GpuCompileShader(const char *src, const Uint32 srclen, const char *type, const char *mainfn, Uint8 **result, Uint32 *resultlen); +/* !!! FIXME: don't use a string for type, just an enum */ +int SDL_GpuCompileShader(const char *src, const int srclen, const char *type, const char *mainfn, Uint8 **result, Uint32 *resultlen); /* !!! FIXME: There's probably a lot of other stuff we want to put in here. */ diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 576a32a302dfa..b184cb4458179 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -611,4 +611,122 @@ SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmd { } +SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) +{ + SDL_GpuFence *fence = NULL; + SDL_GpuCpuBuffer *staging = NULL; + SDL_GpuBuffer *gpubuf = NULL; + SDL_GpuBuffer *retval = NULL; + SDL_GpuCommandBuffer *cmd = NULL; + SDL_GpuBlitPass *blit = NULL; + + if (device == NULL) { + SDL_InvalidParamError("device"); + return NULL; + } else if (data == NULL) { + SDL_InvalidParamError("data"); + return NULL; + } + + if ( ((fence = SDL_GpuCreateFence("Temporary fence for SDL_GpuCreateAndInitBuffer", device)) != NULL) && + ((staging = SDL_GpuCreateCpuBuffer("Staging buffer for SDL_GpuCreateAndInitBuffer", device, buflen, data)) != NULL) && + ((gpubuf = SDL_GpuCreateBuffer(label, device, buflen)) != NULL) && + ((cmd = SDL_GpuCreateCommandBuffer("Command buffer for SDL_GpuCreateAndInitBuffer", device)) != NULL) && + ((blit = SDL_GpuStartBlitPass("Blit pass for SDL_GpuCreateAndInitBuffer", cmd)) != NULL) ) { + SDL_GpuCopyBufferCpuToGpu(blit, staging, 0, gpubuf, 0, buflen); + SDL_GpuEndBlitPass(blit); + SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_GPUPRESENT_NONE, fence); + SDL_GpuWaitFence(fence); /* so we know it's definitely uploaded */ + retval = gpubuf; + } + + if (!retval) { + SDL_GpuEndBlitPass(blit); /* assume this might be un-ended. */ + SDL_GpuAbandonCommandBuffers(&cmd, 1); + SDL_GpuDestroyBuffer(gpubuf); + } + SDL_GpuDestroyCpuBuffer(staging); + SDL_GpuDestroyFence(fence); + return retval; +} + +SDL_GpuTexture * +SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depthtex) +{ + SDL_GpuTextureDescription bbtexdesc, depthtexdesc; + + if (!device) { + SDL_InvalidParamError("device"); + return NULL; + } else if (!backbuffer) { + SDL_InvalidParamError("backbuffer"); + return NULL; + } else if (!depthtex) { + SDL_InvalidParamError("depthtex"); + return NULL; + } + + SDL_GpuGetTextureDescription(backbuffer, &bbtexdesc); + + if (*depthtex) { + SDL_GpuGetTextureDescription(*depthtex, &depthtexdesc); + } + + /* !!! FIXME: check texture_type, pixel_format, etc? */ + if (!*depthtex || (depthtexdesc.width != bbtexdesc.width) || (depthtexdesc.height != bbtexdesc.height)) { + SDL_zero(depthtexdesc); + depthtexdesc.label = label; + depthtexdesc.texture_type = SDL_GPUTEXTYPE_2D; + depthtexdesc.pixel_format = SDL_GPUPIXELFMT_Depth24_Stencil8; + depthtexdesc.usage = SDL_GPUTEXUSAGE_RENDER_TARGET; /* !!! FIXME: does this need shader read or write to be the depth buffer? */ + depthtexdesc.width = bbtexdesc.width; + depthtexdesc.height = bbtexdesc.width; + SDL_GpuDestroyTexture(*depthtex); + *depthtex = SDL_GpuCreateTexture(device, &depthtexdesc); + } + + return *depthtex; +} + +/* various object cycle APIs ... 
*/ +#define SDL_GPUCYCLETYPE SDL_GpuCpuBufferCycle +#define SDL_GPUCYCLEITEMTYPE SDL_GpuCpuBuffer +#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateCpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const void *data, const Uint32 numitems) +#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextCpuBufferCycle +#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextCpuBufferPtrCycle +#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyCpuBufferCycle +#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_GpuCreateCpuBuffer(lbl, device, bufsize, data); failvar = (itemvar == NULL); } +#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyCpuBuffer +#include "SDL_gpu_cycle_impl.h" + +#define SDL_GPUCYCLETYPE SDL_GpuBufferCycle +#define SDL_GPUCYCLEITEMTYPE SDL_GpuBuffer +#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const Uint32 numitems) +#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextBufferCycle +#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextBufferPtrCycle +#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyBufferCycle +#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_GpuCreateBuffer(lbl, device, bufsize); failvar = (itemvar == NULL); } +#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyBuffer +#include "SDL_gpu_cycle_impl.h" + +#define SDL_GPUCYCLETYPE SDL_GpuTextureCycle +#define SDL_GPUCYCLEITEMTYPE SDL_GpuTexture +#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateTextureCycle(const char *label, SDL_GpuDevice *device, const SDL_GpuTextureDescription *texdesc, const Uint32 numitems) +#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextTextureCycle +#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextTexturePtrCycle +#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyTextureCycle +#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { if (texdesc) { SDL_GpuTextureDescription td; SDL_memcpy(&td, texdesc, sizeof (td)); td.label = lbl; itemvar = SDL_GpuCreateTexture(device, &td); failvar = (itemvar == NULL); } else { itemvar = NULL; failvar = SDL_FALSE; } } +#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyTexture +#include "SDL_gpu_cycle_impl.h" + +#define SDL_GPUCYCLETYPE SDL_GpuFenceCycle +#define SDL_GPUCYCLEITEMTYPE SDL_GpuFence +#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateFenceCycle(const char *label, SDL_GpuDevice *device, const Uint32 numitems) +#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextFenceCycle +#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextFencePtrCycle +#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyFenceCycle +#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_GpuCreateFence(lbl, device); failvar = (itemvar == NULL); } +#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyFence +#include "SDL_gpu_cycle_impl.h" + /* vi: set ts=4 sw=4 expandtab: */ diff --git a/src/gpu/SDL_gpu_cycle_impl.h b/src/gpu/SDL_gpu_cycle_impl.h new file mode 100644 index 0000000000000..0b2920da787b5 --- /dev/null +++ b/src/gpu/SDL_gpu_cycle_impl.h @@ -0,0 +1,124 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. 
If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +/* this file is included multiple times with different defines. */ + +struct SDL_GPUCYCLETYPE +{ + const char *label; + Uint32 num_items; + Uint32 next_item; + SDL_GPUCYCLEITEMTYPE *items[SDL_VARIABLE_LENGTH_ARRAY]; +}; + +SDL_GPUCYCLETYPE * +SDL_GPUCYCLECREATEFNSIG +{ + /* allocate the whole thing as one block: `items` as a variable length array at the end, the label data after that. */ + const size_t labellen = label ? (SDL_strlen(label) + 1) : 0; + const size_t thislabellen = label ? 0 : labellen + 32; + const size_t alloclen = sizeof (SDL_GPUCYCLETYPE) + (sizeof (SDL_GPUCYCLEITEMTYPE *) * numitems) + labellen; + SDL_GPUCYCLETYPE *retval = (SDL_GPUCYCLETYPE *) SDL_calloc(1, alloclen); + char *thislabel = NULL; + SDL_bool isstack = SDL_FALSE; + Uint32 i; + + if (!retval) { + SDL_OutOfMemory(); + return NULL; + } + + thislabel = label ? SDL_small_alloc(char, thislabellen, &isstack) : NULL; + for (i = 0; i < numitems; i++) { + SDL_bool failed = SDL_TRUE; + if (thislabel) { + SDL_snprintf(thislabel, thislabellen, "%s (cycle %u/%u)", label, (unsigned int) i, (unsigned int) numitems); + } + SDL_GPUCYCLECREATE(thislabel, failed, retval->items[i]); + if (failed) { + Uint32 j; + for (j = 0; j < i; j++) { + SDL_GPUCYCLEDESTROY(retval->items[j]); + } + SDL_free(retval); + if (thislabel) { + SDL_small_free(thislabel, isstack); + } + return NULL; + } + } + + if (label) { + char *ptr = ((char *) retval) + (sizeof (SDL_GPUCYCLETYPE) + (sizeof (SDL_GPUCYCLEITEMTYPE *) * numitems)); + SDL_strlcpy(ptr, label, labellen); + retval->label = ptr; + } + + retval->num_items = numitems; + return retval; +} + +SDL_GPUCYCLEITEMTYPE ** +SDL_GPUCYCLENEXTPTRFNNAME(SDL_GPUCYCLETYPE *cycle) + +{ + SDL_GPUCYCLEITEMTYPE **retval = NULL; + if (!cycle) { + SDL_InvalidParamError("cycle"); + } else { + retval = &cycle->items[cycle->next_item++]; + if (cycle->next_item >= cycle->num_items) { + cycle->next_item = 0; + } + } + return retval; +} + +SDL_GPUCYCLEITEMTYPE * +SDL_GPUCYCLENEXTFNNAME(SDL_GPUCYCLETYPE *cycle) +{ + SDL_GPUCYCLEITEMTYPE **itemptr = SDL_GPUCYCLENEXTPTRFNNAME(cycle); + return itemptr ? *itemptr : NULL; +} + +void +SDL_GPUCYCLEDESTROYFNNAME(SDL_GPUCYCLETYPE *cycle) +{ + if (cycle) { + Uint32 i; + for (i = 0; i < cycle->num_items; i++) { + SDL_GPUCYCLEDESTROY(cycle->items[i]); + } + SDL_free(cycle); /* this frees everything, including the variable length array and the string data that label points to. */ + } +} + +#undef SDL_GPUCYCLETYPE +#undef SDL_GPUCYCLEITEMTYPE +#undef SDL_GPUCYCLECREATEFNSIG +#undef SDL_GPUCYCLENEXTFNNAME +#undef SDL_GPUCYCLENEXTPTRFNNAME +#undef SDL_GPUCYCLEDESTROYFNNAME +#undef SDL_GPUCYCLECREATE +#undef SDL_GPUCYCLEDESTROY + +/* vi: set ts=4 sw=4 expandtab: */ + From 9bc9d1913da00c3cdac5703c95d36be4262f32fc Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 3 May 2022 00:33:23 -0400 Subject: [PATCH 26/54] gpu: Added test/testgpu_spinning_cube.c This is the equivalent of test/testgles2.c with the SDL GPU API. Work in progress! 
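For reference, the per-frame flow this test encodes for each window is
roughly the following (a trimmed sketch: error handling and the uniform/depth
buffer cycles are omitted, and the variables are the ones set up in the full
source below):

    /* one command buffer per window per frame */
    cmd = SDL_GpuCreateCommandBuffer("Render new frame", gpu_device);

    /* blit pass: push this frame's MVP matrix from the CPU staging buffer to a GPU buffer */
    blit = SDL_GpuStartBlitPass("Copy mvp matrix to GPU pass", cmd);
    SDL_GpuCopyBufferCpuToGpu(blit, cpubuf_uniforms, 0, gpubuf_uniforms, 0, sizeof (float) * 16);
    SDL_GpuEndBlitPass(blit);

    /* render pass: clear the backbuffer and depth texture, draw the cube */
    render = SDL_GpuStartRenderPass("Spinning cube render pass", cmd, 1, &color_attachment, &depth_attachment, NULL);
    SDL_GpuSetRenderPassPipeline(render, render_state.pipeline);
    SDL_GpuSetRenderPassVertexBuffer(render, render_state.gpubuf_static, 0, 0);  /* vertex data */
    SDL_GpuSetRenderPassVertexBuffer(render, gpubuf_uniforms, 0, 1);             /* mvp matrix */
    SDL_GpuDraw(render, 0, SDL_arraysize(vertex_data));
    SDL_GpuEndRenderPass(render);

    /* push the work to the GPU and present the window's backbuffer when it's done */
    SDL_GpuSubmitCommandBuffers(&cmd, 1, presenttype, NULL);
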
--- test/CMakeLists.txt | 1 + test/testgpu_spinning_cube.c | 582 +++++++++++++++++++++++++++++++++++ 2 files changed, 583 insertions(+) create mode 100644 test/testgpu_spinning_cube.c diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 80c6a6280fb63..89766daa8e0b1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -361,6 +361,7 @@ add_sdl_test_executable(testvulkan NO_C90 SOURCES testvulkan.c) add_sdl_test_executable(testoffscreen SOURCES testoffscreen.c) add_sdl_test_executable(testpopup SOURCES testpopup.c) add_sdl_test_executable(testgpu_simple_clear TESTUTILS SOURCES testgpu_simple_clear.c) +add_sdl_test_executable(testgpu_spinning_cube TESTUTILS SOURCES testgpu_spinning_cube.c) check_c_compiler_flag(-Wformat-overflow HAVE_WFORMAT_OVERFLOW) if(HAVE_WFORMAT_OVERFLOW) diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c new file mode 100644 index 0000000000000..fba43bdd2fb95 --- /dev/null +++ b/test/testgpu_spinning_cube.c @@ -0,0 +1,582 @@ +/* + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely. +*/ +#include +#include +#include +#include + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include "SDL_test_common.h" +#include "SDL_gpu.h" +#include "SDL_gpu_compiler.h" + +typedef struct RenderState +{ + SDL_GpuBuffer *gpubuf_static; + SDL_GpuPipeline *pipeline; +} RenderState; + +typedef struct WindowState +{ + int angle_x, angle_y, angle_z; + SDL_GpuTextureCycle *texcycle_depth; + SDL_GpuCpuBufferCycle *cpubufcycle_uniforms; + SDL_GpuBufferCycle *gpubufcycle_uniforms; +} WindowState; + +static SDL_GpuDevice *gpu_device = NULL; +static RenderState render_state; +static SDLTest_CommonState *state = NULL; +static WindowState *window_states = NULL; + +static void shutdownGpu(void) +{ + /* !!! FIXME: We need a WaitIdle API */ + if (window_states) { + int i; + for (i = 0; i < state->num_windows; i++) { + WindowState *winstate = &window_states[i]; + SDL_GpuDestroyTextureCycle(winstate->texcycle_depth); + SDL_GpuDestroyCpuBufferCycle(winstate->cpubufcycle_uniforms); + SDL_GpuDestroyBufferCycle(winstate->gpubufcycle_uniforms); + } + SDL_free(window_states); + window_states = NULL; + } + + SDL_GpuDestroyBuffer(render_state.gpubuf_static); + SDL_GpuDestroyPipeline(render_state.pipeline); + SDL_GpuDestroyDevice(gpu_device); + + SDL_zero(render_state); + gpu_device = NULL; +} + + +/* Call this instead of exit(), so we can clean up SDL: atexit() is evil. */ +static void +quit(int rc) +{ + shutdownGpu(); + SDLTest_CommonQuit(state); + exit(rc); +} + +/* + * Simulates desktop's glRotatef. The matrix is returned in column-major + * order. 
+ */ +static void +rotate_matrix(float angle, float x, float y, float z, float *r) +{ + float radians, c, s, c1, u[3], length; + int i, j; + + radians = (float)(angle * M_PI) / 180.0f; + + c = SDL_cosf(radians); + s = SDL_sinf(radians); + + c1 = 1.0f - SDL_cosf(radians); + + length = (float)SDL_sqrt(x * x + y * y + z * z); + + u[0] = x / length; + u[1] = y / length; + u[2] = z / length; + + for (i = 0; i < 16; i++) { + r[i] = 0.0; + } + + r[15] = 1.0; + + for (i = 0; i < 3; i++) { + r[i * 4 + (i + 1) % 3] = u[(i + 2) % 3] * s; + r[i * 4 + (i + 2) % 3] = -u[(i + 1) % 3] * s; + } + + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + r[i * 4 + j] += c1 * u[i] * u[j] + (i == j ? c : 0.0f); + } + } +} + +/* + * Simulates gluPerspectiveMatrix + */ +static void +perspective_matrix(float fovy, float aspect, float znear, float zfar, float *r) +{ + int i; + float f; + + f = 1.0f/SDL_tanf(fovy * 0.5f); + + for (i = 0; i < 16; i++) { + r[i] = 0.0; + } + + r[0] = f / aspect; + r[5] = f; + r[10] = (znear + zfar) / (znear - zfar); + r[11] = -1.0f; + r[14] = (2.0f * znear * zfar) / (znear - zfar); + r[15] = 0.0f; +} + +/* + * Multiplies lhs by rhs and writes out to r. All matrices are 4x4 and column + * major. In-place multiplication is supported. + */ +static void +multiply_matrix(float *lhs, float *rhs, float *r) +{ + int i, j, k; + float tmp[16]; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + tmp[j * 4 + i] = 0.0; + + for (k = 0; k < 4; k++) { + tmp[j * 4 + i] += lhs[k * 4 + i] * rhs[j * 4 + k]; + } + } + } + + for (i = 0; i < 16; i++) { + r[i] = tmp[i]; + } +} + +typedef struct VertexData +{ + float x, y, z; /* 3D data. Vertex range -0.5..0.5 in all axes. Z -0.5 is near, 0.5 is far. */ + float red, green, blue; /* intensity 0 to 1 (alpha is always 1). */ +} VertexData; + +static const VertexData vertex_data[] = { + /* Front face. 
*/ + /* Bottom left */ + { -0.5, 0.5, -0.5, 1.0, 0.0, 0.0 }, /* red */ + { 0.5, -0.5, -0.5, 0.0, 0.0, 1.0 }, /* blue */ + { -0.5, -0.5, -0.5, 0.0, 1.0, 0.0 }, /* green */ + + /* Top right */ + { -0.5, 0.5, -0.5, 1.0, 0.0, 0.0 }, /* red */ + { 0.5, 0.5, -0.5, 1.0, 1.0, 0.0 }, /* yellow */ + { 0.5, -0.5, -0.5, 0.0, 0.0, 1.0 }, /* blue */ + + /* Left face */ + /* Bottom left */ + { -0.5, 0.5, 0.5, 1.0, 1.0, 1.0 }, /* white */ + { -0.5, -0.5, -0.5, 0.0, 1.0, 0.0 }, /* green */ + { -0.5, -0.5, 0.5, 0.0, 1.0, 1.0 }, /* cyan */ + + /* Top right */ + { -0.5, 0.5, 0.5, 1.0, 1.0, 1.0 }, /* white */ + { -0.5, 0.5, -0.5, 1.0, 0.0, 0.0 }, /* red */ + { -0.5, -0.5, -0.5, 0.0, 1.0, 0.0 }, /* green */ + + /* Top face */ + /* Bottom left */ + { -0.5, 0.5, 0.5, 1.0, 1.0, 1.0 }, /* white */ + { 0.5, 0.5, -0.5, 1.0, 1.0, 0.0 }, /* yellow */ + { -0.5, 0.5, -0.5, 1.0, 0.0, 0.0 }, /* red */ + + /* Top right */ + { -0.5, 0.5, 0.5, 1.0, 1.0, 1.0 }, /* white */ + { 0.5, 0.5, 0.5, 0.0, 0.0, 0.0 }, /* black */ + { 0.5, 0.5, -0.5, 1.0, 1.0, 0.0 }, /* yellow */ + + /* Right face */ + /* Bottom left */ + { 0.5, 0.5, -0.5, 1.0, 1.0, 0.0 }, /* yellow */ + { 0.5, -0.5, 0.5, 1.0, 0.0, 1.0 }, /* magenta */ + { 0.5, -0.5, -0.5, 0.0, 0.0, 1.0 }, /* blue */ + + /* Top right */ + { 0.5, 0.5, -0.5, 1.0, 1.0, 0.0 }, /* yellow */ + { 0.5, 0.5, 0.5, 0.0, 0.0, 0.0 }, /* black */ + { 0.5, -0.5, 0.5, 1.0, 0.0, 1.0 }, /* magenta */ + + /* Back face */ + /* Bottom left */ + { 0.5, 0.5, 0.5, 0.0, 0.0, 0.0 }, /* black */ + { -0.5, -0.5, 0.5, 0.0, 1.0, 1.0 }, /* cyan */ + { 0.5, -0.5, 0.5, 1.0, 0.0, 1.0 }, /* magenta */ + + /* Top right */ + { 0.5, 0.5, 0.5, 0.0, 0.0, 0.0 }, /* black */ + { -0.5, 0.5, 0.5, 1.0, 1.0, 1.0 }, /* white */ + { -0.5, -0.5, 0.5, 0.0, 1.0, 1.0 }, /* cyan */ + + /* Bottom face */ + /* Bottom left */ + { -0.5, -0.5, -0.5, 0.0, 1.0, 0.0 }, /* green */ + { 0.5, -0.5, 0.5, 1.0, 0.0, 1.0 }, /* magenta */ + { -0.5, -0.5, 0.5, 0.0, 1.0, 1.0 }, /* cyan */ + + /* Top right */ + { -0.5, -0.5, -0.5, 0.0, 1.0, 0.0 }, /* green */ + { 0.5, -0.5, -0.5, 0.0, 0.0, 1.0 }, /* blue */ + { 0.5, -0.5, 0.5, 1.0, 0.0, 1.0 } /* magenta */ +}; + +/* !!! FIXME: these shaders need to change. This is just the GLES2 shaders right now. */ +static const char* shader_vert_src = +" attribute vec4 av4position; " +" attribute vec3 av3color; " +" uniform mat4 mvp; " +" varying vec3 vv3color; " +" void main() { " +" vv3color = av3color; " +" gl_Position = mvp * av4position; " +" } "; + +static const char* shader_frag_src = +" precision lowp float; " +" varying vec3 vv3color; " +" void main() { " +" gl_FragColor = vec4(vv3color, 1.0); " +" } "; + +static void +Render(SDL_Window *window, const int windownum) +{ + WindowState *winstate = &window_states[windownum]; + SDL_GpuTexture *backbuffer = SDL_GpuGetBackbuffer(gpu_device, window); + const SDL_GpuPresentType presenttype = (state->render_flags & SDL_RENDERER_PRESENTVSYNC) ? 
SDL_GPUPRESENT_VSYNC : SDL_GPUPRESENT_IMMEDIATE; + SDL_GpuColorAttachmentDescription color_attachment; + SDL_GpuDepthAttachmentDescription depth_attachment; + SDL_GpuTexture **depth_texture_ptr; + SDL_GpuCpuBuffer *cpubuf_uniforms = SDL_GpuNextCpuBufferCycle(winstate->cpubufcycle_uniforms); + SDL_GpuBuffer *gpubuf_uniforms = SDL_GpuNextBufferCycle(winstate->gpubufcycle_uniforms); + SDL_GpuTextureDescription texdesc; + float matrix_rotate[16], matrix_modelview[16], matrix_perspective[16]; + Uint32 drawablew, drawableh; + SDL_GpuCommandBuffer *cmd; + SDL_GpuRenderPass *render; + SDL_GpuBlitPass *blit; + char label[64]; + + if (!backbuffer) { + SDL_Log("Uhoh, no backbuffer for window #%d!\n", windownum); + return; + } + + SDL_GpuGetTextureDescription(backbuffer, &texdesc); + drawablew = texdesc.width; + drawableh = texdesc.height; + + /* + * Do some rotation with Euler angles. It is not a fixed axis as + * quaterions would be, but the effect is cool. + */ + rotate_matrix((float)winstate->angle_x, 1.0f, 0.0f, 0.0f, matrix_modelview); + rotate_matrix((float)winstate->angle_y, 0.0f, 1.0f, 0.0f, matrix_rotate); + + multiply_matrix(matrix_rotate, matrix_modelview, matrix_modelview); + + rotate_matrix((float)winstate->angle_z, 0.0f, 1.0f, 0.0f, matrix_rotate); + + multiply_matrix(matrix_rotate, matrix_modelview, matrix_modelview); + + /* Pull the camera back from the cube */ + matrix_modelview[14] -= 2.5; + + perspective_matrix(45.0f, (float)drawablew/drawableh, 0.01f, 100.0f, matrix_perspective); + + multiply_matrix(matrix_perspective, matrix_modelview, (float *) SDL_GpuLockCpuBuffer(cpubuf_uniforms, NULL)); + SDL_GpuUnlockCpuBuffer(cpubuf_uniforms); + + winstate->angle_x += 3; + winstate->angle_y += 2; + winstate->angle_z += 1; + + if(winstate->angle_x >= 360) winstate->angle_x -= 360; + if(winstate->angle_x < 0) winstate->angle_x += 360; + if(winstate->angle_y >= 360) winstate->angle_y -= 360; + if(winstate->angle_y < 0) winstate->angle_y += 360; + if(winstate->angle_z >= 360) winstate->angle_z -= 360; + if(winstate->angle_z < 0) winstate->angle_z += 360; + + /* Copy the new uniform data to the GPU */ + cmd = SDL_GpuCreateCommandBuffer("Render new frame", gpu_device); + if (!cmd) { + SDL_Log("Failed to create command buffer: %s\n", SDL_GetError()); + quit(2); + } + + blit = SDL_GpuStartBlitPass("Copy mvp matrix to GPU pass", cmd); + if (!blit) { + SDL_Log("Failed to create blit pass: %s\n", SDL_GetError()); + quit(2); + } + + SDL_GpuCopyBufferCpuToGpu(blit, cpubuf_uniforms, 0, gpubuf_uniforms, 0, sizeof (float) * 16); + SDL_GpuEndBlitPass(blit); + + SDL_zero(color_attachment); + color_attachment.texture = backbuffer; + color_attachment.color_init = SDL_GPUPASSINIT_CLEAR; + color_attachment.clear_alpha = 1.0f; + + /* resize the depth texture if the window size changed */ + SDL_snprintf(label, sizeof (label), "Depth buffer for window #%d", windownum); + depth_texture_ptr = SDL_GpuNextTexturePtrCycle(winstate->texcycle_depth); + if (SDL_GpuMatchingDepthTexture(label, gpu_device, color_attachment.texture, depth_texture_ptr) == NULL) { + SDL_Log("Failed to prepare depth buffer for window #%d: %s\n", windownum, SDL_GetError()); + quit(2); + } + + SDL_zero(depth_attachment); + depth_attachment.texture = *depth_texture_ptr; + depth_attachment.depth_init = SDL_GPUPASSINIT_CLEAR; + depth_attachment.clear_depth = 0.0; + + + /* Draw the cube! */ + + /* !!! FIXME: does viewport/scissor default to the texture size? Because that would be nice. 
*/ + render = SDL_GpuStartRenderPass("Spinning cube render pass", cmd, 1, &color_attachment, &depth_attachment, NULL); + SDL_GpuSetRenderPassPipeline(render, render_state.pipeline); + SDL_GpuSetRenderPassViewport(render, 0.0, 0.0, (double) drawablew, (double) drawableh, 0.0, 1.0); /* !!! FIXME near and far are wrong */ + SDL_GpuSetRenderPassScissor(render, 0.0, 0.0, (double) drawablew, (double) drawableh); + SDL_GpuSetRenderPassVertexBuffer(render, render_state.gpubuf_static, 0, 0); + SDL_GpuSetRenderPassVertexBuffer(render, gpubuf_uniforms, 0, 1); + SDL_GpuDraw(render, 0, SDL_arraysize(vertex_data)); + SDL_GpuEndRenderPass(render); + + SDL_GpuSubmitCommandBuffers(&cmd, 1, presenttype, NULL); /* push work to the GPU and tell it to present to the window when done. */ +} + +static SDL_GpuShader *load_shader(const char *src, const char *type) +{ + SDL_GpuShader *retval = NULL; + Uint8 *bytecode = NULL; + Uint32 bytecodelen = 0; + if (SDL_GpuCompileShader(src, -1, type, "main", &bytecode, &bytecodelen) == -1) { + SDL_Log("Failed to compile %s shader: %s", type, SDL_GetError()); + quit(2); + } + retval = SDL_GpuLoadShader(gpu_device, bytecode, bytecodelen); + if (!retval) { + SDL_Log("Failed to load %s shader bytecode: %s", type, SDL_GetError()); + quit(2); + } + + SDL_free(bytecode); + + return retval; +} + +static void +init_render_state(void) +{ + SDL_GpuCommandBuffer *cmd; + SDL_GpuPipelineDescription pipelinedesc; + SDL_GpuTextureDescription texdesc; + SDL_GpuShader *vertex_shader; + SDL_GpuShader *fragment_shader; + void *ptr; + int i; + + #define CHECK_CREATE(var, thing) { if (!(var)) { SDL_Log("Failed to create %s: %s\n", thing, SDL_GetError()); quit(2); } } + + gpu_device = SDL_GpuCreateDevice("The GPU device"); + CHECK_CREATE(gpu_device, "GPU device"); + + vertex_shader = load_shader(shader_vert_src, "vertex"); + fragment_shader = load_shader(shader_frag_src, "fragment"); + + /* We just need to upload the static data once. */ + render_state.gpubuf_static = SDL_GpuCreateAndInitBuffer("Static vertex data GPU buffer", gpu_device, sizeof (vertex_data), vertex_data); + CHECK_CREATE(render_state.gpubuf_static, "static vertex GPU buffer"); + + SDL_GpuDefaultPipelineDescription(&pipelinedesc); + pipelinedesc.label = "The spinning cube pipeline"; + pipelinedesc.primitive = SDL_GPUPRIM_TRIANGLESTRIP; + pipelinedesc.vertex_shader = vertex_shader; + pipelinedesc.fragment_shader = fragment_shader; + pipelinedesc.num_vertex_attributes = 2; + pipelinedesc.vertices[0].format = SDL_GPUVERTFMT_FLOAT3; + pipelinedesc.vertices[1].format = SDL_GPUVERTFMT_FLOAT3; + pipelinedesc.vertices[1].index = 1; + pipelinedesc.num_color_attachments = 1; + pipelinedesc.color_attachments[0].pixel_format = SDL_GPUPIXELFMT_RGBA8; + pipelinedesc.color_attachments[0].blending_enabled = SDL_FALSE; + pipelinedesc.depth_format = SDL_GPUPIXELFMT_Depth24_Stencil8; + + render_state.pipeline = SDL_GpuCreatePipeline(gpu_device, &pipelinedesc); + if (!render_state.pipeline) { + SDL_Log("Failed to create render pipeline: %s\n", SDL_GetError()); + quit(2); + } + + /* These are reference-counted; once the pipeline is created, you don't need to keep these. 
*/ + SDL_GpuDestroyShader(vertex_shader); + SDL_GpuDestroyShader(fragment_shader); + + window_states = (WindowState *) SDL_calloc(state->num_windows, sizeof (WindowState)); + if (!window_states) { + SDL_Log("Out of memory!\n"); + quit(2); + } + + for (i = 0; i < state->num_windows; i++) { + /* each window gets a cycle of buffers and depth textures, so we don't have to wait for them + to finish; by the time they come around again in the cycle, they're available to use again. */ + WindowState *winstate = &window_states[i]; + char label[32]; + + SDL_snprintf(label, sizeof (label), "Window #%d uniform staging buffer", i); + winstate->cpubufcycle_uniforms = SDL_GpuCreateCpuBufferCycle(label, gpu_device, sizeof (float) * 16, NULL, 3); + CHECK_CREATE(winstate->cpubufcycle_uniforms, label); + + SDL_snprintf(label, sizeof (label), "Window #%d uniform GPU buffer", i); + winstate->gpubufcycle_uniforms = SDL_GpuCreateBufferCycle(label, gpu_device, sizeof (float) * 16, 3); + CHECK_CREATE(winstate->gpubufcycle_uniforms, label); + + SDL_snprintf(label, sizeof (label), "Window #%d depth texture", i); /* NULL texdesc, so we'll build them as we need them. */ + winstate->texcycle_depth = SDL_GpuCreateTextureCycle(label, gpu_device, NULL, 3); + CHECK_CREATE(winstate->texcycle_depth, label); + + /* make each window different */ + winstate->angle_x = (i * 10) % 360; + winstate->angle_y = (i * 20) % 360; + winstate->angle_z = (i * 30) % 360; + } +} + + +static int done = 0; +static Uint32 frames = 0; + +void loop() +{ + SDL_Event event; + int i; + int status; + + /* Check for events */ + ++frames; + while (SDL_PollEvent(&event) && !done) { + SDLTest_CommonEvent(state, &event, &done); + } + if (!done) { + for (i = 0; i < state->num_windows; ++i) { + Render(state->windows[i], i); + } + } +#ifdef __EMSCRIPTEN__ + else { + emscripten_cancel_main_loop(); + } +#endif +} + +int +main(int argc, char *argv[]) +{ + int fsaa; + int value; + int i; + SDL_DisplayMode mode; + Uint32 then, now; + int status; + + /* Initialize parameters */ + fsaa = 0; + + /* Initialize test framework */ + state = SDLTest_CommonCreateState(argv, SDL_INIT_VIDEO); + if (!state) { + return 1; + } + + state->skip_renderer = 1; + + for (i = 1; i < argc;) { + int consumed; + + consumed = SDLTest_CommonArg(state, i); + if (consumed == 0) { + if (SDL_strcasecmp(argv[i], "--fsaa") == 0) { + ++fsaa; + consumed = 1; + } else { + consumed = -1; + } + } + if (consumed < 0) { + static const char *options[] = { "[--fsaa]", NULL }; + SDLTest_CommonLogUsage(state, argv[0], options); + quit(1); + } + i += consumed; + } + + state->window_flags |= SDL_WINDOW_RESIZABLE; + + if (!SDLTest_CommonInit(state)) { + quit(2); + return 0; + } + + SDL_GetCurrentDisplayMode(0, &mode); + SDL_Log("Screen bpp: %d\n", SDL_BITSPERPIXEL(mode.format)); + + #if 0 // !!! FIXME: report any of this through the Gpu API? + SDL_Log("\n"); + SDL_Log("Vendor : %s\n", ctx.glGetString(GL_VENDOR)); + SDL_Log("Renderer : %s\n", ctx.glGetString(GL_RENDERER)); + SDL_Log("Version : %s\n", ctx.glGetString(GL_VERSION)); + SDL_Log("Extensions : %s\n", ctx.glGetString(GL_EXTENSIONS)); + SDL_Log("\n"); + #endif + + /* !!! 
FIXME: use fsaa once multisample support is wired into the API */ + + init_render_state(); + + /* Main render loop */ + frames = 0; + then = SDL_GetTicks(); + done = 0; + +#ifdef __EMSCRIPTEN__ + emscripten_set_main_loop(loop, 0, 1); +#else + while (!done) { + loop(); + } +#endif + + /* Print out some timing information */ + now = SDL_GetTicks(); + if (now > then) { + SDL_Log("%2.2f frames per second\n", + ((double) frames * 1000) / (now - then)); + } +#if !defined(__ANDROID__) && !defined(__NACL__) + quit(0); +#endif + return 0; +} + +/* vi: set ts=4 sw=4 expandtab: */ From 4436d1c757537fe871f5d45f7bd7eb67dd2c0ac0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 5 May 2022 14:25:16 -0400 Subject: [PATCH 27/54] gpu: Call it "CreateShader" instead of "LoadShader" like other GPU objects. --- include/SDL_gpu.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index df2e026e74fe8..0d098fc270214 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -94,7 +94,7 @@ extern "C" { * work on OpenGL 2 and Direct3D 9, but why bother in modern times? * (then again: maybe we can support enough of this to make many * reasonable apps run on older GL/D3D, and just fail in - * SDL_GpuLoadShader on unsupported stuff). + * SDL_GpuCreateShader on unsupported stuff). * - Modern GPUs expect you to draw triangles, lines, or points. * There are no quads or complex polygons. You can build them out of * triangles yourself when you need them. @@ -227,11 +227,12 @@ typedef struct SDL_GpuTextureDescription typedef struct SDL_GpuTexture SDL_GpuTexture; SDL_GpuTexture *SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc); void SDL_GpuDestroyTexture(SDL_GpuTexture *texture); -void SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc); +int SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc); /* compiling shaders is a different (and optional at runtime) piece, in SDL_gpu_compiler.h */ typedef struct SDL_GpuShader SDL_GpuShader; -SDL_GpuShader *SDL_GpuLoadShader(SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); +SDL_GpuShader *SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); /* !!! FIXME: bytecode type enum? */ +/* !!! FIXME: add a query for platform/gpu specific blob that can be fed back next time for faster load times? */ void SDL_GpuDestroyShader(SDL_GpuShader *shader); From bc0efb32f8c18520d2afd0566658a6821e99dea0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 5 May 2022 14:26:08 -0400 Subject: [PATCH 28/54] gpu: Allow caller to enumerate and select backend drivers. --- include/SDL_gpu.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 0d098fc270214..77a306a1c605f 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -148,14 +148,23 @@ extern "C" { */ /* - * !!! FIXME: enumerate lowlevel APIs? In theory a Windows machine - * could offer all of Direct3D 9-12, Vulkan, OpenGL, GLES, etc... + * You can get a list of driver names that were compiled into this build of + * SDL. These are not all guaranteed to _work_ at runtime, but they are the + * backends that are potentially available. 
You do not have to iterate + * these; if you request a NULL driver to SDL_GpuCreateDevice, SDL will try + * to make the best decision for the current system, but if you want to + * try multiple drivers or log them for debugging, etc, here you go. + * + * The list of drivers is static and based on what was compiled into SDL; it + * does not change between calls to these functions. */ +Uint32 SDL_GpuGetNumDrivers(void); +const char *SDL_GpuGetDriverName(Uint32 index); /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ typedef struct SDL_GpuDevice SDL_GpuDevice; -SDL_GpuDevice *SDL_GpuCreateDevice(const char *label); /* `label` is for debugging, not a specific device name to access. */ +SDL_GpuDevice *SDL_GpuCreateDevice(const char *label, const char *driver); /* `label` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); /* CPU buffers live in RAM and can be accessed by the CPU. */ From 1bb2294845db85c55cd81b14166fbfee3112129a Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 5 May 2022 14:28:23 -0400 Subject: [PATCH 29/54] gpu: Submitting command buffers needs to take a device object. This lets us verify that all command buffers in question belong to the same device, and maybe deal with weird cases where we submit no buffers but want to trigger a present to the window, or add a fence that fires when the queue hits this point, I don't know. --- include/SDL_gpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 77a306a1c605f..d85e8cd1c0bcc 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -693,7 +693,7 @@ typedef enum SDL_GpuPresentType * If this command buffer is to present to a window, specify a non-NULL present_window. * presenttype is ignored if this isn't a render pass using a window's backbuffer. */ -void SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); +int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); /* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can abandon them, freeing their resources. This can be useful if something unrelated fails halfway through buffer encoding. */ From 97e61f89c20659019a788def2271eee626bdb2bf Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 5 May 2022 14:32:12 -0400 Subject: [PATCH 30/54] gpu: Most of the void APIs should be able to return an error. Most apps aren't going to check every return code as they set a new pipeline in a render pass or whatever, but these are things that are vulnerable to resource exhaustion, so they _should_ be able to report failure anyhow. --- include/SDL_gpu.h | 56 ++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index d85e8cd1c0bcc..1a0ca1037abcf 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -167,12 +167,14 @@ typedef struct SDL_GpuDevice SDL_GpuDevice; SDL_GpuDevice *SDL_GpuCreateDevice(const char *label, const char *driver); /* `label` is for debugging, not a specific device name to access. */ void SDL_GpuDestroyDevice(SDL_GpuDevice *device); +/* !!! FIXME: device caps */ + /* CPU buffers live in RAM and can be accessed by the CPU. 
*/ typedef struct SDL_GpuCpuBuffer SDL_GpuCpuBuffer; SDL_GpuCpuBuffer *SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); void SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer); void *SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen); -void SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer); +int SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer); /* * GPU buffers live in GPU-specific memory and can not be accessed by the CPU. @@ -184,7 +186,7 @@ void SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer); * upload is complete: SDL_GpuCreateAndInitBuffer */ typedef struct SDL_GpuBuffer SDL_GpuBuffer; -SDL_GpuBuffer *SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 length); +SDL_GpuBuffer *SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen); void SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer); @@ -452,7 +454,7 @@ void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); start with an existing pipeline's state) then change what you like. Note that the `label` and shader fields are read-only; do not modify or free them! */ void SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc); -void SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); +int SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); @@ -600,19 +602,19 @@ SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffe * was set to at the time. Try not to encode redundant state changes into a render pass * as they will take resources to do nothing. */ -void SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); +int SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); -void SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar); -void SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height); -void SDL_GpuSetRenderBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha); +int SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar); +int SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height); +int SDL_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha); -void SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); -void SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); -void SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); +int SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); +int SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); +int SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); -void SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); -void 
SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); -void SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); +int SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); +int SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); +int SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); /* Drawing! */ @@ -623,43 +625,44 @@ typedef enum SDL_GpuIndexType SDL_GPUINDEXTYPE_UINT32 } SDL_GpuIndexType; -void SDL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count); -void SDL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); -void SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); -void SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); +int SDL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count); +int SDL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); +int SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); +int SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); /* Done encoding this render pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ -void SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); +int SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); /* start encoding a blit pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. 
*/ typedef struct SDL_GpuBlitPass SDL_GpuBlitPass; SDL_GpuBlitPass *SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf); -void SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, +int SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); -void SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, unsigned char value); +int SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value); -void SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); +int SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); -void SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); -void SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +int SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +int SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +int SDL_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); -void SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, +int SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); -void SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, +int SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); /* Done encoding this blit pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ -void SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass); +int SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass); /* @@ -747,7 +750,6 @@ SDL_GpuFence *SDL_GpuNextFenceCycle(SDL_GpuFenceCycle *cycle); SDL_GpuFence **SDL_GpuNextFencePtrCycle(SDL_GpuFenceCycle *cycle); void SDL_GpuDestroyFenceCycle(SDL_GpuFenceCycle *cycle); - /* Ends C function definitions when using C++ */ #ifdef __cplusplus } From 5837f3bab0691a88def24e7871c67d7e2418898b Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 5 May 2022 14:35:35 -0400 Subject: [PATCH 31/54] gpu: Fill in a first shot at the high-level implementation. (Also update test programs to match changes to the API.) 
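The pattern throughout is that the public entry points validate their
arguments, allocate the generic object (plus a copy of its label or
description string), and then call through a table of function pointers that
the chosen backend filled in at device creation. The real structs live in
src/gpu/SDL_sysgpu.h; as a rough sketch of the shape only (field names here
are approximate, and `driverdata` is just a placeholder for whatever state a
backend keeps):

    typedef struct SDL_GpuDriver
    {
        const char *name;                            /* what SDL_GpuGetDriverName() reports */
        int (*CreateDevice)(SDL_GpuDevice *device);  /* fills in the device's function pointers */
    } SDL_GpuDriver;

    struct SDL_GpuDevice
    {
        const char *label;
        void (*DestroyDevice)(SDL_GpuDevice *device);
        int (*CreateCpuBuffer)(SDL_GpuCpuBuffer *buffer, const void *data);
        int (*CreateBuffer)(SDL_GpuBuffer *buffer);
        /* ...one hook per operation: textures, shaders, pipelines, passes, etc... */
        void *driverdata;   /* backend-specific state */
    };

So SDL_GpuCreateCpuBuffer(), for example, checks its parameters, allocates the
SDL_GpuCpuBuffer and its label, and hands off to device->CreateCpuBuffer();
destruction mirrors it with device->DestroyCpuBuffer().
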
--- src/gpu/SDL_gpu.c | 718 ++++++++++++++++++++++++++++++----- src/gpu/SDL_sysgpu.h | 125 +++++- test/testgpu_simple_clear.c | 4 +- test/testgpu_spinning_cube.c | 12 +- 4 files changed, 756 insertions(+), 103 deletions(-) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index b184cb4458179..c7fe0ab2974f2 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -25,106 +25,446 @@ #include "SDL.h" #include "SDL_sysgpu.h" +extern const SDL_GpuDriver DUMMY_GpuDriver; + +static const SDL_GpuDriver *gpu_drivers[] = { +#if 0 + &DUMMY_GpuDriver +#endif +}; + +Uint32 +SDL_GpuGetNumDrivers(void) +{ + return (Uint32) SDL_arraysize(gpu_drivers); +} + +const char * +SDL_GpuGetDriverName(Uint32 index) +{ + const Uint32 numdrivers = (Uint32) SDL_arraysize(gpu_drivers); + if (index >= numdrivers) { + SDL_SetError("index must be in the range of 0 - %u", (unsigned int) (numdrivers ? (numdrivers - 1) : 0)); + return NULL; + } + return gpu_drivers[index]->name; +} + +/* helper function since lots of things need an object and a label allocated. */ +static void *allocate_obj_and_string(const size_t objlen, const char *str, char **allocatedstr) +{ + void *retval; + + SDL_assert(str != NULL); + SDL_assert(allocatedstr != NULL); + SDL_assert(objlen > 0); + + *allocatedstr = NULL; + retval = SDL_calloc(1, objlen); + if (!retval) { + SDL_OutOfMemory(); + return NULL; + } + + if (str) { + *allocatedstr = SDL_strdup(str); + if (!*allocatedstr) { + SDL_free(retval); + SDL_OutOfMemory(); + return NULL; + } + } + + return retval; +} + +#define ALLOC_OBJ_WITH_LABEL(typ, var, str) { \ + char *cpystr; \ + var = (typ *) allocate_obj_and_string(sizeof (typ), str, &cpystr); \ + if (var != NULL) { \ + var->label = cpystr; \ + } \ +} + +#define FREE_AND_NULL_OBJ_WITH_LABEL(obj) { \ + SDL_free((void *) obj->label); \ + SDL_free(obj); \ + obj = NULL; \ +} + +#define ALLOC_OBJ_WITH_DESC(typ, var, dsc) { \ + char *cpystr; \ + var = (typ *) allocate_obj_and_string(sizeof (typ), dsc->label, &cpystr); \ + if (var != NULL) { \ + SDL_memcpy(&var->desc, dsc, sizeof (*dsc));\ + var->desc.label = cpystr; \ + } \ +} + +#define FREE_AND_NULL_OBJ_WITH_DESC(obj) { \ + SDL_free((void *) obj->desc.label); \ + SDL_free(obj); \ + obj = NULL; \ +} + + /* !!! FIXME: change this API to allow selection of a specific GPU? */ +static int +GpuCreateDeviceInternal(SDL_GpuDevice *device, const char *driver) +{ + size_t i; + + if (driver) { /* if a specific driver requested, succeed or fail without trying others. */ + for (i = 0; i < SDL_arraysize(gpu_drivers); i++) { + const SDL_GpuDriver *thisdriver = gpu_drivers[i]; + if (SDL_strcasecmp(driver, thisdriver->name) == 0) { + return thisdriver->CreateDevice(device); + } + } + return SDL_SetError("GPU driver '%s' not found", driver); /* possibly misnamed, possibly not built in */ + } + + + /* !!! FIXME: add a hint to SDL_hints.h later, but that will make merging later harder if done now. */ + driver = SDL_GetHint(/*SDL_HINT_GPU_DRIVER*/ "SDL_GPU_DRIVER"); + if (driver) { + for (i = 0; i < SDL_arraysize(gpu_drivers); i++) { + const SDL_GpuDriver *thisdriver = gpu_drivers[i]; + if (SDL_strcasecmp(driver, thisdriver->name) == 0) { + if (thisdriver->CreateDevice(device) == 0) { + return 0; + } + } + } + } + + /* Still here? Take the first one that works. 
*/ + for (i = 0; i < SDL_arraysize(gpu_drivers); i++) { + const SDL_GpuDriver *thisdriver = gpu_drivers[i]; + if (!driver || (SDL_strcasecmp(driver, thisdriver->name) != 0)) { + if (thisdriver->CreateDevice(device) == 0) { + return 0; + } + } + } + + return SDL_SetError("Couldn't find an available GPU driver"); +} + SDL_GpuDevice * -SDL_GpuCreateDevice(const char *label) +SDL_GpuCreateDevice(const char *label, const char *driver) { + SDL_GpuDevice *device; + ALLOC_OBJ_WITH_LABEL(SDL_GpuDevice, device, label); + + if (device != NULL) { + if (GpuCreateDeviceInternal(device, driver) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(device); + } + } + return device; } void SDL_GpuDestroyDevice(SDL_GpuDevice *device) { + if (device) { + device->DestroyDevice(device); + FREE_AND_NULL_OBJ_WITH_LABEL(device); + } } SDL_GpuCpuBuffer * SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) { + SDL_GpuCpuBuffer *buffer = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else if (buflen == 0) { + SDL_InvalidParamError("buflen"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuCpuBuffer, buffer, label); + if (buffer != NULL) { + buffer->device = device; + buffer->buflen = buflen; + if (device->CreateCpuBuffer(buffer, data) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(buffer); + } + } + } + return buffer; } void SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) { + if (buffer) { + buffer->device->DestroyCpuBuffer(buffer); + FREE_AND_NULL_OBJ_WITH_LABEL(buffer); + } } void * SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen) { + void *retval = NULL; + if (!buffer) { + SDL_InvalidParamError("buffer"); + } else { + retval = buffer->device->LockCpuBuffer(buffer); + } + + if (_buflen) { + *_buflen = retval ? buffer->buflen : 0; + } + return retval; } -void +int SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) { + if (!buffer) { + return SDL_InvalidParamError("buffer"); + } + return buffer->device->UnlockCpuBuffer(buffer); } SDL_GpuBuffer * -SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 length) +SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen) { + SDL_GpuBuffer *buffer = NULL; + + if (!device) { + SDL_InvalidParamError("device"); + } else if (buflen == 0) { + SDL_InvalidParamError("buflen"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuBuffer, buffer, label); + if (buffer != NULL) { + buffer->device = device; + buffer->buflen = buflen; + if (device->CreateBuffer(buffer) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(buffer); + } + } + } + return buffer; } void SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer) { + if (buffer) { + buffer->device->DestroyBuffer(buffer); + FREE_AND_NULL_OBJ_WITH_LABEL(buffer); + } } - SDL_GpuTexture * SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc) { + SDL_GpuTexture *texture = NULL; + + if (!device) { + SDL_InvalidParamError("device"); + } else if (!desc) { + SDL_InvalidParamError("desc"); + } else { + ALLOC_OBJ_WITH_DESC(SDL_GpuTexture, texture, desc); + if (texture != NULL) { + texture->device = device; + if (device->CreateTexture(texture) == -1) { + FREE_AND_NULL_OBJ_WITH_DESC(texture); + } + } + } + return texture; } -void +int SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc) { + if (!texture) { + return SDL_InvalidParamError("pipeline"); + } else if (!desc) { + return SDL_InvalidParamError("desc"); + } + SDL_memcpy(desc, &texture->desc, sizeof (*desc)); + return 0; } void 
SDL_GpuDestroyTexture(SDL_GpuTexture *texture) { + if (texture) { + texture->device->DestroyTexture(texture); + FREE_AND_NULL_OBJ_WITH_DESC(texture); + } } SDL_GpuShader * -SDL_GpuLoadShader(SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen) +SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen) { + SDL_GpuShader *shader = NULL; + + if (!device) { + SDL_InvalidParamError("device"); + } else if (!bytecode) { + SDL_InvalidParamError("bytecode"); + } else if (bytecodelen == 0) { + SDL_InvalidParamError("bytecodelen"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuShader, shader, label); + if (shader != NULL) { + shader->device = device; + SDL_AtomicSet(&shader->refcount, 1); + if (device->CreateShader(shader, bytecode, bytecodelen) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(shader); + } + } + } + return shader; } void SDL_GpuDestroyShader(SDL_GpuShader *shader) { + if (shader) { + if (SDL_AtomicDecRef(&shader->refcount)) { + shader->device->DestroyShader(shader); + FREE_AND_NULL_OBJ_WITH_LABEL(shader); + } + } } SDL_GpuTexture * SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) { + if (!device) { + SDL_InvalidParamError("device"); + return NULL; + } else if (!window) { + SDL_InvalidParamError("window"); + return NULL; + } + return device->GetBackbuffer(device, window); } SDL_GpuPipeline * SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc) { + SDL_GpuPipeline *pipeline = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else if (!desc) { + SDL_InvalidParamError("desc"); + } else if (desc->vertex_shader && (desc->vertex_shader->device != device)) { + SDL_SetError("vertex shader is not from this device"); + } else if (desc->fragment_shader && (desc->fragment_shader->device != device)) { + SDL_SetError("fragment shader is not from this device"); + } else { + ALLOC_OBJ_WITH_DESC(SDL_GpuPipeline, pipeline, desc); + if (pipeline != NULL) { + pipeline->device = device; + if (device->CreatePipeline(pipeline) == -1) { + FREE_AND_NULL_OBJ_WITH_DESC(pipeline); + } else { + if (pipeline->desc.vertex_shader) { + SDL_AtomicIncRef(&pipeline->desc.vertex_shader->refcount); + } + if (pipeline->desc.fragment_shader) { + SDL_AtomicIncRef(&pipeline->desc.fragment_shader->refcount); + } + } + } + } + return pipeline; } void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) { + if (pipeline) { + SDL_GpuShader *vshader = pipeline->desc.vertex_shader; + SDL_GpuShader *fshader = pipeline->desc.fragment_shader; + + pipeline->device->DestroyPipeline(pipeline); + FREE_AND_NULL_OBJ_WITH_DESC(pipeline); + + /* decrement reference counts (and possibly destroy) the shaders. */ + SDL_GpuDestroyShader(vshader); + SDL_GpuDestroyShader(fshader); + } } void SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc) { + /* !!! FIXME: decide if these are reasonable defaults. 
*/ + SDL_zerop(desc); + desc->primitive = SDL_GPUPRIM_TRIANGLESTRIP; + desc->num_vertex_attributes = 1; + desc->vertices[0].format = SDL_GPUVERTFMT_FLOAT4; + desc->num_color_attachments = 1; + desc->color_attachments[0].pixel_format = SDL_GPUPIXELFMT_RGBA8; + desc->color_attachments[0].writemask_enabled_red = SDL_TRUE; + desc->color_attachments[0].writemask_enabled_blue = SDL_TRUE; + desc->color_attachments[0].writemask_enabled_green = SDL_TRUE; + desc->color_attachments[0].writemask_enabled_alpha = SDL_TRUE; + desc->depth_format = SDL_GPUPIXELFMT_Depth24_Stencil8; + desc->stencil_format = SDL_GPUPIXELFMT_Depth24_Stencil8; + desc->depth_write_enabled = SDL_TRUE; + desc->stencil_read_mask = 0xFFFFFFFF; + desc->stencil_write_mask = 0xFFFFFFFF; + desc->depth_function = SDL_GPUCMPFUNC_LESS; + desc->stencil_function = SDL_GPUCMPFUNC_ALWAYS; + desc->stencil_fail = SDL_GPUSTENCILOP_KEEP; + desc->depth_fail = SDL_GPUSTENCILOP_KEEP; + desc->depth_and_stencil_pass = SDL_GPUSTENCILOP_KEEP; + desc->fill_mode = SDL_GPUFILL_FILL; + desc->front_face = SDL_GPUFRONTFACE_COUNTER_CLOCKWISE; + desc->cull_face = SDL_GPUCULLFACE_BACK; } -void +int SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc) { + if (!pipeline) { + return SDL_InvalidParamError("pipeline"); + } else if (!desc) { + return SDL_InvalidParamError("desc"); + } + SDL_memcpy(desc, &pipeline->desc, sizeof (*desc)); + return 0; } SDL_GpuSampler * SDL_GpuCreateSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc) { + SDL_GpuSampler *sampler = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else if (!desc) { + SDL_InvalidParamError("desc"); + } else { + ALLOC_OBJ_WITH_DESC(SDL_GpuSampler, sampler, desc); + if (sampler != NULL) { + sampler->device = device; + if (device->CreateSampler(sampler) == -1) { + FREE_AND_NULL_OBJ_WITH_DESC(sampler); + } + } + } + return sampler; } void SDL_GpuDestroySampler(SDL_GpuSampler *sampler) { + if (sampler) { + sampler->device->DestroySampler(sampler); + FREE_AND_NULL_OBJ_WITH_DESC(sampler); + } } @@ -334,48 +674,26 @@ void nuke_sampler(const void *key, const void *value, void *data) SDL_GpuStateCache * SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device) { - SDL_GpuStateCache *cache = (SDL_GpuStateCache *) SDL_calloc(1, sizeof (SDL_GpuStateCache)); - if (!cache) { - SDL_OutOfMemory(); - return NULL; - } - - cache->pipeline_mutex = SDL_CreateMutex(); - if (!cache->pipeline_mutex) { - goto failed; - } - - cache->sampler_mutex = SDL_CreateMutex(); - if (!cache->sampler_mutex) { - goto failed; - } - - if (label) { - cache->label = SDL_strdup(label); - if (!cache->label) { - SDL_OutOfMemory(); - goto failed; + SDL_GpuStateCache *cache = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuStateCache, cache, label); + if (cache != NULL) { + /* !!! FIXME: adjust hash table bucket counts? */ + cache->device = device; + cache->pipeline_mutex = SDL_CreateMutex(); + cache->sampler_mutex = SDL_CreateMutex(); + cache->pipeline_cache = SDL_NewHashTable(NULL, 128, hash_pipeline, keymatch_pipeline, nuke_pipeline, SDL_FALSE); + cache->sampler_cache = SDL_NewHashTable(NULL, 16, hash_sampler, keymatch_sampler, nuke_sampler, SDL_FALSE); + if (!cache->pipeline_mutex || !cache->sampler_mutex || !cache->pipeline_cache || !cache->sampler_cache) { + SDL_GpuDestroyStateCache(cache); /* can clean up half-created objects. */ + cache = NULL; + } } } - /* !!! FIXME: adjust hash table bucket counts? 
*/ - - cache->pipeline_cache = SDL_NewHashTable(NULL, 128, hash_pipeline, keymatch_pipeline, nuke_pipeline, SDL_FALSE); - if (!cache->pipeline_cache) { - goto failed; - } - - cache->sampler_cache = SDL_NewHashTable(NULL, 16, hash_sampler, keymatch_sampler, nuke_sampler, SDL_FALSE); - if (!cache->sampler_cache) { - goto failed; - } - - cache->device = device; return cache; - -failed: - SDL_GpuDestroyStateCache(cache); /* can clean up half-created objects. */ - return NULL; } #define GETCACHEDOBJIMPL(ctyp, typ) \ @@ -422,17 +740,28 @@ SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache) SDL_FreeHashTable(cache->pipeline_cache); SDL_DestroyMutex(cache->sampler_mutex); SDL_FreeHashTable(cache->sampler_cache); - SDL_free((void *) cache->label); - SDL_free(cache); + FREE_AND_NULL_OBJ_WITH_LABEL(cache); } } SDL_GpuCommandBuffer * SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device) { + SDL_GpuCommandBuffer *cmdbuf = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuCommandBuffer, cmdbuf, label); + if (cmdbuf != NULL) { + cmdbuf->device = device; + if (device->CreateCommandBuffer(cmdbuf) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(cmdbuf); + } + } + } + return cmdbuf; } - SDL_GpuRenderPass * SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, Uint32 num_color_attachments, @@ -440,175 +769,376 @@ SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment) { + SDL_GpuRenderPass *pass = NULL; + if (!cmdbuf) { + SDL_InvalidParamError("cmdbuf"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuRenderPass, pass, label); + if (pass != NULL) { + pass->device = cmdbuf->device; + pass->cmdbuf = cmdbuf; + if (pass->device->StartRenderPass(pass, num_color_attachments, color_attachments, depth_attachment, stencil_attachment) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(pass); + } + } + } + return pass; } - -void +int SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) { + if (!pass) { + return SDL_InvalidParamError("pass"); + } + /* !!! FIXME: can we set a NULL pipeline? */ + return pass->device->SetRenderPassPipeline(pass, pipeline); } -void +int SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar) { + return pass ? pass->device->SetRenderPassViewport(pass, x, y, width, height, znear, zfar) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height) { + return pass ? pass->device->SetRenderPassScissor(pass, x, y, width, height) : SDL_InvalidParamError("pass"); } -void -SDL_GpuSetRenderBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha) +int +SDL_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha) { + return pass ? pass->device->SetRenderPassBlendConstant(pass, red, green, blue, alpha) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) { + return pass ? 
pass->device->SetRenderPassVertexBuffer(pass, buffer, offset, index) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) { + return pass ? pass->device->SetRenderPassVertexSampler(pass, sampler, index) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) { + return pass ? pass->device->SetRenderPassVertexTexture(pass, texture, index) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) { + return pass ? pass->device->SetRenderPassFragmentBuffer(pass, buffer, offset, index) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) { + return pass ? pass->device->SetRenderPassFragmentSampler(pass, sampler, index) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) { + return pass ? pass->device->SetRenderPassFragmentTexture(pass, texture, index) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count) { + return pass ? pass->device->Draw(pass, vertex_start, vertex_count) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset) { + return pass ? pass->device->DrawIndexed(pass, index_count, index_type, index_buffer, index_offset) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance) { + return pass ? pass->device->DrawInstanced(pass, vertex_start, vertex_count, instance_count, base_instance) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance) { + return pass ? 
pass->device->DrawInstancedIndexed(pass, index_count, index_type, index_buffer, index_offset, instance_count, base_instance) : SDL_InvalidParamError("pass"); } - -void +int SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass) { + int retval; + if (!pass) { + return SDL_InvalidParamError("pass"); + } + + retval = pass->device->EndRenderPass(pass); + if (retval == 0) { + FREE_AND_NULL_OBJ_WITH_LABEL(pass); + } + return retval; } SDL_GpuBlitPass * SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) { + SDL_GpuBlitPass *pass = NULL; + if (!cmdbuf) { + SDL_InvalidParamError("cmdbuf"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuBlitPass, pass, label); + if (pass != NULL) { + pass->device = cmdbuf->device; + pass->cmdbuf = cmdbuf; + if (pass->device->StartBlitPass(pass) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(pass); + } + } + } + return pass; } -void +int SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, - Uint32 srcx, Uint32 srcy, Uint32 srcz, - Uint32 srcw, Uint32 srch, Uint32 srcdepth, - SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, - Uint32 dstx, Uint32 dsty, Uint32 dstz) -{ + Uint32 srcx, Uint32 srcy, Uint32 srcz, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, + Uint32 dstx, Uint32 dsty, Uint32 dstz) +{ + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!srctex) { + return SDL_InvalidParamError("srctex"); + } else if (!dsttex) { + return SDL_InvalidParamError("dsttex"); + } + /* !!! FIXME: check levels, slices, etc. */ + return pass->device->CopyBetweenTextures(pass, srctex, srcslice, srclevel, srcx, srcy, srcz, srcw, srch, srcdepth, dsttex, dstslice, dstlevel, dstx, dsty, dstz); } -void -SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, unsigned char value) -{ +int +SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value) +{ + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!buffer) { + return SDL_InvalidParamError("buffer"); + } else if ((offset+length) > buffer->buflen) { + return SDL_SetError("offset+length overflows the buffer"); /* !!! FIXME: should we clamp instead so you can fully initialize without knowing the size? */ + } + return pass ? 
pass->device->FillBuffer(pass, buffer, offset, length, value) : SDL_InvalidParamError("pass"); } -void +int SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) { + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!texture) { + return SDL_InvalidParamError("texture"); + } + return pass->device->GenerateMipmaps(pass, texture); } -void +int SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!srcbuf) { + return SDL_InvalidParamError("srcbuf"); + } else if (!dstbuf) { + return SDL_InvalidParamError("dstbuf"); + } else if ((srcoffset+length) > srcbuf->buflen) { + return SDL_SetError("srcoffset+length overflows the source buffer"); + } else if ((dstoffset+length) > dstbuf->buflen) { + return SDL_SetError("dstoffset+length overflows the destination buffer"); + } + return pass->device->CopyBufferCpuToGpu(pass, srcbuf, srcoffset, dstbuf, dstoffset, length); } -void +int SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!srcbuf) { + return SDL_InvalidParamError("srcbuf"); + } else if (!dstbuf) { + return SDL_InvalidParamError("dstbuf"); + } else if ((srcoffset+length) > srcbuf->buflen) { + return SDL_SetError("srcoffset+length overflows the source buffer"); + } else if ((dstoffset+length) > dstbuf->buflen) { + return SDL_SetError("dstoffset+length overflows the destination buffer"); + } + return pass->device->CopyBufferGpuToCpu(pass, srcbuf, srcoffset, dstbuf, dstoffset, length); } -void +int +SDL_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +{ + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!srcbuf) { + return SDL_InvalidParamError("srcbuf"); + } else if (!dstbuf) { + return SDL_InvalidParamError("dstbuf"); + } else if ((srcoffset+length) > srcbuf->buflen) { + return SDL_SetError("srcoffset+length overflows the source buffer"); + } else if ((dstoffset+length) > dstbuf->buflen) { + return SDL_SetError("dstoffset+length overflows the destination buffer"); + } + return pass->device->CopyBufferGpuToGpu(pass, srcbuf, srcoffset, dstbuf, dstoffset, length); +} + +int SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, - Uint32 srcpitch, Uint32 srcimgpitch, - Uint32 srcw, Uint32 srch, Uint32 srcdepth, - SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, - Uint32 dstx, Uint32 dsty, Uint32 dstz) -{ + Uint32 srcpitch, Uint32 srcimgpitch, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, + Uint32 dstx, Uint32 dsty, Uint32 dstz) +{ + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!srcbuf) { + return SDL_InvalidParamError("srcbuf"); + } else if (!dsttex) { + return SDL_InvalidParamError("dsttex"); + } + /* !!! 
FIXME: check other param ranges */ + return pass->device->CopyFromBufferToTexture(pass, srcbuf, srcoffset, srcpitch, srcimgpitch, srcw, srch, srcdepth, dsttex, dstslice, dstlevel, dstx, dsty, dstz); } -void +int SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, - Uint32 srcx, Uint32 srcy, Uint32 srcz, - Uint32 srcw, Uint32 srch, Uint32 srcdepth, - SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) -{ + Uint32 srcx, Uint32 srcy, Uint32 srcz, + Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) +{ + if (!pass) { + return SDL_InvalidParamError("pass"); + } else if (!srctex) { + return SDL_InvalidParamError("srctex"); + } else if (!dstbuf) { + return SDL_InvalidParamError("dstbuf"); + } + /* !!! FIXME: check other param ranges */ + return pass->device->CopyFromTextureToBuffer(pass, srctex, srcslice, srclevel, srcx, srcy, srcz, srcw, srch, srcdepth, dstbuf, dstoffset, dstpitch, dstimgpitch); } -void +int SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass) { + int retval; + if (!pass) { + return SDL_InvalidParamError("pass"); + } + + retval = pass->device->EndBlitPass(pass); + if (retval == 0) { + FREE_AND_NULL_OBJ_WITH_LABEL(pass); + } + return retval; } SDL_GpuFence * SDL_GpuCreateFence(const char *label, SDL_GpuDevice *device) { + SDL_GpuFence *fence = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else { + ALLOC_OBJ_WITH_LABEL(SDL_GpuFence, fence, label); + if (fence != NULL) { + fence->device = device; + if (device->CreateFence(fence) == -1) { + FREE_AND_NULL_OBJ_WITH_LABEL(fence); + } + } + } + return fence; } void SDL_GpuDestroyFence(SDL_GpuFence *fence) { + if (fence) { + fence->device->DestroyFence(fence); + FREE_AND_NULL_OBJ_WITH_LABEL(fence); + } } int SDL_GpuQueryFence(SDL_GpuFence *fence) { + return fence ? fence->device->QueryFence(fence) : SDL_InvalidParamError("fence"); } int SDL_GpuResetFence(SDL_GpuFence *fence) { + return fence ? fence->device->ResetFence(fence) : SDL_InvalidParamError("fence"); } int SDL_GpuWaitFence(SDL_GpuFence *fence) { + return fence ? 
fence->device->WaitFence(fence) : SDL_InvalidParamError("fence"); } -void -SDL_GpuSubmitCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence) +int +SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence) { + int retval; + Uint32 i; + + if (!device) { + return SDL_InvalidParamError("device"); + } else if (fence && (fence->device != device)) { + return SDL_SetError("Fence is not from this device"); + } + + for (i = 0; i < numcmdbufs; i++) { + if (!buffers[i]) { + return SDL_SetError("Can't submit a NULL command buffer"); + } else if (buffers[i]->device != device) { + return SDL_SetError("Command buffer is not from this device"); + } + } + + retval = device->SubmitCommandBuffers(device, buffers, numcmdbufs, presenttype, fence); + + if (retval == 0) { + for (i = 0; i < numcmdbufs; i++) { + FREE_AND_NULL_OBJ_WITH_LABEL(buffers[i]); + } + } + + return retval; } void SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs) { + if (buffers) { + Uint32 i; + for (i = 0; i < numcmdbufs; i++) { + if (buffers[i]) { + buffers[i]->device->AbandonCommandBuffer(buffers[i]); + FREE_AND_NULL_OBJ_WITH_LABEL(buffers[i]); + } + } + } } SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) @@ -635,7 +1165,7 @@ SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *devi ((blit = SDL_GpuStartBlitPass("Blit pass for SDL_GpuCreateAndInitBuffer", cmd)) != NULL) ) { SDL_GpuCopyBufferCpuToGpu(blit, staging, 0, gpubuf, 0, buflen); SDL_GpuEndBlitPass(blit); - SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_GPUPRESENT_NONE, fence); + SDL_GpuSubmitCommandBuffers(device, &cmd, 1, SDL_GPUPRESENT_NONE, fence); SDL_GpuWaitFence(fence); /* so we know it's definitely uploaded */ retval = gpubuf; } @@ -650,6 +1180,8 @@ SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *devi return retval; } +/* !!! 
FIXME: SDL_GpuCreateAndInitTexture */ + SDL_GpuTexture * SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depthtex) { diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index a226a24a12e33..ab973768aeae0 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -27,22 +27,138 @@ #include "SDL_gpu.h" #include "../SDL_hashtable.h" -struct SDL_GpuDevice +struct SDL_GpuCpuBuffer +{ + SDL_GpuDevice *device; + const char *label; + Uint32 buflen; +}; + +struct SDL_GpuBuffer +{ + SDL_GpuDevice *device; + const char *label; + Uint32 buflen; +}; + +struct SDL_GpuTexture { -// SDL_GpuBuffer *(*CreateCPUBuffer)(SDL_GpuDevice *_this, const Uint32 buflen); + SDL_GpuDevice *device; + SDL_GpuTextureDescription desc; +}; +struct SDL_GpuShader +{ + SDL_GpuDevice *device; + const char *label; + SDL_atomic_t refcount; }; struct SDL_GpuPipeline { + SDL_GpuDevice *device; SDL_GpuPipelineDescription desc; }; struct SDL_GpuSampler { + SDL_GpuDevice *device; SDL_GpuSamplerDescription desc; }; +struct SDL_GpuCommandBuffer +{ + SDL_GpuDevice *device; + const char *label; +}; + +struct SDL_GpuRenderPass +{ + SDL_GpuDevice *device; + const char *label; + SDL_GpuCommandBuffer *cmdbuf; +}; + +struct SDL_GpuBlitPass +{ + SDL_GpuDevice *device; + const char *label; + SDL_GpuCommandBuffer *cmdbuf; +}; + +struct SDL_GpuFence +{ + SDL_GpuDevice *device; + const char *label; +}; + +struct SDL_GpuDevice +{ + const char *label; + + void (*DestroyDevice)(SDL_GpuDevice *device); + + int (*CreateCpuBuffer)(SDL_GpuCpuBuffer *buffer, const void *data); + void (*DestroyCpuBuffer)(SDL_GpuCpuBuffer *buffer); + void *(*LockCpuBuffer)(SDL_GpuCpuBuffer *buffer); + int (*UnlockCpuBuffer)(SDL_GpuCpuBuffer *buffer); + + int (*CreateBuffer)(SDL_GpuBuffer *buffer); + void (*DestroyBuffer)(SDL_GpuBuffer *buffer); + + int (*CreateTexture)(SDL_GpuTexture *texture); + void (*DestroyTexture)(SDL_GpuTexture *texture); + + int (*CreateShader)(SDL_GpuShader *shader, const Uint8 *bytecode, const Uint32 bytecodelen); + void (*DestroyShader)(SDL_GpuShader *shader); + + SDL_GpuTexture *(*GetBackbuffer)(SDL_GpuDevice *device, SDL_Window *window); + + int (*CreatePipeline)(SDL_GpuPipeline *pipeline); + void (*DestroyPipeline)(SDL_GpuPipeline *pipeline); + + int (*CreateSampler)(SDL_GpuSampler *sampler); + void (*DestroySampler)(SDL_GpuSampler *sampler); + + int (*CreateCommandBuffer)(SDL_GpuCommandBuffer *cmdbuf); + int (*SubmitCommandBuffers)(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); + void (*AbandonCommandBuffer)(SDL_GpuCommandBuffer *buffer); + + int (*StartRenderPass)(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment); + int (*SetRenderPassPipeline)(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); + int (*SetRenderPassViewport)(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar); + int (*SetRenderPassScissor)(SDL_GpuRenderPass *pass, double x, double y, double width, double height); + int (*SetRenderPassBlendConstant)(SDL_GpuRenderPass *pass, double red, double green, double blue, double alpha); + int (*SetRenderPassVertexBuffer)(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index); + int 
(*SetRenderPassVertexSampler)(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index); + int (*SetRenderPassVertexTexture)(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, Uint32 index); + int (*SetRenderPassFragmentBuffer)(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index); + int (*SetRenderPassFragmentSampler)(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index); + int (*SetRenderPassFragmentTexture)(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, Uint32 index); + int (*Draw)(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count); + int (*DrawIndexed)(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); + int (*DrawInstanced)(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); + int (*DrawInstancedIndexed)(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); + int (*EndRenderPass)(SDL_GpuRenderPass *pass); + + int (*StartBlitPass)(SDL_GpuBlitPass *pass); + int (*CopyBetweenTextures)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); + int (*FillBuffer)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value); + int (*GenerateMipmaps)(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); + int (*CopyBufferCpuToGpu)(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); + int (*CopyBufferGpuToCpu)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); + int (*CopyBufferGpuToGpu)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); + int (*CopyFromBufferToTexture)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); + int (*CopyFromTextureToBuffer)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); + int (*EndBlitPass)(SDL_GpuBlitPass *pass); + + int (*CreateFence)(SDL_GpuFence *fence); + void (*DestroyFence)(SDL_GpuFence *fence); + int (*QueryFence)(SDL_GpuFence *fence); + int (*ResetFence)(SDL_GpuFence *fence); + int (*WaitFence)(SDL_GpuFence *fence); +}; + /* Multiple mutexes might be overkill, but there's no reason to block all caches when one is being accessed. 
*/ struct SDL_GpuStateCache @@ -55,6 +171,11 @@ struct SDL_GpuStateCache SDL_HashTable *sampler_cache; }; +typedef struct SDL_GpuDriver +{ + const char *name; + int (*CreateDevice)(SDL_GpuDevice *device); +} SDL_GpuDriver; #endif /* SDL_sysgpu_h_ */ diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index 2d0075468a587..3e42ad7fce971 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -38,7 +38,7 @@ static void quit(int rc) static void initGpu(void) { - gpuDevice = SDL_GpuCreateDevice("The GPU device"); + gpuDevice = SDL_GpuCreateDevice("The GPU device", NULL); if (!gpuDevice) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to create GPU device: %s", SDL_GetError()); quit(2); @@ -79,7 +79,7 @@ static void render(SDL_Window *window) } /* literally nothing to do, we just start a pass to say "clear the framebuffer to this color," present, and we're done. */ - SDL_GpuSubmitCommandBuffers(&cmd, 1, SDL_GPUPRESENT_VSYNC, NULL); + SDL_GpuSubmitCommandBuffers(gpuDevice, &cmd, 1, SDL_GPUPRESENT_VSYNC, NULL); } int main(int argc, char **argv) diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c index fba43bdd2fb95..80965e8267ef8 100644 --- a/test/testgpu_spinning_cube.c +++ b/test/testgpu_spinning_cube.c @@ -362,10 +362,10 @@ Render(SDL_Window *window, const int windownum) SDL_GpuDraw(render, 0, SDL_arraysize(vertex_data)); SDL_GpuEndRenderPass(render); - SDL_GpuSubmitCommandBuffers(&cmd, 1, presenttype, NULL); /* push work to the GPU and tell it to present to the window when done. */ + SDL_GpuSubmitCommandBuffers(gpu_device, &cmd, 1, presenttype, NULL); /* push work to the GPU and tell it to present to the window when done. */ } -static SDL_GpuShader *load_shader(const char *src, const char *type) +static SDL_GpuShader *load_shader(const char *label, const char *src, const char *type) { SDL_GpuShader *retval = NULL; Uint8 *bytecode = NULL; @@ -374,7 +374,7 @@ static SDL_GpuShader *load_shader(const char *src, const char *type) SDL_Log("Failed to compile %s shader: %s", type, SDL_GetError()); quit(2); } - retval = SDL_GpuLoadShader(gpu_device, bytecode, bytecodelen); + retval = SDL_GpuCreateShader(label, gpu_device, bytecode, bytecodelen); if (!retval) { SDL_Log("Failed to load %s shader bytecode: %s", type, SDL_GetError()); quit(2); @@ -398,11 +398,11 @@ init_render_state(void) #define CHECK_CREATE(var, thing) { if (!(var)) { SDL_Log("Failed to create %s: %s\n", thing, SDL_GetError()); quit(2); } } - gpu_device = SDL_GpuCreateDevice("The GPU device"); + gpu_device = SDL_GpuCreateDevice("The GPU device", NULL); CHECK_CREATE(gpu_device, "GPU device"); - vertex_shader = load_shader(shader_vert_src, "vertex"); - fragment_shader = load_shader(shader_frag_src, "fragment"); + vertex_shader = load_shader("Spinning cube vertex shader", shader_vert_src, "vertex"); + fragment_shader = load_shader("Spinning cube fragment shader", shader_frag_src, "fragment"); /* We just need to upload the static data once. */ render_state.gpubuf_static = SDL_GpuCreateAndInitBuffer("Static vertex data GPU buffer", gpu_device, sizeof (vertex_data), vertex_data); From 4a3062bd061bd5df68ea34a792159bd7cd9028fe Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 5 May 2022 19:28:11 -0400 Subject: [PATCH 32/54] gpu: Added a "dummy" driver. 
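
(Not part of this patch: a rough sketch of how an app could exercise the driver selection added earlier, now that there is a backend to select. The label strings and fallback logic below are illustrative only.)

    #include "SDL.h"
    #include "SDL_gpu.h"

    static SDL_GpuDevice *create_any_device(void)
    {
        SDL_GpuDevice *device;
        Uint32 i;

        /* List the drivers that were compiled in (just "dummy" so far). */
        for (i = 0; i < SDL_GpuGetNumDrivers(); i++) {
            SDL_Log("GPU driver #%u: %s", (unsigned int) i, SDL_GpuGetDriverName(i));
        }

        /* Ask for the dummy driver by name; passing NULL instead lets SDL pick,
           honoring the SDL_GPU_DRIVER hint before trying each driver in turn. */
        device = SDL_GpuCreateDevice("test device", "dummy");
        if (!device) {
            device = SDL_GpuCreateDevice("test device", NULL);
        }
        return device;  /* NULL on failure; check SDL_GetError(). */
    }
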
--- CMakeLists.txt | 1 + include/SDL_gpu.h | 1 + src/gpu/SDL_gpu.c | 2 - src/gpu/SDL_sysgpu.h | 13 +++ src/gpu/dummy/SDL_gpu_dummy.c | 163 ++++++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 src/gpu/dummy/SDL_gpu_dummy.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 47ae5b7f06c29..042d6b52509dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -472,6 +472,7 @@ sdl_glob_sources( "${SDL3_SOURCE_DIR}/src/file/*.c" "${SDL3_SOURCE_DIR}/src/joystick/*.c" "${SDL3_SOURCE_DIR}/src/gpu/*.c" + "${SDL3_SOURCE_DIR}/src/gpu/dummy/*.c" "${SDL3_SOURCE_DIR}/src/haptic/*.c" "${SDL3_SOURCE_DIR}/src/hidapi/*.c" "${SDL3_SOURCE_DIR}/src/libm/*.c" diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 1a0ca1037abcf..94be59085b2b3 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -696,6 +696,7 @@ typedef enum SDL_GpuPresentType * If this command buffer is to present to a window, specify a non-NULL present_window. * presenttype is ignored if this isn't a render pass using a window's backbuffer. */ +/* !!! FIXME: obviously this can't present here, we don't know what window(s) we're presenting! */ int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); /* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index c7fe0ab2974f2..c827f1ee33eea 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -28,9 +28,7 @@ extern const SDL_GpuDriver DUMMY_GpuDriver; static const SDL_GpuDriver *gpu_drivers[] = { -#if 0 &DUMMY_GpuDriver -#endif }; Uint32 diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index ab973768aeae0..2424fef896a70 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -27,11 +27,13 @@ #include "SDL_gpu.h" #include "../SDL_hashtable.h" + struct SDL_GpuCpuBuffer { SDL_GpuDevice *device; const char *label; Uint32 buflen; + void *driverdata; }; struct SDL_GpuBuffer @@ -39,12 +41,14 @@ struct SDL_GpuBuffer SDL_GpuDevice *device; const char *label; Uint32 buflen; + void *driverdata; }; struct SDL_GpuTexture { SDL_GpuDevice *device; SDL_GpuTextureDescription desc; + void *driverdata; }; struct SDL_GpuShader @@ -52,24 +56,28 @@ struct SDL_GpuShader SDL_GpuDevice *device; const char *label; SDL_atomic_t refcount; + void *driverdata; }; struct SDL_GpuPipeline { SDL_GpuDevice *device; SDL_GpuPipelineDescription desc; + void *driverdata; }; struct SDL_GpuSampler { SDL_GpuDevice *device; SDL_GpuSamplerDescription desc; + void *driverdata; }; struct SDL_GpuCommandBuffer { SDL_GpuDevice *device; const char *label; + void *driverdata; }; struct SDL_GpuRenderPass @@ -77,6 +85,7 @@ struct SDL_GpuRenderPass SDL_GpuDevice *device; const char *label; SDL_GpuCommandBuffer *cmdbuf; + void *driverdata; }; struct SDL_GpuBlitPass @@ -84,18 +93,22 @@ struct SDL_GpuBlitPass SDL_GpuDevice *device; const char *label; SDL_GpuCommandBuffer *cmdbuf; + void *driverdata; }; struct SDL_GpuFence { SDL_GpuDevice *device; const char *label; + void *driverdata; }; struct SDL_GpuDevice { const char *label; + void *driverdata; + void (*DestroyDevice)(SDL_GpuDevice *device); int (*CreateCpuBuffer)(SDL_GpuCpuBuffer *buffer, const void *data); diff --git a/src/gpu/dummy/SDL_gpu_dummy.c b/src/gpu/dummy/SDL_gpu_dummy.c new file mode 100644 index 0000000000000..6766d8f403830 --- /dev/null +++ b/src/gpu/dummy/SDL_gpu_dummy.c @@ -0,0 +1,163 @@ +/* + 
Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ +#include "../../SDL_internal.h" + +/* The high-level gpu subsystem */ + +#include "SDL.h" +#include "../SDL_sysgpu.h" + +static void DUMMY_GpuDestroyDevice(SDL_GpuDevice *device) { /* no-op */ } + +static int DUMMY_GpuCreateCpuBuffer(SDL_GpuCpuBuffer *buffer, const void *data) +{ + /* have to save off buffer data so we can provide it for locking, etc. */ + buffer->driverdata = SDL_calloc(1, buffer->buflen); + if (!buffer->driverdata) { + return SDL_OutOfMemory(); + } + if (data) { + SDL_memcpy(buffer->driverdata, data, buffer->buflen); + } + return 0; +} + +static void DUMMY_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) +{ + SDL_free(buffer->driverdata); +} + +static void *DUMMY_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer) +{ + return buffer->driverdata; +} + +/* we could get fancier and manage imaginary GPU buffers and textures, but I don't think it's worth it atm. */ + +static int DUMMY_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) { return 0; } +static int DUMMY_GpuCreateBuffer(SDL_GpuBuffer *buffer) { return 0; } +static void DUMMY_GpuDestroyBuffer(SDL_GpuBuffer *buffer) {} +static int DUMMY_GpuCreateTexture(SDL_GpuTexture *texture) { return 0; } +static void DUMMY_GpuDestroyTexture(SDL_GpuTexture *texture) {} +static int DUMMY_GpuCreateShader(SDL_GpuShader *shader, const Uint8 *bytecode, const Uint32 bytecodelen) { return 0; } +static void DUMMY_GpuDestroyShader(SDL_GpuShader *shader) {} +static SDL_GpuTexture *DUMMY_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) { return NULL; /* !!! 
FIXME */ } +static int DUMMY_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { return 0; } +static void DUMMY_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) {} +static int DUMMY_GpuCreateSampler(SDL_GpuSampler *sampler) { return 0; } +static void DUMMY_GpuDestroySampler(SDL_GpuSampler *sampler) {} +static int DUMMY_GpuCreateCommandBuffer(SDL_GpuCommandBuffer *cmdbuf) { return 0; } +static int DUMMY_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence) { return 0; } +static void DUMMY_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) {} +static int DUMMY_GpuStartRenderPass(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment) { return 0; } +static int DUMMY_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) { return 0; } +static int DUMMY_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar) { return 0; } +static int DUMMY_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, double x, double y, double width, double height) { return 0; } +static int DUMMY_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, double red, double green, double blue, double alpha) { return 0; } +static int DUMMY_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index) { return 0; } +static int DUMMY_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index) { return 0; } +static int DUMMY_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, Uint32 index) { return 0; } +static int DUMMY_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index) { return 0; } +static int DUMMY_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index) { return 0; } +static int DUMMY_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, Uint32 index) { return 0; } +static int DUMMY_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count) { return 0; } +static int DUMMY_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset) { return 0; } +static int DUMMY_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance) { return 0; } +static int DUMMY_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance) { return 0; } +static int DUMMY_GpuEndRenderPass(SDL_GpuRenderPass *pass) { return 0; } +static int DUMMY_GpuStartBlitPass(SDL_GpuBlitPass *pass) { return 0; } +static int DUMMY_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } +static int DUMMY_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value) { return 0; } +static int 
DUMMY_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) { return 0; } +static int DUMMY_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } +static int DUMMY_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } +static int DUMMY_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } +static int DUMMY_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } +static int DUMMY_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) { return 0; } +static int DUMMY_GpuEndBlitPass(SDL_GpuBlitPass *pass) { return 0; } +static int DUMMY_GpuCreateFence(SDL_GpuFence *fence) { return 0; } +static void DUMMY_GpuDestroyFence(SDL_GpuFence *fence) {} +static int DUMMY_GpuQueryFence(SDL_GpuFence *fence) { return 1; } +static int DUMMY_GpuResetFence(SDL_GpuFence *fence) { return 0; } +static int DUMMY_GpuWaitFence(SDL_GpuFence *fence) { return 0; } + +static int +DUMMY_GpuCreateDevice(SDL_GpuDevice *device) +{ + device->DestroyDevice = DUMMY_GpuDestroyDevice; + device->CreateCpuBuffer = DUMMY_GpuCreateCpuBuffer; + device->DestroyCpuBuffer = DUMMY_GpuDestroyCpuBuffer; + device->LockCpuBuffer = DUMMY_GpuLockCpuBuffer; + device->UnlockCpuBuffer = DUMMY_GpuUnlockCpuBuffer; + device->CreateBuffer = DUMMY_GpuCreateBuffer; + device->DestroyBuffer = DUMMY_GpuDestroyBuffer; + device->CreateTexture = DUMMY_GpuCreateTexture; + device->DestroyTexture = DUMMY_GpuDestroyTexture; + device->CreateShader = DUMMY_GpuCreateShader; + device->DestroyShader = DUMMY_GpuDestroyShader; + device->GetBackbuffer = DUMMY_GpuGetBackbuffer; + device->CreatePipeline = DUMMY_GpuCreatePipeline; + device->DestroyPipeline = DUMMY_GpuDestroyPipeline; + device->CreateSampler = DUMMY_GpuCreateSampler; + device->DestroySampler = DUMMY_GpuDestroySampler; + device->CreateCommandBuffer = DUMMY_GpuCreateCommandBuffer; + device->SubmitCommandBuffers = DUMMY_GpuSubmitCommandBuffers; + device->AbandonCommandBuffer = DUMMY_GpuAbandonCommandBuffer; + device->StartRenderPass = DUMMY_GpuStartRenderPass; + device->SetRenderPassPipeline = DUMMY_GpuSetRenderPassPipeline; + device->SetRenderPassViewport = DUMMY_GpuSetRenderPassViewport; + device->SetRenderPassScissor = DUMMY_GpuSetRenderPassScissor; + device->SetRenderPassBlendConstant = DUMMY_GpuSetRenderPassBlendConstant; + device->SetRenderPassVertexBuffer = DUMMY_GpuSetRenderPassVertexBuffer; + device->SetRenderPassVertexSampler = DUMMY_GpuSetRenderPassVertexSampler; + device->SetRenderPassVertexTexture = DUMMY_GpuSetRenderPassVertexTexture; + device->SetRenderPassFragmentBuffer = DUMMY_GpuSetRenderPassFragmentBuffer; + device->SetRenderPassFragmentSampler = DUMMY_GpuSetRenderPassFragmentSampler; + device->SetRenderPassFragmentTexture = DUMMY_GpuSetRenderPassFragmentTexture; + device->Draw = DUMMY_GpuDraw; + device->DrawIndexed = DUMMY_GpuDrawIndexed; + 
device->DrawInstanced = DUMMY_GpuDrawInstanced; + device->DrawInstancedIndexed = DUMMY_GpuDrawInstancedIndexed; + device->EndRenderPass = DUMMY_GpuEndRenderPass; + device->StartBlitPass = DUMMY_GpuStartBlitPass; + device->CopyBetweenTextures = DUMMY_GpuCopyBetweenTextures; + device->FillBuffer = DUMMY_GpuFillBuffer; + device->GenerateMipmaps = DUMMY_GpuGenerateMipmaps; + device->CopyBufferCpuToGpu = DUMMY_GpuCopyBufferCpuToGpu; + device->CopyBufferGpuToCpu = DUMMY_GpuCopyBufferGpuToCpu; + device->CopyBufferGpuToGpu = DUMMY_GpuCopyBufferGpuToGpu; + device->CopyFromBufferToTexture = DUMMY_GpuCopyFromBufferToTexture; + device->CopyFromTextureToBuffer = DUMMY_GpuCopyFromTextureToBuffer; + device->EndBlitPass = DUMMY_GpuEndBlitPass; + device->CreateFence = DUMMY_GpuCreateFence; + device->DestroyFence = DUMMY_GpuDestroyFence; + device->QueryFence = DUMMY_GpuQueryFence; + device->ResetFence = DUMMY_GpuResetFence; + device->WaitFence = DUMMY_GpuWaitFence; + + return 0; /* okay, always succeeds. */ +} + +const SDL_GpuDriver DUMMY_GpuDriver = { + "dummy", DUMMY_GpuCreateDevice +}; + +/* vi: set ts=4 sw=4 expandtab: */ From 5f4871ae9113aba887f06d3dcb5a723640820369 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Fri, 6 May 2022 10:45:19 -0400 Subject: [PATCH 33/54] gpu: Decide how present should work. --- include/SDL_gpu.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 94be59085b2b3..c6357f21a9b02 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -248,7 +248,7 @@ void SDL_GpuDestroyShader(SDL_GpuShader *shader); /* - * Get a texture that can be used for rendering to an SDL window. The window + * Get a texture that can be used for rendering to an SDL window. The SDL_Window * may be destroyed and recreated internally on first use if incompatible with the SDL_GpuDevice! * As such, it does not need to be created with SDL_WINDOW_OPENGL or _VULKAN, * etc, as this API will take care of it. @@ -258,6 +258,13 @@ void SDL_GpuDestroyShader(SDL_GpuShader *shader); * * This call may block if you've got every backbuffer from the window in flight, rendering other * frames that haven't completed yet. Use fences if you need to avoid this. + * + * SDL decides if a window backbuffer is in flight when a command buffer that uses the + * texture from this function as a color attachment in a render pass is submitted with + * a SDL_GpuPresentType that isn't SDL_GPUPRESENT_NONE. Until then, this function will + * return the same texture for the same window, in case you plan to do several rendering + * passes before presenting. Once the backbuffer is in-flight, the next call to this + * function may block or return a different texture. */ SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); @@ -693,10 +700,13 @@ typedef enum SDL_GpuPresentType * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. * Once a command buffer is submitted, its pointer becomes invalid. Create a new one for the next set of commands. * - * If this command buffer is to present to a window, specify a non-NULL present_window. - * presenttype is ignored if this isn't a render pass using a window's backbuffer. + * If this command buffer is to present to a window, specify a presenttype other than SDL_GPUPRESENT_NONE. 
Any backbuffers that were used + * as color attachments in render passes in the submitted set of command buffers will present to the screen, and the next call to SDL_GpuGetBackbuffer + * may return different textures for those windows as it cycles through double or triple buffering. + * `presenttype` is ignored if these command buffers did not contain a render pass using a window's backbuffer. It is legal to present multiple windows + * in the same submission, but they will all use the same `presenttype`. Note that presenting a window with vsync will not block here, as this just + * queues the request; supply a fence if you need to wait for the presentation to complete. */ -/* !!! FIXME: obviously this can't present here, we don't know what window(s) we're presenting! */ int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); /* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can From 862adbca5732b726ddce15ba338342218216e281 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Fri, 6 May 2022 23:01:46 -0400 Subject: [PATCH 34/54] gpu: Further rethinking of backbuffer management and presentation. --- include/SDL_gpu.h | 64 ++++++++++++++++++----------------- src/gpu/SDL_gpu.c | 48 ++++++++++++++++++++++---- src/gpu/SDL_sysgpu.h | 6 +++- src/gpu/dummy/SDL_gpu_dummy.c | 8 ++++- src/video/SDL_sysvideo.h | 3 ++ test/testgpu_simple_clear.c | 6 +++- test/testgpu_spinning_cube.c | 3 +- 7 files changed, 97 insertions(+), 41 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index c6357f21a9b02..f3eaa0118d656 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -249,22 +249,27 @@ void SDL_GpuDestroyShader(SDL_GpuShader *shader); /* * Get a texture that can be used for rendering to an SDL window. The SDL_Window - * may be destroyed and recreated internally on first use if incompatible with the SDL_GpuDevice! - * As such, it does not need to be created with SDL_WINDOW_OPENGL or _VULKAN, - * etc, as this API will take care of it. - * Do not save this texture beyond using it for a render pass color attachment! It's likely that - * a window has multiple textures that it cycles through (and even those might get replaced if - * the window is resized or hidden or if the OS just feels like it moment by moment). + * may be destroyed and recreated internally on first use if incompatible with + * the SDL_GpuDevice! As such, it does not need to be created with + * SDL_WINDOW_OPENGL or _VULKAN, etc, as this API will take care of it. * - * This call may block if you've got every backbuffer from the window in flight, rendering other - * frames that haven't completed yet. Use fences if you need to avoid this. + * SDL_Windows can not be used with more than one GPU device at a time (even if + * they're both using the same backend). * - * SDL decides if a window backbuffer is in flight when a command buffer that uses the - * texture from this function as a color attachment in a render pass is submitted with - * a SDL_GpuPresentType that isn't SDL_GPUPRESENT_NONE. Until then, this function will - * return the same texture for the same window, in case you plan to do several rendering - * passes before presenting. Once the backbuffer is in-flight, the next call to this - * function may block or return a different texture. + * Do not save this texture beyond rendering a single frame! 
It's likely that + * a window has multiple textures that it cycles through as it renders and + * presents frames (and even those textures might get replaced if the window + * is resized or hidden or if the OS just feels like it moment by moment). + * + * A window backbuffer's texture is considered "in-flight" when a SDL_GpuPresent() + * call is made with that texture. Until then, this function will return the same + * texture for the same window, in case you plan to do several rendering passes + * before presenting. Once a backbuffer is in-flight, the next call to this + * function may return a different texture (and/or block until a texture + * becomes available; you can use fences to avoid blocking). + * + * Do not call this function from multiple threads for the same device/window + * at the same time. */ SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); @@ -687,33 +692,30 @@ int SDL_GpuResetFence(SDL_GpuFence *fence); int SDL_GpuWaitFence(SDL_GpuFence *fence); -typedef enum SDL_GpuPresentType -{ - SDL_GPUPRESENT_NONE, /* don't present (not rendering to a window or more command buffers to queue first) */ - SDL_GPUPRESENT_IMMEDIATE, /* present immediately, don't wait for vsync */ - SDL_GPUPRESENT_VSYNC, /* present synced to vertical retrace */ - SDL_GPUPRESENT_ADAPTIVE_VSYNC /* vsync if we're running fast enough, immediate if we've missed vsync. If unsupported, this waits for vsync. */ -} SDL_GpuPresentType; - /* * Once you've encoded your command buffer(s), you can submit them to the GPU for executing. * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. * Once a command buffer is submitted, its pointer becomes invalid. Create a new one for the next set of commands. - * - * If this command buffer is to present to a window, specify a presenttype other than SDL_GPUPRESENT_NONE. Any backbuffers that were used - * as color attachments in render passes in the submitted set of command buffers will present to the screen, and the next call to SDL_GpuGetBackbuffer - * may return different textures for those windows as it cycles through double or triple buffering. - * `presenttype` is ignored if these command buffers did not contain a render pass using a window's backbuffer. It is legal to present multiple windows - * in the same submission, but they will all use the same `presenttype`. Note that presenting a window with vsync will not block here, as this just - * queues the request; supply a fence if you need to wait for the presentation to complete. */ -int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); +int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence); /* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can abandon them, freeing their resources. This can be useful if something unrelated fails halfway through buffer encoding. */ void SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs); -/* !!! FIXME: add a SDL_GpuAbandonCommandBuffer() function for freeing a buffer without submitting it? */ +/* Present a window's current backbuffer to the display. This will take the current SDL_GpuTexture returned by SDL_GpuGetBackbuffer + * and queue it for presentation. 
The presentation request is queued after any submitted command buffers, so you should call this + * as soon as you've submitted any command buffers that provide the rendering you'd like to do for the frame without waiting for + * those command buffers to finish processing. + * As soon as this call completes, the backbuffer is considered "in-flight." + * While the backbuffer is in-flight, the next call to SDL_GpuGetBackbuffer will return a different + * texture and/or block. Do not use this backbuffer again after requesting presentation with it, as its pointer is + * considered invalid and you should request a new one from SDL_GpuGetBackbuffer. Note that presenting a window with vsync will + * not block here, as this just queues the request. You should call this once per frame after rendering to a new backbuffer. If you + * haven't rendered to a backbuffer before presenting (or requested one with SDL_GpuGetBackbuffer), the results of this call are + * undefined. + */ +int SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval); /* Helper functions. These are optional and built on top of the public API to remove boilerplate from your code. */ diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index c827f1ee33eea..c68a28d22f5e1 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -24,6 +24,8 @@ #include "SDL.h" #include "SDL_sysgpu.h" +#include "../video/SDL_sysvideo.h" + extern const SDL_GpuDriver DUMMY_GpuDriver; @@ -338,14 +340,28 @@ SDL_GpuDestroyShader(SDL_GpuShader *shader) SDL_GpuTexture * SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) { + SDL_GpuTexture *retval = NULL; if (!device) { SDL_InvalidParamError("device"); - return NULL; } else if (!window) { SDL_InvalidParamError("window"); - return NULL; + } else if (window->gpu_device && (window->gpu_device != device)) { + SDL_SetError("Window is being used by another GPU device"); + } else { + if (window->gpu_device == NULL) { + if (device->ClaimWindow(device, window) == -1) { + return NULL; + } + window->gpu_device = device; + } + + if (!window->gpu_backbuffer) { /* if !NULL, already requested one that isn't yet in-flight for presentation. 
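SDL_GpuPresent() resets this pointer to NULL once the texture goes in-flight, so the next call here asks the backend for a fresh one.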
*/ + window->gpu_backbuffer = device->GetBackbuffer(device, window); + } + + retval = (SDL_GpuTexture *) window->gpu_backbuffer; } - return device->GetBackbuffer(device, window); + return retval; } SDL_GpuPipeline * @@ -1095,7 +1111,7 @@ SDL_GpuWaitFence(SDL_GpuFence *fence) } int -SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence) +SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence) { int retval; Uint32 i; @@ -1114,7 +1130,7 @@ SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffer } } - retval = device->SubmitCommandBuffers(device, buffers, numcmdbufs, presenttype, fence); + retval = device->SubmitCommandBuffers(device, buffers, numcmdbufs, fence); if (retval == 0) { for (i = 0; i < numcmdbufs; i++) { @@ -1139,6 +1155,26 @@ SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmd } } +int +SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval) +{ + if (!device) { + return SDL_InvalidParamError("device"); + } else if (!window) { + return SDL_InvalidParamError("window"); + } else if (window->gpu_device != device) { + return SDL_SetError("Window is not claimed by this GPU device (call SDL_GpuGetBackbuffer first!)"); + } else if (!window->gpu_backbuffer) { + return SDL_SetError("Window does not have a prepared backbuffer (call SDL_GpuGetBackbuffer first!)"); + } else if (device->Present(device, window, (SDL_GpuTexture *) window->gpu_backbuffer, swapinterval) == -1) { + return -1; + } + + window->gpu_backbuffer = NULL; /* it's in-flight, mark the window as having no current backbuffer. */ + + return 0; +} + SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) { SDL_GpuFence *fence = NULL; @@ -1163,7 +1199,7 @@ SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *devi ((blit = SDL_GpuStartBlitPass("Blit pass for SDL_GpuCreateAndInitBuffer", cmd)) != NULL) ) { SDL_GpuCopyBufferCpuToGpu(blit, staging, 0, gpubuf, 0, buflen); SDL_GpuEndBlitPass(blit); - SDL_GpuSubmitCommandBuffers(device, &cmd, 1, SDL_GPUPRESENT_NONE, fence); + SDL_GpuSubmitCommandBuffers(device, &cmd, 1, fence); SDL_GpuWaitFence(fence); /* so we know it's definitely uploaded */ retval = gpubuf; } diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 2424fef896a70..48fefaa0200c6 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -111,6 +111,8 @@ struct SDL_GpuDevice void (*DestroyDevice)(SDL_GpuDevice *device); + int (*ClaimWindow)(SDL_GpuDevice *device, SDL_Window *window); + int (*CreateCpuBuffer)(SDL_GpuCpuBuffer *buffer, const void *data); void (*DestroyCpuBuffer)(SDL_GpuCpuBuffer *buffer); void *(*LockCpuBuffer)(SDL_GpuCpuBuffer *buffer); @@ -134,7 +136,7 @@ struct SDL_GpuDevice void (*DestroySampler)(SDL_GpuSampler *sampler); int (*CreateCommandBuffer)(SDL_GpuCommandBuffer *cmdbuf); - int (*SubmitCommandBuffers)(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence); + int (*SubmitCommandBuffers)(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence); void (*AbandonCommandBuffer)(SDL_GpuCommandBuffer *buffer); int (*StartRenderPass)(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription 
*color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment); @@ -165,6 +167,8 @@ struct SDL_GpuDevice int (*CopyFromTextureToBuffer)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); int (*EndBlitPass)(SDL_GpuBlitPass *pass); + int (*Present)(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *backbuffer, int swapinterval); + int (*CreateFence)(SDL_GpuFence *fence); void (*DestroyFence)(SDL_GpuFence *fence); int (*QueryFence)(SDL_GpuFence *fence); diff --git a/src/gpu/dummy/SDL_gpu_dummy.c b/src/gpu/dummy/SDL_gpu_dummy.c index 6766d8f403830..df5c9f14ea93c 100644 --- a/src/gpu/dummy/SDL_gpu_dummy.c +++ b/src/gpu/dummy/SDL_gpu_dummy.c @@ -25,8 +25,11 @@ #include "SDL.h" #include "../SDL_sysgpu.h" + static void DUMMY_GpuDestroyDevice(SDL_GpuDevice *device) { /* no-op */ } +static int DUMMY_GpuClaimWindow(SDL_GpuDevice *device, SDL_Window *window) { return 0; } + static int DUMMY_GpuCreateCpuBuffer(SDL_GpuCpuBuffer *buffer, const void *data) { /* have to save off buffer data so we can provide it for locking, etc. */ @@ -65,7 +68,7 @@ static void DUMMY_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) {} static int DUMMY_GpuCreateSampler(SDL_GpuSampler *sampler) { return 0; } static void DUMMY_GpuDestroySampler(SDL_GpuSampler *sampler) {} static int DUMMY_GpuCreateCommandBuffer(SDL_GpuCommandBuffer *cmdbuf) { return 0; } -static int DUMMY_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuPresentType presenttype, SDL_GpuFence *fence) { return 0; } +static int DUMMY_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence) { return 0; } static void DUMMY_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) {} static int DUMMY_GpuStartRenderPass(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment) { return 0; } static int DUMMY_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) { return 0; } @@ -93,6 +96,7 @@ static int DUMMY_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *src static int DUMMY_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } static int DUMMY_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) { return 0; } static int DUMMY_GpuEndBlitPass(SDL_GpuBlitPass *pass) { return 0; } +static int DUMMY_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *backbuffer, int swapinterval) { return 0; } static int DUMMY_GpuCreateFence(SDL_GpuFence *fence) { return 0; } static void DUMMY_GpuDestroyFence(SDL_GpuFence *fence) {} static int DUMMY_GpuQueryFence(SDL_GpuFence *fence) { return 1; } @@ -103,6 +107,7 @@ static int 
DUMMY_GpuCreateDevice(SDL_GpuDevice *device) { device->DestroyDevice = DUMMY_GpuDestroyDevice; + device->ClaimWindow = DUMMY_GpuClaimWindow; device->CreateCpuBuffer = DUMMY_GpuCreateCpuBuffer; device->DestroyCpuBuffer = DUMMY_GpuDestroyCpuBuffer; device->LockCpuBuffer = DUMMY_GpuLockCpuBuffer; @@ -147,6 +152,7 @@ DUMMY_GpuCreateDevice(SDL_GpuDevice *device) device->CopyFromBufferToTexture = DUMMY_GpuCopyFromBufferToTexture; device->CopyFromTextureToBuffer = DUMMY_GpuCopyFromTextureToBuffer; device->EndBlitPass = DUMMY_GpuEndBlitPass; + device->Present = DUMMY_GpuPresent; device->CreateFence = DUMMY_GpuCreateFence; device->DestroyFence = DUMMY_GpuDestroyFence; device->QueryFence = DUMMY_GpuQueryFence; diff --git a/src/video/SDL_sysvideo.h b/src/video/SDL_sysvideo.h index 20bb76912cbfa..3d7b414bdee85 100644 --- a/src/video/SDL_sysvideo.h +++ b/src/video/SDL_sysvideo.h @@ -108,6 +108,9 @@ struct SDL_Window SDL_WindowData *driverdata; + void *gpu_device; /* this is only used by the GPU API, don't touch. */ + void *gpu_backbuffer; /* this is only used by the GPU API, don't touch. */ + SDL_Window *prev; SDL_Window *next; diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index 3e42ad7fce971..236eeb54331f4 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -78,10 +78,14 @@ static void render(SDL_Window *window) quit(2); } + SDL_GpuEndRenderPass(pass); + /* literally nothing to do, we just start a pass to say "clear the framebuffer to this color," present, and we're done. */ - SDL_GpuSubmitCommandBuffers(gpuDevice, &cmd, 1, SDL_GPUPRESENT_VSYNC, NULL); + SDL_GpuSubmitCommandBuffers(gpuDevice, &cmd, 1, NULL); + SDL_GpuPresent(gpuDevice, window, 1); } + int main(int argc, char **argv) { int done; diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c index 80965e8267ef8..50e4cb4a53906 100644 --- a/test/testgpu_spinning_cube.c +++ b/test/testgpu_spinning_cube.c @@ -362,7 +362,8 @@ Render(SDL_Window *window, const int windownum) SDL_GpuDraw(render, 0, SDL_arraysize(vertex_data)); SDL_GpuEndRenderPass(render); - SDL_GpuSubmitCommandBuffers(gpu_device, &cmd, 1, presenttype, NULL); /* push work to the GPU and tell it to present to the window when done. */ + SDL_GpuSubmitCommandBuffers(gpu_device, &cmd, 1, NULL); /* push work to the GPU and tell it to present to the window when done. */ + SDL_GpuPresent(gpu_device, window, 1); } static SDL_GpuShader *load_shader(const char *label, const char *src, const char *type) From 58f6cf8208778a3954e58f525f8c49496797cc62 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 10 May 2022 23:47:57 -0400 Subject: [PATCH 35/54] gpu: Fixed a bunch of things, filled in some obvious missing API bits. 
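
A quick usage note: the preceding patch split presentation out of command-buffer
submission, so a minimal frame now looks roughly like the sketch below. This is
illustrative only; the function names come from the headers in this series,
`gpu_device` and `window` are assumed to already exist, the color-attachment
fields are elided because they are not spelled out at this point, and the swap
interval of 1 simply mirrors what the test programs pass.

    SDL_GpuTexture *backbuffer = SDL_GpuGetBackbuffer(gpu_device, window);
    if (backbuffer) {  /* if NULL, just skip rendering this frame */
        SDL_GpuCommandBuffer *cmd = SDL_GpuCreateCommandBuffer("frame", gpu_device);
        SDL_GpuColorAttachmentDescription color_attachment;
        SDL_zero(color_attachment);
        /* ...point color_attachment at `backbuffer` and choose its clear/load behavior... */
        SDL_GpuRenderPass *pass = SDL_GpuStartRenderPass("clear", cmd, 1, &color_attachment, NULL, NULL);
        /* ...encode draws here... */
        SDL_GpuEndRenderPass(pass);
        SDL_GpuSubmitCommandBuffers(gpu_device, &cmd, 1, NULL);  /* no fence needed for this simple case */
        SDL_GpuPresent(gpu_device, window, 1);
    }
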
--- include/SDL_gpu.h | 35 +++++--- src/gpu/SDL_gpu.c | 147 +++++++++++++++++++++++----------- src/gpu/SDL_sysgpu.h | 6 +- src/gpu/dummy/SDL_gpu_dummy.c | 6 +- 4 files changed, 133 insertions(+), 61 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index f3eaa0118d656..77fe33f2731d6 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -196,12 +196,14 @@ typedef enum SDL_GpuTextureType SDL_GPUTEXTYPE_2D, SDL_GPUTEXTYPE_CUBE, SDL_GPUTEXTYPE_3D, + SDL_GPUTEXTYPE_1D_ARRAY, SDL_GPUTEXTYPE_2D_ARRAY, SDL_GPUTEXTYPE_CUBE_ARRAY } SDL_GpuTextureType; typedef enum SDL_GpuPixelFormat { + SDL_GPUPIXELFMT_INVALID, SDL_GPUPIXELFMT_B5G6R5, SDL_GPUPIXELFMT_BGR5A1, SDL_GPUPIXELFMT_RGBA8, @@ -230,8 +232,8 @@ typedef struct SDL_GpuTextureDescription SDL_GpuPixelFormat pixel_format; SDL_GpuTextureUsage usage; /* OR SDL_GpuTextureUsage values together */ Uint32 width; - Uint32 height; - Uint32 depth_or_slices; + Uint32 height; /* for cubemaps, this must match width. */ + Uint32 depth_or_slices; /* must be six for cubemap, multiple of six for cubemap array, depth for 3D texture, array count for arrays, and 1 for everything else. */ Uint32 mipmap_levels; } SDL_GpuTextureDescription; @@ -327,6 +329,7 @@ typedef struct SDL_GpuPipelineColorAttachmentDescription typedef enum SDL_GpuVertexFormat { + SDL_GPUVERTFMT_INVALID, SDL_GPUVERTFMT_UCHAR2, SDL_GPUVERTFMT_UCHAR4, SDL_GPUVERTFMT_CHAR2, @@ -370,6 +373,7 @@ typedef struct SDL_GpuVertexAttributeDescription Uint32 offset; Uint32 stride; Uint32 index; + // !!! FIXME: step rate and step function for instancing } SDL_GpuVertexAttributeDescription; typedef enum SDL_GpuCompareFunction @@ -426,6 +430,18 @@ typedef enum SDL_GpuCullFace /* !!! FIXME: Vulkan lets you cull front-and-back (i.e. - everything) */ } SDL_GpuCullFace; +typedef struct SDL_GpuDepthStecilDescription +{ + Uint32 stencil_read_mask; + Uint32 stencil_write_mask; + Uint32 stencil_reference; + SDL_GpuCompareFunction stencil_function; + SDL_GpuStencilOperation stencil_fail; + SDL_GpuStencilOperation depth_fail; + SDL_GpuStencilOperation depth_and_stencil_pass; +} SDL_GpuDepthStecilDescription; + + #define SDL_GPU_MAX_COLOR_ATTACHMENTS 4 /* !!! FIXME: what's a sane number here? */ #define SDL_GPU_MAX_VERTEX_ATTRIBUTES 32 /* !!! FIXME: what's a sane number here? */ typedef struct SDL_GpuPipelineDescription @@ -441,15 +457,9 @@ typedef struct SDL_GpuPipelineDescription SDL_GpuPixelFormat depth_format; SDL_GpuPixelFormat stencil_format; SDL_bool depth_write_enabled; - Uint32 stencil_read_mask; - Uint32 stencil_write_mask; - Uint32 stencil_reference_front; - Uint32 stencil_reference_back; SDL_GpuCompareFunction depth_function; - SDL_GpuCompareFunction stencil_function; - SDL_GpuStencilOperation stencil_fail; - SDL_GpuStencilOperation depth_fail; - SDL_GpuStencilOperation depth_and_stencil_pass; + SDL_GpuDepthStecilDescription depth_stencil_front; + SDL_GpuDepthStecilDescription depth_stencil_back; SDL_GpuFillMode fill_mode; SDL_GpuFrontFace front_face; SDL_GpuCullFace cull_face; @@ -510,6 +520,7 @@ typedef struct SDL_GpuSamplerDescription SDL_GpuSamplerMinMagFilter min_filter; SDL_GpuSamplerMinMagFilter mag_filter; SDL_GpuSamplerMipFilter mip_filter; + Uint32 max_anisotropy; } SDL_GpuSamplerDescription; typedef struct SDL_GpuSampler SDL_GpuSampler; @@ -563,6 +574,8 @@ void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); typedef struct SDL_GpuCommandBuffer SDL_GpuCommandBuffer; SDL_GpuCommandBuffer *SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device); +/* !!! 
FIXME: push/pop debug groups? */ + /* RENDERING PASSES... */ @@ -640,7 +653,7 @@ typedef enum SDL_GpuIndexType int SDL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count); int SDL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); int SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); -int SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); +int SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance); /* Done encoding this render pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ int SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index c68a28d22f5e1..d1fcfe97f2e9b 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -94,15 +94,15 @@ static void *allocate_obj_and_string(const size_t objlen, const char *str, char #define ALLOC_OBJ_WITH_DESC(typ, var, dsc) { \ char *cpystr; \ - var = (typ *) allocate_obj_and_string(sizeof (typ), dsc->label, &cpystr); \ + var = (typ *) allocate_obj_and_string(sizeof (typ), (dsc)->label, &cpystr); \ if (var != NULL) { \ - SDL_memcpy(&var->desc, dsc, sizeof (*dsc));\ + SDL_memcpy(&var->desc, dsc, sizeof (*(dsc)));\ var->desc.label = cpystr; \ } \ } #define FREE_AND_NULL_OBJ_WITH_DESC(obj) { \ - SDL_free((void *) obj->desc.label); \ + SDL_free((void *) ((obj)->desc.label)); \ SDL_free(obj); \ obj = NULL; \ } @@ -269,6 +269,18 @@ SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *des SDL_InvalidParamError("device"); } else if (!desc) { SDL_InvalidParamError("desc"); + } else if (desc->depth_or_slices == 0) { + SDL_SetError("depth_or_slices must be > 0"); + } else if ((desc->texture_type == SDL_GPUTEXTYPE_CUBE) && (desc->depth_or_slices != 6)) { + SDL_SetError("depth_or_slices for a cubemap must be 6"); + } else if ((desc->texture_type == SDL_GPUTEXTYPE_CUBE_ARRAY) && ((desc->depth_or_slices % 6) != 0)) { + SDL_SetError("depth_or_slices for a cubemap array must be a multiple of 6"); + } else if ((desc->texture_type == SDL_GPUTEXTYPE_CUBE_ARRAY) && ((desc->depth_or_slices % 6) != 0)) { + SDL_SetError("depth_or_slices for a cubemap array must be a multiple of 6"); + } else if (((desc->texture_type == SDL_GPUTEXTYPE_1D) || (desc->texture_type == SDL_GPUTEXTYPE_2D)) && (desc->depth_or_slices != 1)) { + SDL_SetError("depth_or_slices for 1D and 2D textures must be 1"); + } else if (((desc->texture_type == SDL_GPUTEXTYPE_CUBE) || (desc->texture_type == SDL_GPUTEXTYPE_CUBE_ARRAY)) && ((desc->width != desc->height))) { + SDL_SetError("cubemaps must have the same width and height"); } else { ALLOC_OBJ_WITH_DESC(SDL_GpuTexture, texture, desc); if (texture != NULL) { @@ -372,9 +384,13 @@ SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *d SDL_InvalidParamError("device"); } else if (!desc) { SDL_InvalidParamError("desc"); - } else if (desc->vertex_shader && (desc->vertex_shader->device != device)) { + } else if (!desc->vertex_shader) { + SDL_SetError("vertex shader is NULL"); + } 
else if (!desc->fragment_shader) { + SDL_SetError("fragment shader is NULL"); + } else if (desc->vertex_shader->device != device) { SDL_SetError("vertex shader is not from this device"); - } else if (desc->fragment_shader && (desc->fragment_shader->device != device)) { + } else if (desc->fragment_shader->device != device) { SDL_SetError("fragment shader is not from this device"); } else { ALLOC_OBJ_WITH_DESC(SDL_GpuPipeline, pipeline, desc); @@ -383,12 +399,8 @@ SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *d if (device->CreatePipeline(pipeline) == -1) { FREE_AND_NULL_OBJ_WITH_DESC(pipeline); } else { - if (pipeline->desc.vertex_shader) { - SDL_AtomicIncRef(&pipeline->desc.vertex_shader->refcount); - } - if (pipeline->desc.fragment_shader) { - SDL_AtomicIncRef(&pipeline->desc.fragment_shader->refcount); - } + SDL_AtomicIncRef(&desc->vertex_shader->refcount); + SDL_AtomicIncRef(&desc->fragment_shader->refcount); } } } @@ -428,13 +440,19 @@ SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc) desc->depth_format = SDL_GPUPIXELFMT_Depth24_Stencil8; desc->stencil_format = SDL_GPUPIXELFMT_Depth24_Stencil8; desc->depth_write_enabled = SDL_TRUE; - desc->stencil_read_mask = 0xFFFFFFFF; - desc->stencil_write_mask = 0xFFFFFFFF; desc->depth_function = SDL_GPUCMPFUNC_LESS; - desc->stencil_function = SDL_GPUCMPFUNC_ALWAYS; - desc->stencil_fail = SDL_GPUSTENCILOP_KEEP; - desc->depth_fail = SDL_GPUSTENCILOP_KEEP; - desc->depth_and_stencil_pass = SDL_GPUSTENCILOP_KEEP; + desc->depth_stencil_front.stencil_read_mask = 0xFFFFFFFF; + desc->depth_stencil_front.stencil_write_mask = 0xFFFFFFFF; + desc->depth_stencil_front.stencil_function = SDL_GPUCMPFUNC_ALWAYS; + desc->depth_stencil_front.stencil_fail = SDL_GPUSTENCILOP_KEEP; + desc->depth_stencil_front.depth_fail = SDL_GPUSTENCILOP_KEEP; + desc->depth_stencil_front.depth_and_stencil_pass = SDL_GPUSTENCILOP_KEEP; + desc->depth_stencil_back.stencil_read_mask = 0xFFFFFFFF; + desc->depth_stencil_back.stencil_write_mask = 0xFFFFFFFF; + desc->depth_stencil_back.stencil_function = SDL_GPUCMPFUNC_ALWAYS; + desc->depth_stencil_back.stencil_fail = SDL_GPUSTENCILOP_KEEP; + desc->depth_stencil_back.depth_fail = SDL_GPUSTENCILOP_KEEP; + desc->depth_stencil_back.depth_and_stencil_pass = SDL_GPUSTENCILOP_KEEP; desc->fill_mode = SDL_GPUFILL_FILL; desc->front_face = SDL_GPUFRONTFACE_COUNTER_CLOCKWISE; desc->cull_face = SDL_GPUCULLFACE_BACK; @@ -551,15 +569,21 @@ static Uint32 hash_pipeline(const void *key, void *data) CRC32_APPEND_VAR(crc, desc->depth_format); CRC32_APPEND_VAR(crc, desc->stencil_format); CRC32_APPEND_VAR(crc, desc->depth_write_enabled); - CRC32_APPEND_VAR(crc, desc->stencil_read_mask); - CRC32_APPEND_VAR(crc, desc->stencil_write_mask); - CRC32_APPEND_VAR(crc, desc->stencil_reference_front); - CRC32_APPEND_VAR(crc, desc->stencil_reference_back); CRC32_APPEND_VAR(crc, desc->depth_function); - CRC32_APPEND_VAR(crc, desc->stencil_function); - CRC32_APPEND_VAR(crc, desc->stencil_fail); - CRC32_APPEND_VAR(crc, desc->depth_fail); - CRC32_APPEND_VAR(crc, desc->depth_and_stencil_pass); + CRC32_APPEND_VAR(crc, desc->depth_stencil_front.stencil_read_mask); + CRC32_APPEND_VAR(crc, desc->depth_stencil_front.stencil_write_mask); + CRC32_APPEND_VAR(crc, desc->depth_stencil_front.stencil_reference); + CRC32_APPEND_VAR(crc, desc->depth_stencil_front.stencil_function); + CRC32_APPEND_VAR(crc, desc->depth_stencil_front.stencil_fail); + CRC32_APPEND_VAR(crc, desc->depth_stencil_front.depth_fail); + CRC32_APPEND_VAR(crc, 
desc->depth_stencil_front.depth_and_stencil_pass); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.stencil_read_mask); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.stencil_write_mask); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.stencil_reference); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.stencil_function); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.stencil_fail); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.depth_fail); + CRC32_APPEND_VAR(crc, desc->depth_stencil_back.depth_and_stencil_pass); CRC32_APPEND_VAR(crc, desc->fill_mode); CRC32_APPEND_VAR(crc, desc->front_face); CRC32_APPEND_VAR(crc, desc->cull_face); @@ -585,15 +609,21 @@ static SDL_bool keymatch_pipeline(const void *_a, const void *_b, void *data) (a->depth_format != b->depth_format) || (a->stencil_format != b->stencil_format) || (a->depth_write_enabled != b->depth_write_enabled) || - (a->stencil_read_mask != b->stencil_read_mask) || - (a->stencil_write_mask != b->stencil_write_mask) || - (a->stencil_reference_front != b->stencil_reference_front) || - (a->stencil_reference_back != b->stencil_reference_back) || (a->depth_function != b->depth_function) || - (a->stencil_function != b->stencil_function) || - (a->stencil_fail != b->stencil_fail) || - (a->depth_fail != b->depth_fail) || - (a->depth_and_stencil_pass != b->depth_and_stencil_pass) || + (a->depth_stencil_front.stencil_read_mask != b->depth_stencil_front.stencil_read_mask) || + (a->depth_stencil_front.stencil_write_mask != b->depth_stencil_front.stencil_write_mask) || + (a->depth_stencil_front.stencil_reference != b->depth_stencil_front.stencil_reference) || + (a->depth_stencil_front.stencil_function != b->depth_stencil_front.stencil_function) || + (a->depth_stencil_front.stencil_fail != b->depth_stencil_front.stencil_fail) || + (a->depth_stencil_front.depth_fail != b->depth_stencil_front.depth_fail) || + (a->depth_stencil_front.depth_and_stencil_pass != b->depth_stencil_front.depth_and_stencil_pass) || + (a->depth_stencil_back.stencil_read_mask != b->depth_stencil_back.stencil_read_mask) || + (a->depth_stencil_back.stencil_write_mask != b->depth_stencil_back.stencil_write_mask) || + (a->depth_stencil_back.stencil_reference != b->depth_stencil_back.stencil_reference) || + (a->depth_stencil_back.stencil_function != b->depth_stencil_back.stencil_function) || + (a->depth_stencil_back.stencil_fail != b->depth_stencil_back.stencil_fail) || + (a->depth_stencil_back.depth_fail != b->depth_stencil_back.depth_fail) || + (a->depth_stencil_back.depth_and_stencil_pass != b->depth_stencil_back.depth_and_stencil_pass) || (a->fill_mode != b->fill_mode) || (a->front_face != b->front_face) || (a->cull_face != b->cull_face) || @@ -639,10 +669,9 @@ static SDL_bool keymatch_pipeline(const void *_a, const void *_b, void *data) void nuke_pipeline(const void *key, const void *value, void *data) { - SDL_GpuPipelineDescription *desc = (SDL_GpuPipelineDescription *) key; - SDL_free((void *) desc->label); - SDL_free(desc); - SDL_GpuDestroyPipeline((SDL_GpuPipeline *) value); + SDL_GpuPipeline *pipeline = (SDL_GpuPipeline *) value; + SDL_assert(key == &pipeline->desc); + SDL_GpuDestroyPipeline(pipeline); } @@ -679,10 +708,9 @@ static SDL_bool keymatch_sampler(const void *_a, const void *_b, void *data) void nuke_sampler(const void *key, const void *value, void *data) { - SDL_GpuSamplerDescription *desc = (SDL_GpuSamplerDescription *) key; - SDL_free((void *) desc->label); - SDL_free(desc); - SDL_GpuDestroySampler((SDL_GpuSampler *) value); + SDL_GpuSampler *sampler = 
(SDL_GpuSampler *) value; + SDL_assert(key == &sampler->desc); + SDL_GpuDestroySampler(sampler); } SDL_GpuStateCache * @@ -786,13 +814,32 @@ SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, SDL_GpuRenderPass *pass = NULL; if (!cmdbuf) { SDL_InvalidParamError("cmdbuf"); + } else if (cmdbuf->currently_encoding) { + SDL_SetError("There is already a pass encoding to this command buffer"); + } else if (depth_attachment && !depth_attachment->texture) { + SDL_SetError("Depth attachment without a texture"); + } else if (depth_attachment && ((depth_attachment->texture->desc.usage & SDL_GPUTEXUSAGE_RENDER_TARGET) == 0)) { + SDL_SetError("Depth attachment texture isn't a render target"); + } else if (stencil_attachment && !stencil_attachment->texture) { + SDL_SetError("Stencil attachment without a texture"); + } else if (stencil_attachment && ((stencil_attachment->texture->desc.usage & SDL_GPUTEXUSAGE_RENDER_TARGET) == 0)) { + SDL_SetError("Stencil attachment texture isn't a render target"); } else { + Uint32 i; + for (i = 0; i < num_color_attachments; i++) { + if (color_attachments[i].texture && ((color_attachments[i].texture->desc.usage & SDL_GPUTEXUSAGE_RENDER_TARGET) == 0)) { + SDL_SetError("Color attachment #%u texture isn't a render target", (unsigned int) i); + return NULL; + } + } ALLOC_OBJ_WITH_LABEL(SDL_GpuRenderPass, pass, label); if (pass != NULL) { pass->device = cmdbuf->device; pass->cmdbuf = cmdbuf; if (pass->device->StartRenderPass(pass, num_color_attachments, color_attachments, depth_attachment, stencil_attachment) == -1) { FREE_AND_NULL_OBJ_WITH_LABEL(pass); + } else { + cmdbuf->currently_encoding = SDL_TRUE; } } } @@ -882,9 +929,9 @@ SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex } int -SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance) +SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance) { - return pass ? pass->device->DrawInstancedIndexed(pass, index_count, index_type, index_buffer, index_offset, instance_count, base_instance) : SDL_InvalidParamError("pass"); + return pass ? 
pass->device->DrawInstancedIndexed(pass, index_count, index_type, index_buffer, index_offset, instance_count, base_vertex, base_instance) : SDL_InvalidParamError("pass"); } int @@ -897,6 +944,7 @@ SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass) retval = pass->device->EndRenderPass(pass); if (retval == 0) { + pass->cmdbuf->currently_encoding = SDL_FALSE; FREE_AND_NULL_OBJ_WITH_LABEL(pass); } return retval; @@ -909,6 +957,8 @@ SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) SDL_GpuBlitPass *pass = NULL; if (!cmdbuf) { SDL_InvalidParamError("cmdbuf"); + } else if (cmdbuf->currently_encoding) { + SDL_SetError("There is already a pass encoding to this command buffer"); } else { ALLOC_OBJ_WITH_LABEL(SDL_GpuBlitPass, pass, label); if (pass != NULL) { @@ -916,6 +966,8 @@ SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) pass->cmdbuf = cmdbuf; if (pass->device->StartBlitPass(pass) == -1) { FREE_AND_NULL_OBJ_WITH_LABEL(pass); + } else { + cmdbuf->currently_encoding = SDL_TRUE; } } } @@ -1060,6 +1112,7 @@ SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass) retval = pass->device->EndBlitPass(pass); if (retval == 0) { + pass->cmdbuf->currently_encoding = SDL_FALSE; FREE_AND_NULL_OBJ_WITH_LABEL(pass); } return retval; @@ -1168,10 +1221,14 @@ SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval) return SDL_SetError("Window does not have a prepared backbuffer (call SDL_GpuGetBackbuffer first!)"); } else if (device->Present(device, window, (SDL_GpuTexture *) window->gpu_backbuffer, swapinterval) == -1) { return -1; + } else { + /* Note that we free the memory from our abstract object but we do not destroy the texture. + That clean up should have been done by device->Present, since they usually don't work the same way as normal textures! */ + SDL_GpuTexture *backbuffer = (SDL_GpuTexture *) window->gpu_backbuffer; + FREE_AND_NULL_OBJ_WITH_DESC(backbuffer); /* it's in-flight, mark the window as having no current backbuffer. */ + window->gpu_backbuffer = NULL; } - window->gpu_backbuffer = NULL; /* it's in-flight, mark the window as having no current backbuffer. */ - return 0; } diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 48fefaa0200c6..8049eba5261cb 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -77,6 +77,7 @@ struct SDL_GpuCommandBuffer { SDL_GpuDevice *device; const char *label; + SDL_bool currently_encoding; void *driverdata; }; @@ -111,6 +112,7 @@ struct SDL_GpuDevice void (*DestroyDevice)(SDL_GpuDevice *device); + /* !!! FIXME: we need an UnclaimWindow for when the device (or window!) 
is being destroyed */ int (*ClaimWindow)(SDL_GpuDevice *device, SDL_Window *window); int (*CreateCpuBuffer)(SDL_GpuCpuBuffer *buffer, const void *data); @@ -142,7 +144,7 @@ struct SDL_GpuDevice int (*StartRenderPass)(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment); int (*SetRenderPassPipeline)(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); int (*SetRenderPassViewport)(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar); - int (*SetRenderPassScissor)(SDL_GpuRenderPass *pass, double x, double y, double width, double height); + int (*SetRenderPassScissor)(SDL_GpuRenderPass *pass, Uint32 x, Uint32 y, Uint32 width, Uint32 height); int (*SetRenderPassBlendConstant)(SDL_GpuRenderPass *pass, double red, double green, double blue, double alpha); int (*SetRenderPassVertexBuffer)(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index); int (*SetRenderPassVertexSampler)(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index); @@ -153,7 +155,7 @@ struct SDL_GpuDevice int (*Draw)(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count); int (*DrawIndexed)(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset); int (*DrawInstanced)(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); - int (*DrawInstancedIndexed)(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance); + int (*DrawInstancedIndexed)(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance); int (*EndRenderPass)(SDL_GpuRenderPass *pass); int (*StartBlitPass)(SDL_GpuBlitPass *pass); diff --git a/src/gpu/dummy/SDL_gpu_dummy.c b/src/gpu/dummy/SDL_gpu_dummy.c index df5c9f14ea93c..110c64ae2b316 100644 --- a/src/gpu/dummy/SDL_gpu_dummy.c +++ b/src/gpu/dummy/SDL_gpu_dummy.c @@ -20,7 +20,7 @@ */ #include "../../SDL_internal.h" -/* The high-level gpu subsystem */ +/* The gpu subsystem dummy driver */ #include "SDL.h" #include "../SDL_sysgpu.h" @@ -73,7 +73,7 @@ static void DUMMY_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) {} static int DUMMY_GpuStartRenderPass(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment) { return 0; } static int DUMMY_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) { return 0; } static int DUMMY_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar) { return 0; } -static int DUMMY_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, double x, double y, double width, double height) { return 0; } +static int DUMMY_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, Uint32 x, Uint32 y, Uint32 width, Uint32 height) { return 0; } static int DUMMY_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, double red, double green, double blue, double alpha) { return 0; } static int 
DUMMY_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index) { return 0; } static int DUMMY_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index) { return 0; } @@ -84,7 +84,7 @@ static int DUMMY_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_Gp static int DUMMY_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count) { return 0; } static int DUMMY_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset) { return 0; } static int DUMMY_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance) { return 0; } -static int DUMMY_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_instance) { return 0; } +static int DUMMY_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance) { return 0; } static int DUMMY_GpuEndRenderPass(SDL_GpuRenderPass *pass) { return 0; } static int DUMMY_GpuStartBlitPass(SDL_GpuBlitPass *pass) { return 0; } static int DUMMY_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } From b097d23570a6ddd8587f68d0c0f1668662960216 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 10 May 2022 23:53:36 -0400 Subject: [PATCH 36/54] gpu: move getbackbuffer and present code next to each other. --- include/SDL_gpu.h | 84 ++++++++++++++++++++--------------- src/gpu/SDL_gpu.c | 72 +++++++++++++++++++----------- src/gpu/SDL_sysgpu.h | 3 +- src/gpu/dummy/SDL_gpu_dummy.c | 4 +- 4 files changed, 97 insertions(+), 66 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 77fe33f2731d6..8fe16bb657be3 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -249,33 +249,6 @@ SDL_GpuShader *SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, con void SDL_GpuDestroyShader(SDL_GpuShader *shader); -/* - * Get a texture that can be used for rendering to an SDL window. The SDL_Window - * may be destroyed and recreated internally on first use if incompatible with - * the SDL_GpuDevice! As such, it does not need to be created with - * SDL_WINDOW_OPENGL or _VULKAN, etc, as this API will take care of it. - * - * SDL_Windows can not be used with more than one GPU device at a time (even if - * they're both using the same backend). - * - * Do not save this texture beyond rendering a single frame! It's likely that - * a window has multiple textures that it cycles through as it renders and - * presents frames (and even those textures might get replaced if the window - * is resized or hidden or if the OS just feels like it moment by moment). - * - * A window backbuffer's texture is considered "in-flight" when a SDL_GpuPresent() - * call is made with that texture. Until then, this function will return the same - * texture for the same window, in case you plan to do several rendering passes - * before presenting. 
Once a backbuffer is in-flight, the next call to this - * function may return a different texture (and/or block until a texture - * becomes available; you can use fences to avoid blocking). - * - * Do not call this function from multiple threads for the same device/window - * at the same time. - */ -SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); - - /* PRECOOKED STATE OBJECTS... */ typedef enum SDL_GpuBlendOperation @@ -716,17 +689,58 @@ int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **bu abandon them, freeing their resources. This can be useful if something unrelated fails halfway through buffer encoding. */ void SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs); +/* + * Get a texture that can be used for rendering to an SDL window. The SDL_Window + * may be destroyed and recreated internally on first use if incompatible with + * the SDL_GpuDevice! As such, it does not need to be created with + * SDL_WINDOW_OPENGL or _VULKAN, etc, as this API will take care of it. + * + * SDL_Windows can not be used with more than one GPU device at a time (even if + * they're both using the same backend). + * + * Do not save this texture beyond rendering a single frame! It's likely that + * a window has multiple textures that it cycles through as it renders and + * presents frames (and even those textures might get replaced if the window + * is resized or hidden or if the OS just feels like it moment by moment). + * + * A window backbuffer's texture is considered "in-flight" when a SDL_GpuPresent() + * call is made with that texture. Until then, this function will return the same + * texture for the same window, in case you plan to do several rendering passes + * before presenting. Once a backbuffer is in-flight, the next call to this + * function may return a different texture (and/or block until a texture + * becomes available; you can use fences to avoid blocking). + * + * Do not call this function from multiple threads for the same device/window + * at the same time. + * + * This will return NULL for several reasons, but this should not be considered + * a fatal error! For example, some platforms will refuse to supply you a + * backbuffer if the window is currently minimized, but this doesn't mean it + * won't work again on a future rendering frame. In the case of a NULL result, + * you should just decline to render and present this frame (and maybe sleep + * a little so you use less CPU time until you are able to render again). + * + * A backbuffer's contents are undefined after the start of this call. They + * might have a previous frame's contents, or be cleared, or contain random + * pixels at any time. A render pass with SDL_GPUPASSINIT_LOAD will not be + * meaningful for this texture until you've initialized it in some form. + */ +SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); + /* Present a window's current backbuffer to the display. This will take the current SDL_GpuTexture returned by SDL_GpuGetBackbuffer - * and queue it for presentation. The presentation request is queued after any submitted command buffers, so you should call this - * as soon as you've submitted any command buffers that provide the rendering you'd like to do for the frame without waiting for - * those command buffers to finish processing. + * and queue it for presentation. 
The presentation request is queued after any submitted command buffers, so you should call this + * as soon as you've submitted any command buffers that provide the rendering you'd like to do for the frame without waiting for + * those command buffers to finish processing. + * * As soon as this call completes, the backbuffer is considered "in-flight." + * * While the backbuffer is in-flight, the next call to SDL_GpuGetBackbuffer will return a different - * texture and/or block. Do not use this backbuffer again after requesting presentation with it, as its pointer is - * considered invalid and you should request a new one from SDL_GpuGetBackbuffer. Note that presenting a window with vsync will - * not block here, as this just queues the request. You should call this once per frame after rendering to a new backbuffer. If you - * haven't rendered to a backbuffer before presenting (or requested one with SDL_GpuGetBackbuffer), the results of this call are - * undefined. + * texture and/or block. Do not use this backbuffer again after requesting presentation with it, as its pointer becomes + * invalid; you should request a new one from SDL_GpuGetBackbuffer for future rendering. Note that presenting a window + * with vsync will not block here, as this just queues the request. + * + * You should call this once per frame after rendering to a new backbuffer. If you haven't rendered to a backbuffer + * before presenting, the results of this call are undefined. */ int SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval); diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index d1fcfe97f2e9b..3e94c2766df48 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -349,33 +349,6 @@ SDL_GpuDestroyShader(SDL_GpuShader *shader) } } -SDL_GpuTexture * -SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) -{ - SDL_GpuTexture *retval = NULL; - if (!device) { - SDL_InvalidParamError("device"); - } else if (!window) { - SDL_InvalidParamError("window"); - } else if (window->gpu_device && (window->gpu_device != device)) { - SDL_SetError("Window is being used by another GPU device"); - } else { - if (window->gpu_device == NULL) { - if (device->ClaimWindow(device, window) == -1) { - return NULL; - } - window->gpu_device = device; - } - - if (!window->gpu_backbuffer) { /* if !NULL, already requested one that isn't yet in-flight for presentation. */ - window->gpu_backbuffer = device->GetBackbuffer(device, window); - } - - retval = (SDL_GpuTexture *) window->gpu_backbuffer; - } - return retval; -} - SDL_GpuPipeline * SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc) { @@ -1208,6 +1181,51 @@ SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmd } } +SDL_GpuTexture * +SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) +{ + SDL_GpuTexture *retval = NULL; + if (!device) { + SDL_InvalidParamError("device"); + } else if (!window) { + SDL_InvalidParamError("window"); + } else if (window->gpu_device && (window->gpu_device != device)) { + SDL_SetError("Window is being used by another GPU device"); + } else { + if (window->gpu_device == NULL) { + if (device->ClaimWindow(device, window) == -1) { + return NULL; + } + window->gpu_device = device; + } + + if (!window->gpu_backbuffer) { /* if !NULL, already requested one that isn't yet in-flight for presentation. 
*/ + SDL_GpuTextureDescription desc; + char label[128]; + SDL_snprintf(label, sizeof (label), "Window backbuffer frame #%llu ('%s')", (unsigned long long) window->gpu_framenum, window->title); + SDL_zero(desc); + desc.label = label; + desc.texture_type = SDL_GPUTEXTYPE_2D; + desc.usage = SDL_GPUTEXUSAGE_RENDER_TARGET; + desc.depth_or_slices = 1; + desc.mipmap_levels = 1; + ALLOC_OBJ_WITH_DESC(SDL_GpuTexture, retval, &desc); + if (retval != NULL) { + retval->device = device; + if (device->GetBackbuffer(device, window, retval) == -1) { /* backend is expected to fill in width, height, and pixel_format of retval->desc! */ + FREE_AND_NULL_OBJ_WITH_DESC(retval); + } else { + window->gpu_framenum++; + window->gpu_backbuffer = retval; + } + } + } + + retval = (SDL_GpuTexture *) window->gpu_backbuffer; + } + return retval; +} + int SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval) { diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 8049eba5261cb..09b57afc6072b 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -129,8 +129,6 @@ struct SDL_GpuDevice int (*CreateShader)(SDL_GpuShader *shader, const Uint8 *bytecode, const Uint32 bytecodelen); void (*DestroyShader)(SDL_GpuShader *shader); - SDL_GpuTexture *(*GetBackbuffer)(SDL_GpuDevice *device, SDL_Window *window); - int (*CreatePipeline)(SDL_GpuPipeline *pipeline); void (*DestroyPipeline)(SDL_GpuPipeline *pipeline); @@ -169,6 +167,7 @@ struct SDL_GpuDevice int (*CopyFromTextureToBuffer)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); int (*EndBlitPass)(SDL_GpuBlitPass *pass); + int (*GetBackbuffer)(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *texture); int (*Present)(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *backbuffer, int swapinterval); int (*CreateFence)(SDL_GpuFence *fence); diff --git a/src/gpu/dummy/SDL_gpu_dummy.c b/src/gpu/dummy/SDL_gpu_dummy.c index 110c64ae2b316..a1ee6ef8cff97 100644 --- a/src/gpu/dummy/SDL_gpu_dummy.c +++ b/src/gpu/dummy/SDL_gpu_dummy.c @@ -62,7 +62,6 @@ static int DUMMY_GpuCreateTexture(SDL_GpuTexture *texture) { return 0; } static void DUMMY_GpuDestroyTexture(SDL_GpuTexture *texture) {} static int DUMMY_GpuCreateShader(SDL_GpuShader *shader, const Uint8 *bytecode, const Uint32 bytecodelen) { return 0; } static void DUMMY_GpuDestroyShader(SDL_GpuShader *shader) {} -static SDL_GpuTexture *DUMMY_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) { return NULL; /* !!! 
FIXME */ } static int DUMMY_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { return 0; } static void DUMMY_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) {} static int DUMMY_GpuCreateSampler(SDL_GpuSampler *sampler) { return 0; } @@ -96,6 +95,7 @@ static int DUMMY_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *src static int DUMMY_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } static int DUMMY_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) { return 0; } static int DUMMY_GpuEndBlitPass(SDL_GpuBlitPass *pass) { return 0; } +static int DUMMY_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *texture) { return 0; } static int DUMMY_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *backbuffer, int swapinterval) { return 0; } static int DUMMY_GpuCreateFence(SDL_GpuFence *fence) { return 0; } static void DUMMY_GpuDestroyFence(SDL_GpuFence *fence) {} @@ -118,7 +118,6 @@ DUMMY_GpuCreateDevice(SDL_GpuDevice *device) device->DestroyTexture = DUMMY_GpuDestroyTexture; device->CreateShader = DUMMY_GpuCreateShader; device->DestroyShader = DUMMY_GpuDestroyShader; - device->GetBackbuffer = DUMMY_GpuGetBackbuffer; device->CreatePipeline = DUMMY_GpuCreatePipeline; device->DestroyPipeline = DUMMY_GpuDestroyPipeline; device->CreateSampler = DUMMY_GpuCreateSampler; @@ -152,6 +151,7 @@ DUMMY_GpuCreateDevice(SDL_GpuDevice *device) device->CopyFromBufferToTexture = DUMMY_GpuCopyFromBufferToTexture; device->CopyFromTextureToBuffer = DUMMY_GpuCopyFromTextureToBuffer; device->EndBlitPass = DUMMY_GpuEndBlitPass; + device->GetBackbuffer = DUMMY_GpuGetBackbuffer; device->Present = DUMMY_GpuPresent; device->CreateFence = DUMMY_GpuCreateFence; device->DestroyFence = DUMMY_GpuDestroyFence; From 9b8a0fe880a3cf208dda62b673e6b350f39aa914 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 May 2022 00:06:36 -0400 Subject: [PATCH 37/54] gpu: submit/abandon one command buffer at a time. This was meant to be a nod to Vulkan, which can take an array of command buffers to submit in a single call (presumably with some chance of atomicity), but this turned out to be a hassle for the implementation, and honestly who actually needs this? --- include/SDL_gpu.h | 6 ++--- src/gpu/SDL_gpu.c | 48 ++++++++++++----------------------- src/gpu/SDL_sysgpu.h | 5 ++-- src/gpu/dummy/SDL_gpu_dummy.c | 8 +++--- test/testgpu_simple_clear.c | 2 +- test/testgpu_spinning_cube.c | 4 +-- 6 files changed, 28 insertions(+), 45 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index 8fe16bb657be3..dfa6181736006 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -679,15 +679,15 @@ int SDL_GpuWaitFence(SDL_GpuFence *fence); /* - * Once you've encoded your command buffer(s), you can submit them to the GPU for executing. + * Once you've encoded your command buffer, you can submit it to the GPU for executing. * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. * Once a command buffer is submitted, its pointer becomes invalid. 
Create a new one for the next set of commands. */ -int SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence); +int SDL_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence); /* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can abandon them, freeing their resources. This can be useful if something unrelated fails halfway through buffer encoding. */ -void SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs); +void SDL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer); /* * Get a texture that can be used for rendering to an SDL window. The SDL_Window diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 3e94c2766df48..3fb920fbc4382 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -1137,47 +1137,31 @@ SDL_GpuWaitFence(SDL_GpuFence *fence) } int -SDL_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence) +SDL_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence) { int retval; - Uint32 i; - - if (!device) { - return SDL_InvalidParamError("device"); - } else if (fence && (fence->device != device)) { - return SDL_SetError("Fence is not from this device"); - } - for (i = 0; i < numcmdbufs; i++) { - if (!buffers[i]) { - return SDL_SetError("Can't submit a NULL command buffer"); - } else if (buffers[i]->device != device) { - return SDL_SetError("Command buffer is not from this device"); - } + if (!cmdbuf) { + return SDL_InvalidParamError("cmdbuf"); + } else if (cmdbuf->currently_encoding) { + return SDL_SetError("There is a pass still encoding to command buffer"); + } else if (fence && (fence->device != cmdbuf->device)) { + return SDL_SetError("Fence is not from command buffer's device"); } - retval = device->SubmitCommandBuffers(device, buffers, numcmdbufs, fence); - - if (retval == 0) { - for (i = 0; i < numcmdbufs; i++) { - FREE_AND_NULL_OBJ_WITH_LABEL(buffers[i]); - } - } + retval = cmdbuf->device->SubmitCommandBuffer(cmdbuf, fence); + FREE_AND_NULL_OBJ_WITH_LABEL(cmdbuf); return retval; } void -SDL_GpuAbandonCommandBuffers(SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs) +SDL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) { - if (buffers) { - Uint32 i; - for (i = 0; i < numcmdbufs; i++) { - if (buffers[i]) { - buffers[i]->device->AbandonCommandBuffer(buffers[i]); - FREE_AND_NULL_OBJ_WITH_LABEL(buffers[i]); - } - } + if (buffer) { + /* !!! FIXME: deal with buffer->currently_encoding */ + buffer->device->AbandonCommandBuffer(buffer); + FREE_AND_NULL_OBJ_WITH_LABEL(buffer); } } @@ -1274,14 +1258,14 @@ SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *devi ((blit = SDL_GpuStartBlitPass("Blit pass for SDL_GpuCreateAndInitBuffer", cmd)) != NULL) ) { SDL_GpuCopyBufferCpuToGpu(blit, staging, 0, gpubuf, 0, buflen); SDL_GpuEndBlitPass(blit); - SDL_GpuSubmitCommandBuffers(device, &cmd, 1, fence); + SDL_GpuSubmitCommandBuffer(cmd, fence); SDL_GpuWaitFence(fence); /* so we know it's definitely uploaded */ retval = gpubuf; } if (!retval) { SDL_GpuEndBlitPass(blit); /* assume this might be un-ended. 
*/ - SDL_GpuAbandonCommandBuffers(&cmd, 1); + SDL_GpuAbandonCommandBuffer(cmd); SDL_GpuDestroyBuffer(gpubuf); } SDL_GpuDestroyCpuBuffer(staging); diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 09b57afc6072b..f9ecb979cc59e 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -136,9 +136,6 @@ struct SDL_GpuDevice void (*DestroySampler)(SDL_GpuSampler *sampler); int (*CreateCommandBuffer)(SDL_GpuCommandBuffer *cmdbuf); - int (*SubmitCommandBuffers)(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence); - void (*AbandonCommandBuffer)(SDL_GpuCommandBuffer *buffer); - int (*StartRenderPass)(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment); int (*SetRenderPassPipeline)(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); int (*SetRenderPassViewport)(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar); @@ -155,6 +152,8 @@ struct SDL_GpuDevice int (*DrawInstanced)(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance); int (*DrawInstancedIndexed)(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance); int (*EndRenderPass)(SDL_GpuRenderPass *pass); + int (*SubmitCommandBuffer)(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence); + void (*AbandonCommandBuffer)(SDL_GpuCommandBuffer *buffer); int (*StartBlitPass)(SDL_GpuBlitPass *pass); int (*CopyBetweenTextures)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); diff --git a/src/gpu/dummy/SDL_gpu_dummy.c b/src/gpu/dummy/SDL_gpu_dummy.c index a1ee6ef8cff97..f41f3bce5e1f2 100644 --- a/src/gpu/dummy/SDL_gpu_dummy.c +++ b/src/gpu/dummy/SDL_gpu_dummy.c @@ -67,8 +67,6 @@ static void DUMMY_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) {} static int DUMMY_GpuCreateSampler(SDL_GpuSampler *sampler) { return 0; } static void DUMMY_GpuDestroySampler(SDL_GpuSampler *sampler) {} static int DUMMY_GpuCreateCommandBuffer(SDL_GpuCommandBuffer *cmdbuf) { return 0; } -static int DUMMY_GpuSubmitCommandBuffers(SDL_GpuDevice *device, SDL_GpuCommandBuffer **buffers, const Uint32 numcmdbufs, SDL_GpuFence *fence) { return 0; } -static void DUMMY_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) {} static int DUMMY_GpuStartRenderPass(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment) { return 0; } static int DUMMY_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) { return 0; } static int DUMMY_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar) { return 0; } @@ -95,6 +93,8 @@ static int DUMMY_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *src static int DUMMY_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 
srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } static int DUMMY_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) { return 0; } static int DUMMY_GpuEndBlitPass(SDL_GpuBlitPass *pass) { return 0; } +static int DUMMY_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence) { return 0; } +static void DUMMY_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) {} static int DUMMY_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *texture) { return 0; } static int DUMMY_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *backbuffer, int swapinterval) { return 0; } static int DUMMY_GpuCreateFence(SDL_GpuFence *fence) { return 0; } @@ -123,8 +123,6 @@ DUMMY_GpuCreateDevice(SDL_GpuDevice *device) device->CreateSampler = DUMMY_GpuCreateSampler; device->DestroySampler = DUMMY_GpuDestroySampler; device->CreateCommandBuffer = DUMMY_GpuCreateCommandBuffer; - device->SubmitCommandBuffers = DUMMY_GpuSubmitCommandBuffers; - device->AbandonCommandBuffer = DUMMY_GpuAbandonCommandBuffer; device->StartRenderPass = DUMMY_GpuStartRenderPass; device->SetRenderPassPipeline = DUMMY_GpuSetRenderPassPipeline; device->SetRenderPassViewport = DUMMY_GpuSetRenderPassViewport; @@ -151,6 +149,8 @@ DUMMY_GpuCreateDevice(SDL_GpuDevice *device) device->CopyFromBufferToTexture = DUMMY_GpuCopyFromBufferToTexture; device->CopyFromTextureToBuffer = DUMMY_GpuCopyFromTextureToBuffer; device->EndBlitPass = DUMMY_GpuEndBlitPass; + device->SubmitCommandBuffer = DUMMY_GpuSubmitCommandBuffer; + device->AbandonCommandBuffer = DUMMY_GpuAbandonCommandBuffer; device->GetBackbuffer = DUMMY_GpuGetBackbuffer; device->Present = DUMMY_GpuPresent; device->CreateFence = DUMMY_GpuCreateFence; diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index 236eeb54331f4..a6bd2940297dc 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -81,7 +81,7 @@ static void render(SDL_Window *window) SDL_GpuEndRenderPass(pass); /* literally nothing to do, we just start a pass to say "clear the framebuffer to this color," present, and we're done. */ - SDL_GpuSubmitCommandBuffers(gpuDevice, &cmd, 1, NULL); + SDL_GpuSubmitCommandBuffer(cmd, NULL); SDL_GpuPresent(gpuDevice, window, 1); } diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c index 50e4cb4a53906..61d619d7ef15f 100644 --- a/test/testgpu_spinning_cube.c +++ b/test/testgpu_spinning_cube.c @@ -260,7 +260,6 @@ Render(SDL_Window *window, const int windownum) { WindowState *winstate = &window_states[windownum]; SDL_GpuTexture *backbuffer = SDL_GpuGetBackbuffer(gpu_device, window); - const SDL_GpuPresentType presenttype = (state->render_flags & SDL_RENDERER_PRESENTVSYNC) ? SDL_GPUPRESENT_VSYNC : SDL_GPUPRESENT_IMMEDIATE; SDL_GpuColorAttachmentDescription color_attachment; SDL_GpuDepthAttachmentDescription depth_attachment; SDL_GpuTexture **depth_texture_ptr; @@ -362,7 +361,8 @@ Render(SDL_Window *window, const int windownum) SDL_GpuDraw(render, 0, SDL_arraysize(vertex_data)); SDL_GpuEndRenderPass(render); - SDL_GpuSubmitCommandBuffers(gpu_device, &cmd, 1, NULL); /* push work to the GPU and tell it to present to the window when done. 
*/ + /* push work to the GPU and tell it to present to the window when done. */ + SDL_GpuSubmitCommandBuffer(cmd, NULL); SDL_GpuPresent(gpu_device, window, 1); } From 8259993134001e55c47321010b61c3770236668b Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 May 2022 00:12:30 -0400 Subject: [PATCH 38/54] gpu: Initial work on a Metal backend. This hasn't even been compiled yet--I didn't write it on a Mac!--so this likely has obvious syntax problems and copy/paste mistakes, etc. This implements everything but the elephant in the room--shaders--since I'm still deciding what I want to do there, so even once the thing compiles you can't use it yet. But this is a big chunk of progress! --- CMakeLists.txt | 1 + include/build_config/SDL_build_config.h.cmake | 2 + include/build_config/SDL_build_config_ios.h | 4 + include/build_config/SDL_build_config_macos.h | 3 + src/gpu/metal/SDL_gpu_metal.m | 1648 +++++++++++++++++ src/video/SDL_sysvideo.h | 2 + 6 files changed, 1660 insertions(+) create mode 100644 src/gpu/metal/SDL_gpu_metal.m diff --git a/CMakeLists.txt b/CMakeLists.txt index 042d6b52509dd..4bdfe6e3637c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2206,6 +2206,7 @@ elseif(APPLE) if(SDL_RENDER_METAL) sdl_glob_sources("${SDL3_SOURCE_DIR}/src/render/metal/*.m") set(SDL_VIDEO_RENDER_METAL 1) + set(SDL_GPU_METAL 1) set(HAVE_RENDER_METAL TRUE) endif() endif() diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake index 0e5ccf6e18a55..6ed29256bf847 100644 --- a/include/build_config/SDL_build_config.h.cmake +++ b/include/build_config/SDL_build_config.h.cmake @@ -485,6 +485,8 @@ #cmakedefine SDL_VIDEO_RENDER_PSP @SDL_VIDEO_RENDER_PSP@ #cmakedefine SDL_VIDEO_RENDER_VITA_GXM @SDL_VIDEO_RENDER_VITA_GXM@ +#cmakedefine SDL_GPU_METAL @SDL_GPU_METAL@ + /* Enable OpenGL support */ #cmakedefine SDL_VIDEO_OPENGL @SDL_VIDEO_OPENGL@ #cmakedefine SDL_VIDEO_OPENGL_ES @SDL_VIDEO_OPENGL_ES@ diff --git a/include/build_config/SDL_build_config_ios.h b/include/build_config/SDL_build_config_ios.h index c180ef84908d6..e1740b921ed52 100644 --- a/include/build_config/SDL_build_config_ios.h +++ b/include/build_config/SDL_build_config_ios.h @@ -194,6 +194,10 @@ #define SDL_VIDEO_VULKAN 1 #endif +#if SDL_PLATFORM_SUPPORTS_METAL +#define SDL_GPU_METAL 1 +#endif + #if SDL_PLATFORM_SUPPORTS_METAL #define SDL_VIDEO_METAL 1 #endif diff --git a/include/build_config/SDL_build_config_macos.h b/include/build_config/SDL_build_config_macos.h index ed3f83a645579..1059aee46c804 100644 --- a/include/build_config/SDL_build_config_macos.h +++ b/include/build_config/SDL_build_config_macos.h @@ -227,6 +227,9 @@ #endif #endif +/* !!! FIXME: this should be separate config stuff. */ +#define SDL_GPU_METAL SDL_VIDEO_RENDER_METAL + /* Enable OpenGL support */ #ifndef SDL_VIDEO_OPENGL #define SDL_VIDEO_OPENGL 1 diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m new file mode 100644 index 0000000000000..037438d6b51fd --- /dev/null +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -0,0 +1,1648 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2022 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. 
The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+#include "../../SDL_internal.h"
+
+#if SDL_GPU_METAL
+
+/* The Apple Metal driver for the GPU subsystem. */
+
+#include "SDL.h"
+#include "../SDL_sysgpu.h"
+
+#include <Availability.h>
+#import <Metal/Metal.h>
+#import <QuartzCore/CAMetalLayer.h>
+
+#ifdef __MACOSX__
+#import <AppKit/NSWindow.h>
+#import <AppKit/NSView.h>
+#endif
+
+#if !__has_feature(objc_arc)
+#error Please build with ARC support.
+#endif
+
+@interface METAL_GpuDeviceData : NSObject
+ @property (nonatomic, retain) id<MTLDevice> mtldevice;
+ @property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
+@end
+
+@implementation METAL_GpuDeviceData
+@end
+
+
+@interface METAL_GpuWindowData : NSObject
+ @property (nonatomic, assign) SDL_MetalView mtlview;
+ @property (nonatomic, retain) CAMetalLayer *mtllayer;
+ @property (nonatomic, retain) id<CAMetalDrawable> mtldrawable; // current backbuffer
+@end
+
+@implementation METAL_GpuWindowData
+@end
+
+
+@interface METAL_GpuBufferData : NSObject // this covers CPU and GPU buffers.
+ @property (nonatomic, retain) id<MTLBuffer> mtlbuf;
+@end
+
+@implementation METAL_GpuBufferData
+@end
+
+
+@interface METAL_GpuTextureData : NSObject
+ @property (nonatomic, retain) id<MTLTexture> mtltexture;
+@end
+
+@implementation METAL_GpuTextureData
+@end
+
+@interface METAL_GpuShaderData : NSObject
+ @property (nonatomic, retain) id<MTLFunction> mtlfunction;
+@end
+
+@implementation METAL_GpuShaderData
+@end
+
+@interface METAL_GpuPipelineData : NSObject
+ @property (nonatomic, retain) id<MTLRenderPipelineState> mtlpipeline;
+ @property (nonatomic, retain) id<MTLDepthStencilState> mtldepthstencil;
+
+ // these are part of the SDL GPU pipeline but not MTLRenderPipelineState, so we
+ // keep a copy and set them when setting a new pipeline state.
+ @property (nonatomic, assign) MTLPrimitiveType mtlprimitive;
+ @property (nonatomic, assign) MTLTriangleFillMode mtlfillmode;
+ @property (nonatomic, assign) MTLWinding mtlfrontface;
+ @property (nonatomic, assign) MTLCullMode mtlcullface;
+ @property (nonatomic, assign) float depth_bias;
+ @property (nonatomic, assign) float depth_bias_scale;
+ @property (nonatomic, assign) float depth_bias_clamp;
+ @property (nonatomic, assign) Uint32 front_stencil_reference;
+ @property (nonatomic, assign) Uint32 back_stencil_reference;
+@end
+
+@implementation METAL_GpuPipelineData
+@end
+
+@interface METAL_GpuSamplerData : NSObject
+ @property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
+@end
+
+@implementation METAL_GpuSamplerData
+@end
+
+@interface METAL_GpuCommandBufferData : NSObject
+ @property (nonatomic, retain) id<MTLCommandBuffer> mtlcmdbuf;
+@end
+
+@implementation METAL_GpuCommandBufferData
+@end
+
+@interface METAL_GpuRenderPassData : NSObject
+ @property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlpass;
+ // current state of things, so we don't re-set a currently set state.
+ @property (nonatomic, retain) id<MTLRenderPipelineState> mtlpipeline;
+ @property (nonatomic, retain) id<MTLDepthStencilState> mtldepthstencil;
+ @property (nonatomic, assign) MTLPrimitiveType mtlprimitive;
+ @property (nonatomic, assign) MTLTriangleFillMode mtlfillmode;
+ @property (nonatomic, assign) MTLWinding mtlfrontface;
+ @property (nonatomic, assign) MTLCullMode mtlcullface;
+ @property (nonatomic, assign) float depth_bias;
+ @property (nonatomic, assign) float depth_bias_scale;
+ @property (nonatomic, assign) float depth_bias_clamp;
+ @property (nonatomic, assign) Uint32 front_stencil_reference;
+ @property (nonatomic, assign) Uint32 back_stencil_reference;
+ @property (nonatomic, assign) MTLViewport viewport;
+ @property (nonatomic, assign) MTLScissorRect scissor;
+ @property (nonatomic, assign) float blend_constant_red;
+ @property (nonatomic, assign) float blend_constant_green;
+ @property (nonatomic, assign) float blend_constant_blue;
+ @property (nonatomic, assign) float blend_constant_alpha;
+@end
+
+@implementation METAL_GpuRenderPassData
+@end
+
+@interface METAL_GpuBlitPassData : NSObject
+ @property (nonatomic, retain) id<MTLBlitCommandEncoder> mtlpass;
+@end
+
+@implementation METAL_GpuBlitPassData
+@end
+
+@interface METAL_GpuFenceData : NSObject
+ @property (nonatomic, assign) SDL_atomic_t flag;
+ @property (nonatomic, assign) SDL_mutex *mutex;
+ @property (nonatomic, assign) SDL_cond *condition;
+@end
+
+@implementation METAL_GpuFenceData
+- (void)dealloc
+{
+ if (self.mutex) {
+ SDL_DestroyMutex(self.mutex);
+ }
+
+ if (self.condition) {
+ SDL_DestroyCond(self.condition);
+ }
+}
+@end
+
+
+#define METAL_PIXFMT_MAPPINGS \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_B5G6R5, MTLPixelFormatB5G6R5Unorm) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_BGR5A1, MTLPixelFormatBGR5A1Unorm) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_RGBA8, MTLPixelFormatRGBA8Unorm) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_RGBA8_sRGB, MTLPixelFormatRGBA8Unorm_sRGB) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_BGRA8, MTLPixelFormatBGRA8Unorm) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_BGRA8_sRGB, MTLPixelFormatBGRA8Unorm_sRGB) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_Depth24_Stencil8, MTLPixelFormatDepth24Unorm_Stencil8) \
+ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_INVALID, MTLPixelFormatInvalid)
+
+static MTLPixelFormat
+PixelFormatToMetal(const SDL_GpuPixelFormat fmt)
+{
+ switch (fmt) {
+ #define METAL_MAPPIXFMT(sdlfmt, mtlfmt) case sdlfmt: return mtlfmt;
+ METAL_PIXFMT_MAPPINGS
+ #undef METAL_MAPPIXFMT
+ }
+
+ SDL_SetError("Unsupported pixel format");
+ return MTLPixelFormatInvalid;
+}
+
+static SDL_GpuPixelFormat
+PixelFormatFromMetal(const MTLPixelFormat fmt)
+{
+ switch (fmt) {
+ #define METAL_MAPPIXFMT(sdlfmt, mtlfmt) case mtlfmt: return sdlfmt;
+ METAL_PIXFMT_MAPPINGS
+ #undef METAL_MAPPIXFMT
+ }
+
+ SDL_SetError("Unsupported pixel format");
+ return SDL_GPUPIXELFMT_INVALID;
+}
+
+static MTLVertexFormat
+VertFormatToMetal(const SDL_GpuVertexFormat fmt)
+{
+ switch (fmt) {
+ case SDL_GPUVERTFMT_INVALID: return MTLVertexFormatInvalid;
+ case SDL_GPUVERTFMT_UCHAR2: return MTLVertexFormatUChar2;
+ case SDL_GPUVERTFMT_UCHAR4: return MTLVertexFormatUChar4;
+ case SDL_GPUVERTFMT_CHAR2: return MTLVertexFormatChar2;
+ case SDL_GPUVERTFMT_CHAR4: return MTLVertexFormatChar4;
+ case SDL_GPUVERTFMT_UCHAR2_NORMALIZED: return MTLVertexFormatUChar2Normalized;
+ case SDL_GPUVERTFMT_UCHAR4_NORMALIZED: return MTLVertexFormatUChar4Normalized;
+ case SDL_GPUVERTFMT_CHAR2_NORMALIZED: return MTLVertexFormatChar2Normalized;
+ case SDL_GPUVERTFMT_CHAR4_NORMALIZED: return MTLVertexFormatChar4Normalized;
+ case SDL_GPUVERTFMT_USHORT: return MTLVertexFormatUShort;
+ case SDL_GPUVERTFMT_USHORT2: return MTLVertexFormatUShort2;
+ case SDL_GPUVERTFMT_USHORT4: return MTLVertexFormatUShort4;
+ case SDL_GPUVERTFMT_SHORT: return MTLVertexFormatShort;
+ case SDL_GPUVERTFMT_SHORT2: return MTLVertexFormatShort2;
+ case SDL_GPUVERTFMT_SHORT4: return MTLVertexFormatShort4;
+ case SDL_GPUVERTFMT_USHORT_NORMALIZED: return MTLVertexFormatUShortNormalized;
+ case SDL_GPUVERTFMT_USHORT2_NORMALIZED: return MTLVertexFormatUShort2Normalized;
+ case SDL_GPUVERTFMT_USHORT4_NORMALIZED: return MTLVertexFormatUShort4Normalized;
+ case SDL_GPUVERTFMT_SHORT_NORMALIZED: return MTLVertexFormatShortNormalized;
+ case SDL_GPUVERTFMT_SHORT2_NORMALIZED: return MTLVertexFormatShort2Normalized;
+ case SDL_GPUVERTFMT_SHORT4_NORMALIZED: return MTLVertexFormatShort4Normalized;
+ case SDL_GPUVERTFMT_HALF: return MTLVertexFormatHalf;
+ case SDL_GPUVERTFMT_HALF2: return MTLVertexFormatHalf2;
+ case SDL_GPUVERTFMT_HALF4: return MTLVertexFormatHalf4;
+ case SDL_GPUVERTFMT_FLOAT: return MTLVertexFormatFloat;
+ case SDL_GPUVERTFMT_FLOAT2: return MTLVertexFormatFloat2;
+ case SDL_GPUVERTFMT_FLOAT3: return MTLVertexFormatFloat3;
+ case SDL_GPUVERTFMT_FLOAT4: return MTLVertexFormatFloat4;
+ case SDL_GPUVERTFMT_UINT: return MTLVertexFormatUInt;
+ case SDL_GPUVERTFMT_UINT2: return MTLVertexFormatUInt2;
+ case SDL_GPUVERTFMT_UINT3: return MTLVertexFormatUInt3;
+ case SDL_GPUVERTFMT_UINT4: return MTLVertexFormatUInt4;
+ case SDL_GPUVERTFMT_INT: return MTLVertexFormatInt;
+ case SDL_GPUVERTFMT_INT2: return MTLVertexFormatInt2;
+ case SDL_GPUVERTFMT_INT3: return MTLVertexFormatInt3;
+ case SDL_GPUVERTFMT_INT4: return MTLVertexFormatInt4;
+ }
+
+ SDL_assert(!"Unexpected vertex format");
+ return MTLVertexFormatInvalid;
+}
+
+static MTLBlendOperation
+BlendOpToMetal(const SDL_GpuBlendOperation op)
+{
+ switch (op) {
+ case SDL_GPUBLENDOP_ADD: return MTLBlendOperationAdd;
+ case SDL_GPUBLENDOP_SUBTRACT: return MTLBlendOperationSubtract;
+ case SDL_GPUBLENDOP_REVERSESUBTRACT: return MTLBlendOperationReverseSubtract;
+ case SDL_GPUBLENDOP_MIN: return MTLBlendOperationMin;
+ case SDL_GPUBLENDOP_MAX: return MTLBlendOperationMax;
+ }
+
+ SDL_assert(!"Unexpected blend operation");
+ return MTLBlendOperationAdd;
+}
+
+static MTLBlendFactor
+BlendFactorToMetal(const SDL_GpuBlendFactor factor)
+{
+ switch (factor) {
+ case SDL_GPUBLENDFACTOR_ZERO: return MTLBlendFactorZero;
+ case SDL_GPUBLENDFACTOR_ONE: return MTLBlendFactorOne;
+ case SDL_GPUBLENDFACTOR_SOURCECOLOR: return MTLBlendFactorSourceColor;
+ case SDL_GPUBLENDFACTOR_ONEMINUSSOURCECOLOR: return MTLBlendFactorOneMinusSourceColor;
+ case SDL_GPUBLENDFACTOR_SOURCEALPHA: return MTLBlendFactorSourceAlpha;
+ case SDL_GPUBLENDFACTOR_ONEMINUSSOURCEALPHA: return MTLBlendFactorOneMinusSourceAlpha;
+ case SDL_GPUBLENDFACTOR_DESTINATIONCOLOR: return MTLBlendFactorDestinationColor;
+ case SDL_GPUBLENDFACTOR_ONEMINUSDESTINATIONCOLOR: return MTLBlendFactorOneMinusDestinationColor;
+ case SDL_GPUBLENDFACTOR_DESTINATIONALPHA: return MTLBlendFactorDestinationAlpha;
+ case SDL_GPUBLENDFACTOR_ONEMINUSDESTINATIONALPHA: return MTLBlendFactorOneMinusDestinationAlpha;
+ case SDL_GPUBLENDFACTOR_SOURCEALPHASATURATED: return MTLBlendFactorSourceAlphaSaturated;
+ case SDL_GPUBLENDFACTOR_BLENDCOLOR: return MTLBlendFactorBlendColor;
+ case SDL_GPUBLENDFACTOR_ONEMINUSBLENDCOLOR: return MTLBlendFactorOneMinusBlendColor;
+ case SDL_GPUBLENDFACTOR_BLENDALPHA: return MTLBlendFactorBlendAlpha;
+ case SDL_GPUBLENDFACTOR_ONEMINUSBLENDALPHA: return MTLBlendFactorOneMinusBlendAlpha;
+ case SDL_GPUBLENDFACTOR_SOURCE1COLOR: return MTLBlendFactorSource1Color;
+ case SDL_GPUBLENDFACTOR_ONEMINUSSOURCE1COLOR: return MTLBlendFactorOneMinusSource1Color;
+ case SDL_GPUBLENDFACTOR_SOURCE1ALPHA: return MTLBlendFactorSource1Alpha;
+ case SDL_GPUBLENDFACTOR_ONEMINUSSOURCE1ALPHA: return MTLBlendFactorOneMinusSource1Alpha;
+ }
+
+ SDL_assert(!"Unexpected blend factor");
+ return
MTLBlendFactorZero; +} + +static MTLPrimitiveTopologyClass +PrimitiveTopologyToMetal(const SDL_GpuPrimitive prim) +{ + switch (prim) { + case SDL_GPUPRIM_POINT: return MTLPrimitiveTopologyClassPoint; + case SDL_GPUPRIM_LINE: return MTLPrimitiveTopologyClassLine; + case SDL_GPUPRIM_LINESTRIP: return MTLPrimitiveTopologyClassLine; + case SDL_GPUPRIM_TRIANGLE: return MTLPrimitiveTopologyClassTriangle; + case SDL_GPUPRIM_TRIANGLESTRIP: return MTLPrimitiveTopologyClassTriangle; + } + + SDL_assert(!"Unexpected primitive topology"); + return MTLPrimitiveTopologyClassUnspecified; +} + +static MTLCompareFunction +CompareFunctionToMetal(const SDL_GpuCompareFunction fn) +{ + switch (fn) { + case SDL_GPUCMPFUNC_NEVER: return MTLCompareFunctionNever; + case SDL_GPUCMPFUNC_LESS: return MTLCompareFunctionLess; + case SDL_GPUCMPFUNC_EQUAL: return MTLCompareFunctionEqual; + case SDL_GPUCMPFUNC_LESSEQUAL: return MTLCompareFunctionLessEqual; + case SDL_GPUCMPFUNC_GREATER: return MTLCompareFunctionGreater; + case SDL_GPUCMPFUNC_NOTEQUAL: return MTLCompareFunctionNotEqual; + case SDL_GPUCMPFUNC_GREATEREQUAL: return MTLCompareFunctionGreaterEqual; + case SDL_GPUCMPFUNC_ALWAYS: return MTLCompareFunctionAlways; + } + + SDL_assert(!"Unexpected compare function"); + return MTLCompareFunctionNever; +} + +static MTLStencilOperation +StencilOpToMetal(const SDL_GpuStencilOperation op) +{ + switch (op) { + case SDL_GPUSTENCILOP_KEEP: return MTLStencilOperationKeep; + case SDL_GPUSTENCILOP_ZERO: return MTLStencilOperationZero; + case SDL_GPUSTENCILOP_REPLACE: return MTLStencilOperationReplace; + case SDL_GPUSTENCILOP_INCREMENTCLAMP: return MTLStencilOperationIncrementClamp; + case SDL_GPUSTENCILOP_DECREMENTCLAMP: return MTLStencilOperationDecrementClamp; + case SDL_GPUSTENCILOP_INVERT: return MTLStencilOperationInvert; + case SDL_GPUSTENCILOP_INCREMENTWRAP: return MTLStencilOperationIncrementWrap; + case SDL_GPUSTENCILOP_DECREMENTWRAP: return MTLStencilOperationDecrementWrap; + } + + SDL_assert(!"Unexpected stencil operation"); + return MTLStencilOperationKeep; +} + +static MTLPrimitiveType +PrimitiveToMetal(const SDL_GpuPrimitive prim) +{ + switch (prim) { + case SDL_GPUPRIM_POINT: return MTLPrimitiveTypePoint; + case SDL_GPUPRIM_LINE: return MTLPrimitiveTypeLine; + case SDL_GPUPRIM_LINESTRIP: return MTLPrimitiveTypeLineStrip; + case SDL_GPUPRIM_TRIANGLE: return MTLPrimitiveTypeTriangle; + case SDL_GPUPRIM_TRIANGLESTRIP: return MTLPrimitiveTypeTriangleStrip; + } + + SDL_assert(!"Unexpected primitive type"); + return MTLPrimitiveTypeTriangleStrip; +} + +static MTLTriangleFillMode +FillModeToMetal(const SDL_GpuFillMode fill) +{ + switch (fill) { + case SDL_GPUFILL_FILL: return MTLTriangleFillModeFill; + case SDL_GPUFILL_LINE: return MTLTriangleFillModeLines; + } + + SDL_assert(!"Unexpected fill mode"); + return MTLTriangleFillModeFill; +} + +static MTLWinding +FrontFaceToMetal(const SDL_GpuFrontFace winding) +{ + switch (winding) { + case SDL_GPUFRONTFACE_COUNTER_CLOCKWISE: return MTLWindingCounterClockwise; + case SDL_GPUFRONTFACE_CLOCKWISE: return MTLWindingClockwise; + } + + SDL_assert(!"Unexpected winding mode"); + return MTLWindingCounterClockwise; +} + +static MTLCullMode +CullFaceToMetal(const SDL_GpuCullFace face) +{ + switch (face) { + case SDL_GPUCULLFACE_BACK: return MTLCullModeBack; + case SDL_GPUCULLFACE_FRONT: return MTLCullModeFront; + case SDL_GPUCULLFACE_NONE: return MTLCullModeNone; + } + + SDL_assert(!"Unexpected cull mode"); + return MTLCullModeBack; +} + + +static MTLSamplerAddressMode 
+SamplerAddressToMetal(const SDL_GpuSamplerAddressMode addrmode) +{ + switch (addrmode) { + case SDL_GPUSAMPADDR_CLAMPTOEDGE: return MTLSamplerAddressModeClampToEdge; + case SDL_GPUSAMPADDR_MIRRORCLAMPTOEDGE: return MTLSamplerAddressModeMirrorClampToEdge; + case SDL_GPUSAMPADDR_REPEAT: return MTLSamplerAddressModeRepeat; + case SDL_GPUSAMPADDR_MIRRORREPEAT: return MTLSamplerAddressModeMirrorRepeat; + case SDL_GPUSAMPADDR_CLAMPTOZERO: return MTLSamplerAddressModeClampToZero; + case SDL_GPUSAMPADDR_CLAMPTOBORDERCOLOR: return MTLSamplerAddressModeClampToBorderColor; + } + + SDL_assert(!"Unexpected sampler address mode"); + return MTLSamplerAddressModeClampToEdge; +} + +static MTLSamplerBorderColor +SamplerBorderColorToMetal(const SDL_GpuSamplerBorderColor color) +{ + switch (color) { + case SDL_GPUSAMPBORDER_TRANSPARENT_BLACK: return MTLSamplerBorderColorTransparentBlack; + case SDL_GPUSAMPBORDER_OPAQUE_BLACK: return MTLSamplerBorderColorOpaqueBlack; + case SDL_GPUSAMPBORDER_OPAQUE_WHITE: return MTLSamplerBorderColorOpaqueWhite; + } + + SDL_assert(!"Unexpected sampler border color"); + return MTLSamplerBorderColorTransparentBlack; +} + +static MTLSamplerMinMagFilter +SamplerMinMagFilterToMetal(const SDL_GpuSamplerMinMagFilter filt) +{ + switch (filt) { + case SDL_GPUMINMAGFILTER_NEAREST: return MTLSamplerMinMagFilterNearest; + case SDL_GPUMINMAGFILTER_LINEAR: return MTLSamplerMinMagFilterLinear; + } + + SDL_assert(!"Unexpected sampler minmag filter"); + return MTLSamplerMinMagFilterNearest; +} + +static MTLSamplerMipFilter +SamplerMipFilterToMetal(const SDL_GpuSamplerMipFilter filt) +{ + switch (filt) { + case SDL_GPUMIPFILTER_NOTMIPMAPPED: return MTLSamplerMipFilterNotMipmapped; + case SDL_GPUMIPFILTER_NEAREST: return MTLSamplerMipFilterNearest; + case SDL_GPUMIPFILTER_LINEAR: return MTLSamplerMipFilterLinear; + } + + SDL_assert(!"Unexpected sampler mip filter"); + return MTLSamplerMipFilterNotMipmapped; +} + +static MTLLoadAction +LoadActionToMetal(const SDL_GpuPassInit action) +{ + switch (action) { + case SDL_GPUPASSINIT_UNDEFINED: return MTLLoadActionDontCare; + case SDL_GPUPASSINIT_LOAD: return MTLLoadActionLoad; + case SDL_GPUPASSINIT_CLEAR: return MTLLoadActionClear; + } + + SDL_assert(!"Unexpected load action"); + return MTLLoadActionDontCare; +} + +static MTLIndexType +IndexTypeToMetal(const SDL_GpuIndexType typ) +{ + switch (typ) { + case SDL_GPUINDEXTYPE_UINT16: return MTLIndexTypeUInt16; + case SDL_GPUINDEXTYPE_UINT32: return MTLIndexTypeUInt32; + } + + SDL_assert(!"Unexpected index type"); + return MTLIndexTypeUInt16; +} + + +static SDL_MetalView +GetWindowView(SDL_Window *window) +{ + SDL_SysWMinfo info; + + SDL_VERSION(&info.version); + if (SDL_GetWindowWMInfo(window, &info)) { +#ifdef __MACOSX__ + if (info.subsystem == SDL_SYSWM_COCOA) { + NSView *view = info.info.cocoa.window.contentView; + if (view.subviews.count > 0) { + view = view.subviews[0]; + if (view.tag == SDL_METALVIEW_TAG) { + return (SDL_MetalView) CFBridgingRetain(view); + } + } + } +#else + if (info.subsystem == SDL_SYSWM_UIKIT) { + UIView *view = info.info.uikit.window.rootViewController.view; + if (view.tag == SDL_METALVIEW_TAG) { + return (SDL_MetalView) CFBridgingRetain(view); + } + } +#endif + } + return nil; +} + + +static int +METAL_GpuClaimWindow(SDL_GpuDevice *device, SDL_Window *window) +{ + const Uint32 window_flags = SDL_GetWindowFlags(window); + SDL_bool changed_window = SDL_FALSE; + METAL_GpuWindowData *windata; + CAMetalLayer *layer = nil; + SDL_MetalView view; + + windata = [[METAL_GpuWindowData 
alloc] init]; + if (windata == nil) { + return SDL_OutOfMemory(); + } + + windata.backbuffer = nil; + + if (!(window_flags & SDL_WINDOW_METAL)) { + changed_window = SDL_TRUE; + if (SDL_RecreateWindow(window, (window_flags & ~(SDL_WINDOW_VULKAN | SDL_WINDOW_OPENGL)) | SDL_WINDOW_METAL) < 0) { + return -1; + } + } + + view = GetWindowView(window); + if (view == nil) { + view = SDL_Metal_CreateView(window); + if (view == nil) { + if (changed_window) { + SDL_RecreateWindow(window, window_flags); + } + return -1; + } + } + + windata.mtlview = view; + +// !!! FIXME: does this need bridging, or can we just assign and let ARC handle it? +#ifdef __MACOSX__ + layer = (CAMetalLayer *)[(__bridge NSView *)windata.mtlview layer]; +#else + layer = (CAMetalLayer *)[(__bridge UIView *)windata.mtlview layer]; +#endif + + layer.device = data.mtldevice; + layer.framebufferOnly = NO; + windata.mtllayer = layer; + + window->gpu_driverdata = (void *) CFBridgingRetain(windata); + + return 0; +} + +static int +METAL_GpuCreateCpuBuffer(SDL_GpuCpuBuffer *buffer, const void *data) +{ + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) buffer->device->driverdata; + METAL_GpuBufferData *bufferdata; + + bufferdata = [[METAL_GpuBufferData alloc] init]; + if (bufferdata == nil) { + return SDL_OutOfMemory(); + } + + if (data != NULL) { + bufferdata.mtlbuf = [devdata->mtldevice newBufferWithBytes:data length:buffer->buflen options:MTLResourceStorageModeShared]; + } else { + bufferdata.mtlbuf = [devdata->mtldevice newBufferWithLength:buffer->buflen options:MTLResourceStorageModeShared]; + } + + if (bufferdata.mtlbuf == nil) { + SDL_SetError("Failed to create Metal buffer!"); + } + + if (buffer->label) { + bufferdata.mtlbuf.label = [NSString stringWithUTF8String:buffer->label]; + } + + buffer->driverdata = (void *) CFBridgingRetain(bufferdata); + + return 0; +} + +static void +METAL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) +{ + CFBridgingRelease(buffer->driverdata); +} + +static void * +METAL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer) +{ + METAL_GpuBufferData *bufdata = (__bridge METAL_GpuBufferData *) buffer->driverdata; + void *retval = [bufdata->mtlbuf contents]; + SDL_assert(retval != NULL); // should only return NULL for private (GPU-only) buffers. 
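+ // (CPU-side buffers get MTLResourceStorageModeShared in METAL_GpuCreateCpuBuffer, so
+ // -contents returns a pointer the CPU can read and write directly; nothing extra has
+ // to be synchronized before handing it back to the caller.)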
+ return retval; +} + +static int +METAL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) +{ + return 0; +} + +static int +METAL_GpuCreateBuffer(SDL_GpuBuffer *buffer) +{ + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) buffer->device->driverdata; + METAL_GpuBufferData *bufferdata; + + bufferdata = [[METAL_GpuBufferData alloc] init]; + if (bufferdata == nil) { + return SDL_OutOfMemory(); + } + + bufferdata.mtlbuf = [devdata->mtldevice newBufferWithLength:buffer->buflen options:MTLResourceStorageModePrivate]; + if (bufferdata.mtlbuf == nil) { + SDL_SetError("Failed to create Metal buffer!"); + } + + if (buffer->label) { + bufferdata.mtlbuf.label = [NSString stringWithUTF8String:buffer->label]; + } + + buffer->driverdata = (void *) CFBridgingRetain(bufferdata); + + return 0; +} + +static void +METAL_GpuDestroyBuffer(SDL_GpuBuffer *buffer) +{ + CFBridgingRelease(buffer->driverdata); +} + +static int +METAL_GpuCreateTexture(SDL_GpuTexture *texture) +{ + const SDL_GpuTextureDescription *desc = &texture->desc; + + const MTLPixelFormat mtlfmt = PixelFormatToMetal(desc->pixel_format); + if (mtlfmt == MTLPixelFormatInvalid) { + return -1; + } + + MTLTextureType mtltextype; + SDL_bool is_cube = SDL_FALSE; + SDL_bool is_array = SDL_FALSE; + SDL_bool is_3d = SDL_FALSE; + switch (desc->texture_type) { + case SDL_GPUTEXTYPE_1D: mtltextype = MTLTextureType1D; break; + case SDL_GPUTEXTYPE_2D: mtltextype = MTLTextureType2D; break; + case SDL_GPUTEXTYPE_CUBE: mtltextype = MTLTextureTypeCube; is_cube = SDL_TRUE; break; + case SDL_GPUTEXTYPE_3D: mtltextype = MTLTextureType3D; is_3d = SDL_TRUE; break; + case SDL_GPUTEXTYPE_1D_ARRAY: mtltextype = MTLTextureType1DArray; is_array = SDL_TRUE; break; + case SDL_GPUTEXTYPE_2D_ARRAY: mtltextype = MTLTextureType2DArray; is_array = SDL_TRUE; break; + case SDL_GPUTEXTYPE_CUBE_ARRAY: mtltextype = MTLTextureTypeCubeArray; is_cube = SDL_TRUE; is_array = SDL_TRUE; break; + default: return SDL_SetError("Unsupported texture type"); + }; + + MTLTextureUsage mtltexusage = (MTLTextureUsage) 0; + if (desc->usage & SDL_GPUTEXUSAGE_SHADER_READ) { + mtltexusage |= MTLTextureUsageShaderRead; + } + if (desc->usage & SDL_GPUTEXUSAGE_SHADER_WRITE) { + mtltexusage |= MTLTextureUsageShaderWite; + } + if (desc->usage & SDL_GPUTEXUSAGE_RENDER_TARGET) { + mtltexusage |= MTLTextureUsageRenderTarget; + } + + METAL_GpuTextureData *texturedata = [[METAL_GpuTextureData alloc] init]; + if (texturedata == nil) { + return SDL_OutOfMemory(); + } + + texturedata.mtldrawable = nil; + + // !!! FIXME: does ARC know what to do with these, since it doesn't start with "alloc" or "new"? + MTLTextureDescriptor *mtltexdesc; + if (is_cube) { + mtltexdesc = [MTLTextureDescriptor textureCubeDescriptorWithPixelFormat:mtlfmt size:desc->width mipmapped:NO]; + } else { + mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:mtlfmt width:desc->width height:desc->height mipmapped:NO]; + } + + if (mtltexdesc == nil) { + return SDL_OutOfMemory(); + } + + mtltexdesc.textureType = mtltextype; + mtltexdesc.pixelFormat = mtlfmt; + mtltexdesc.width = desc->width; + mtltexdesc.height = is_cube ? desc->width : desc->height; + mtltexdesc.depth = (is_3d) ? desc->depth_or_slices : 1; + mtltexdesc.mipmapLevelCount = desc->mipmap_levels; + mtltexdesc.sampleCount = 1; // !!! FIXME: multisample support + mtltexdesc.arrayLength = is_3d ? 1 : desc->depth_or_slices; + if (is_cube) { + mtltexdesc.arrayLength /= 6; + } + mtltexdesc.resourceOptions = MTLResourceStorageModePrivate; + + // not available in iOS 8. 
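+ // (MTLTextureDescriptor only gained a usage property in iOS 9 / macOS 10.11, which is
+ // why it is set through a respondsToSelector: check below instead of unconditionally.)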
+ if ([mtltexdesc respondsToSelector:@selector(usage)]) { + mtltexdesc.usage = mtltexusage; + } + + // these arrived in later releases, but we want the defaults anyhow. + //mtltexdesc.cpuCacheMode = MTLCPUCacheModeDefaultCache; + //mtltexdesc.hazardTrackingMode = MTLHazardTrackingModeDefault; + //mtltexdesc.allowGPUOptimizedContents = YES; + //mtltexdesc.swizzle = blahblahblah; + + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) texture->device->driverdata; + texturedata.mtltexture = [devdata->device newTextureWithDescriptor:mtltexdesc]; + if (texturedata.mtltexture == nil) { + SDL_SetError("Failed to create Metal texture!"); + } + + texturedata.mtltexture.label = [NSString stringWithUTF8String:desc->label]; + + texture->driverdata = (void *) CFBridgingRetain(texturedata); + + return 0; +} + +static void +METAL_GpuDestroyTexture(SDL_GpuTexture *texture) +{ + CFBridgingRelease(texture->driverdata); +} + + +// !!! FIXME +static int METAL_GpuCreateShader(SDL_GpuShader *shader, const Uint8 *bytecode, const Uint32 bytecodelen) { return 0; } +static void METAL_GpuDestroyShader(SDL_GpuShader *shader) {} + + + +static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) +{ + const SDL_GpuPipelineDescription *desc = &pipeline->desc; + + // !!! FIXME: I assume this has to be something depthy, and not RGBy. + const MTLPixelFormat mtldepthfmt = PixelFormatToMetal(desc->depth_format); + if ((mtldepthfmt == MTLPixelFormatInvalid) && (desc->depth_format != SDL_GPUPIXELFMT_INVALID)) { + return SDL_SetError("Invalid depth pixel format"); + } + + // !!! FIXME: I assume this has to be something stencilly, and not RGBy. + const MTLPixelFormat mtlstencilfmt = PixelFormatToMetal(desc->stencil_format); + if ((mtlstencilfmt == MTLPixelFormatInvalid) && (desc->stencil_format != SDL_GPUPIXELFMT_INVALID)) { + return SDL_SetError("Invalid stencil pixel format"); + } + + METAL_GpuPipelineData *pipelinedata = [[METAL_GpuPipelineData alloc] init]; + if (pipelinedata == nil) { + return SDL_OutOfMemory(); + } + + MTLRenderPipelineDescriptor *mtlpipedesc = [[MTLRenderPipelineDescriptor alloc] init]; + if (mtlpipedesc == nil) { + return SDL_OutOfMemory(); + } + + MTLVertexDescriptor *mtlvertdesc = [MTLVertexDescriptor vertexDescriptor]; + for (Uint32 i = 0; i < desc->num_vertex_attributes; i++) { + mtlvertdesc.attributes[i].format = VertFormatToMetal(desc->vertices[i].format); + mtlvertdesc.attributes[i].offset = desc->vertices[i].offset; + mtlvertdesc.attributes[i].bufferIndex = desc->vertices[i].index; + mtlvertdesc.layouts[i].stepFunction = MTLVertexStepFunctionPerVertex; // !!! FIXME + mtlvertdesc.layouts[i].stepRate = 1; // !!! 
FIXME + mtlvertdesc.layouts[i].stride = desc->vertices[i].stride; + } + + for (Uint32 i = 0; i < desc->num_color_attachments; i++) { + const SDL_GpuPipelineColorAttachmentDescription *sdldesc = &desc->color_attachments[i]; + const MTLPixelFormat mtlfmt = PixelFormatToMetal(sdldesc->pixel_format); + if (mtlfmt == MTLPixelFormatInvalid) { + return SDL_SetError("Invalid pixel format in color attachment #%u", (unsigned int) i); + } + + MTLColorWriteMask writemask = MTLColorWriteMaskNone; + if (sdldesc->writemask_enabled_red) { writemask |= MTLColorWriteMaskRed; } + if (sdldesc->writemask_enabled_blue) { writemask |= MTLColorWriteMaskBlue; } + if (sdldesc->writemask_enabled_green) { writemask |= MTLColorWriteMaskGreen; } + if (sdldesc->writemask_enabled_alpha) { writemask |= MTLColorWriteMaskAlpha; } + + MTLRenderPipelineColorAttachmentDescriptor *metaldesc = mtlpipedesc.colorAttachments[i]; + metaldesc.pixelFormat = mtlfmt; + metaldesc.writeMask = writemask; + metaldesc.blendingEnabled = sdldesc->blending_enabled ? YES : NO; + metaldesc.alphaBlendOperation = BlendOpToMetal(sdldesc->alpha_blend_op); + metaldesc.sourceAlphaBlendFactor = BlendFactorToMetal(sdldesc->alpha_src_blend_factor); + metaldesc.destinationAlphaBlendFactor = BlendFactorToMetal(sdldesc->alpha_dst_blend_factor); + metaldesc.rgbBlendOperation = BlendOpToMetal(sdldesc->rgb_blend_op); + metaldesc.sourceRGBBlendFactor = BlendFactorToMetal(sdldesc->rgb_src_blend_factor); + metaldesc.destinationRGBBlendFactor = BlendFactorToMetal(sdldesc->rgb_dst_blend_factor); + } + + METAL_GpuShaderData *vshaderdata = (__bridge METAL_GpuShaderData *) desc->vertex_shader->driverdata; + METAL_GpuShaderData *fshaderdata = (__bridge METAL_GpuShaderData *) desc->fragment_shader->driverdata; + + mtlpipedesc.label = [NSString stringWithUTF8String:desc->label]; + mtlpipedesc.vertexFunction = vshaderdata.mtlfunction; + mtlpipedesc.fragmentFunction = fshaderdata.mtlfunction; + mtlpipedesc.vertexDescriptor = mtlvertdesc; + mtlpipedesc.depthAttachmentPixelFormat = mtldepthfmt; + mtlpipedesc.stencilAttachmentPixelFormat = mtlstencilfmt; + mtlpipedesc.sampleCount = 1; // !!! FIXME: multisampling + mtlpipedesc.alphaToCoverageEnabled = NO; + mtlpipedesc.alphaToOneEnabled = NO; + mtlpipedesc.rasterizationEnabled = YES; + mtlpipedesc.rasterSampleCount = 1; // !!! FIXME: multisampling (also, how is this different from sampleCount?) + + // Not available before iOS 12. + if ([mtlpipedesc respondsToSelector:@selector(inputPrimitiveTopology)]) { + mtltexdesc.inputPrimitiveTopology = PrimitiveTopologyToMetal(desc->primitive); + } + + // these arrived in later releases, but we _probably_ want the defaults anyhow (and/or we don't support it). 
+ //mtlpipedesc.maxVertexCallStackDepth = 1; + //mtlpipedesc.maxFragmentCallStackDepth = 1; + //mtlpipedesc.vertexBuffers + //mtlpipedesc.fragmentBuffers + //mtlpipedesc.maxTessellationFactor + //mtlpipedesc.tessellationFactorScaleEnabled + //mtlpipedesc.tessellationFactorFormat + //mtlpipedesc.tessellationControlPointIndexType + //mtlpipedesc.tessellationFactorStepFunction + //mtlpipedesc.tessellationOutputWindingOrder + //mtlpipedesc.tessellationPartitionMode + //mtlpipedesc.supportIndirectCommandBuffers + //mtlpipedesc.maxVertexAmplificationCount + //mtlpipedesc.supportAddingVertexBinaryFunctions + //mtlpipedesc.supportAddingFragmentBinaryFunctions + //mtlpipedesc.binaryArchives + //mtlpipedesc.vertexLinkedFunctions + //mtlpipedesc.fragmentLinkedFunctions + //mtlpipedesc.fragmentPreloadedLibraries + //mtlpipedesc.vertexPreloadedLibraries + + // !!! FIXME: Hash existing pipelines and reuse them, with reference counting, for when the only things + // !!! FIXME: that are different are states that exist in SDL_GpuPipeline but not MTLRenderPipelineState. + // !!! FIXME: Likewise for depth stencil objects. + + // Metal wants to create separate, long-living state objects for + // depth stencil stuff, so build one to go with pipeline. + // !!! FIXME: iOS 8 doesn't have -(void)[MTLRenderCommandEncoder setStencilFrontReferenceValue:backReferenceValue:], + // !!! FIXME: only one that sets them to the same thing, so we should fail if + // !!! FIXME: stencil_reference_front != stencil_reference_back on that target. + MTLDepthStencilDescriptor *mtldepthstencildesc = [[MTLDepthStencilDescriptor alloc] init]; + mtldepthstencildesc.label = mtlpipedesc.label; + mtldepthstencildesc.depthCompareFunction = CompareFunctionToMetal(desc->depth_function); + mtldepthstencildesc.depthWriteEnabled = desc->depth_write_enabled ? 
YES : NO; + mtldepthstencildesc.backFaceStencil = [[MTLStencilDescriptor alloc] init]; + mtldepthstencildesc.backFaceStencil.stencilFailureOperation = StencilOpToMetal(desc->depth_stencil_back.stencil_fail); + mtldepthstencildesc.backFaceStencil.depthFailureOperation = StencilOpToMetal(desc->depth_stencil_back.depth_fail); + mtldepthstencildesc.backFaceStencil.depthStencilPassOperation = StencilOpToMetal(desc->depth_stencil_back.depth_and_stencil_pass); + mtldepthstencildesc.backFaceStencil.stencilCompareFunction = CompareFunctionToMetal(desc->depth_stencil_back.stencil_function); + mtldepthstencildesc.backFaceStencil.readMask = desc->depth_stencil_back.stencil_read_mask; + mtldepthstencildesc.backFaceStencil.writeMask = desc->depth_stencil_back.stencil_write_mask; + mtldepthstencildesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init]; + mtldepthstencildesc.frontFaceStencil.stencilFailureOperation = StencilOpToMetal(desc->depth_stencil_front.stencil_fail); + mtldepthstencildesc.frontFaceStencil.depthFailureOperation = StencilOpToMetal(desc->depth_stencil_front.depth_fail); + mtldepthstencildesc.frontFaceStencil.depthStencilPassOperation = StencilOpToMetal(desc->depth_stencil_front.depth_and_stencil_pass); + mtldepthstencildesc.frontFaceStencil.stencilCompareFunction = CompareFunctionToMetal(desc->depth_stencil_front.stencil_function); + mtldepthstencildesc.frontFaceStencil.readMask = desc->depth_stencil_front.stencil_read_mask; + mtldepthstencildesc.frontFaceStencil.writeMask = desc->depth_stencil_front.stencil_write_mask; + + NSError *err = nil; + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) pipeline->device->driverdata; + pipelinedata.mtldepthstencil = [devdata->mtldevice newDepthStencilStateWithDescriptor:mtldepthstencildesc]; + pipelinedata.mtlpipeline = [devdata->mtldevice newRenderPipelineStateWithDescriptor:mtlpipedesc error:&err]; + pipelinedata.mtlprimitive = PrimitiveToMetal(desc->primitive); + pipelinedata.mtlfillmode = FillModeToMetal(desc->fill_mode); + pipelinedata.mtlfrontface = FrontFaceToMetal(desc->front_face); + pipelinedata.mtlcullface = CullFaceToMetal(desc->cull_face); + pipelinedata.depth_bias = desc->depth_bias; + pipelinedata.depth_bias_scale = desc->depth_bias; + pipelinedata.depth_bias_clamp = desc->depth_bias_clamp; + pipelinedata.front_stencil_reference = desc->depth_stencil_front.stencil_reference; + pipelinedata.back_stencil_reference = desc->depth_stencil_back.stencil_reference; + + SDL_assert(err == nil); // !!! FIXME: for what reasons can this fail? 
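+ // (Pipeline creation can fail for legitimate runtime reasons -- a missing vertex or
+ // fragment function, or attachment formats the device can't render to -- and `err`
+ // carries the details, so this likely wants a real error path rather than an assert.)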
+ + if (pipelinedata.mtldepthstencil == nil) { + return SDL_SetError("Failed to create Metal depth stencil!"); + } else if (pipelinedata.mtlpipeline == nil) { + return SDL_SetError("Failed to create Metal pipeline!"); + } + + pipeline->driverdata = (void *) CFBridgingRetain(pipelinedata); + + return 0; +} + +static void +METAL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) +{ + CFBridgingRelease(pipeline->driverdata); +} + +static int +METAL_GpuCreateSampler(SDL_GpuSampler *sampler) +{ + const SDL_GpuSamplerDescription *desc = &sampler->desc; + + METAL_GpuSamplerData *samplerdata = [[METAL_GpuSamplerData alloc] init]; + if (samplerdata == nil) { + return SDL_OutOfMemory(); + } + + MTLSamplerDescriptor *mtlsamplerdesc = [[MTLSamplerDescriptor alloc] init]; + if (mtlsamplerdesc == nil) { + return SDL_OutOfMemory(); + } + + mtlsamplerdesc.label = [NSString stringWithUTF8String:desc->label]; + mtlsamplerdesc.normalizedCoordinates = YES; + mtlsamplerdesc.rAddressMode = SamplerAddressToMetal(desc->addrmode_r); + mtlsamplerdesc.sAddressMode = SamplerAddressToMetal(desc->addrmode_u); + mtlsamplerdesc.tAddressMode = SamplerAddressToMetal(desc->addrmode_v); + mtlsamplerdesc.borderColor = SamplerBorderColorToMetal(desc->border_color); + mtlsamplerdesc.minFilter = SamplerMinMagFilterToMetal(desc->min_filter); + mtlsamplerdesc.magFilter = SamplerMinMagFilterToMetal(desc->mag_filter); + mtlsamplerdesc.mipFilter = SamplerMipFilterToMetal(desc->mip_filter); + mtlsamplerdesc.maxAnistropy = desc->max_anisotropy; + + // !!! FIXME: add these? + //mtlsamplerdesc.lodMinClamp + //mtlsamplerdesc.lodMaxClamp + //mtlsamplerdesc.lodAverage + //mtlsamplerdesc.compareFunction + //mtlsamplerdesc.supportArgumentBuffers + + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) pipeline->device->driverdata; + samplerdata.mtlsampler = [devdata->mtldevice newSamplerStateWithDescriptor:mtlsamplerdesc]; + if (samplerdata.mtlsampler == nil) { + return SDL_SetError("Failed to create Metal sampler!"); + } + + sampler->driverdata = (void *) CFBridgingRetain(samplerdata); + + return 0; +} + +static void +METAL_GpuDestroySampler(SDL_GpuSampler *sampler) +{ + CFBridgingRelease(sampler->driverdata); +} + +static int +METAL_GpuCreateCommandBuffer(SDL_GpuCommandBuffer *cmdbuf) +{ + METAL_GpuCommandBufferData *cmdbufdata = [[METAL_GpuCommandBufferData alloc] init]; + if (cmdbufdata == nil) { + return SDL_OutOfMemory(); + } + + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) cmdbuf->device->driverdata; + cmdbufdata.mtlcmdbuf = [devdata.mtlcmdqueue commandBuffer]; + if (cmdbufdata.mtlcmdbuf == nil) { + return SDL_SetError("Failed to create Metal command buffer!"); + } + + cmdbufdata.mtlcmdbuf.label = [NSString stringWithUTF8String:cmdbuf->label]; + + cmdbuf->driverdata = (void *) CFBridgingRetain(cmdbufdata); + + return 0; +} + +static int +METAL_GpuStartRenderPass(SDL_GpuRenderPass *pass, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, const SDL_GpuStencilAttachmentDescription *stencil_attachment) +{ + METAL_GpuRenderPassData *passdata = [[METAL_GpuRenderPassData alloc] init]; + if (passdata == nil) { + return SDL_OutOfMemory(); + } + + MTLRenderPassDescriptor *mtlpassdesc = [MTLRenderPassDescriptor renderPassDescriptor]; + if (mtlpassdesc == nil) { + return SDL_OutOfMemory(); + } + + for (Uint32 i = 0; i < num_color_attachments; i++) { + const SDL_GpuColorAttachmentDescription *sdldesc = &color_attachments[i]; + 
MTLRenderPassColorAttachmentDescriptor *mtldesc = mtlpassdesc.colorAttachments[i]; + METAL_GpuTextureData *texturedata = (__bridge METAL_GpuTextureData *) sdldesc->texture->driverdata; + mtldesc.texture = texturedata.mtltexture; + mtldesc.loadAction = LoadActionToMetal(sdldesc->color_init); + mtldesc.clearColor = MTLClearColorMake(sdldesc->clear_red, sdldesc->clear_green, sdldesc->clear_blue, sdldesc->clear_alpha); + + // !!! FIXME: not used (but maybe should be)... + //mtldesc.level + //mtldesc.slice + //mtldesc.depthPlane + //mtldesc.storeAction + //mtldesc.storeActionOptions + //mtldesc.resolveTexture + //mtldesc.resolveLevel + //mtldesc.resolveSlice + //mtldesc.resolveDepthPlane + } + + if (depth_attachment) { + METAL_GpuTextureData *depthtexturedata = (__bridge METAL_GpuTextureData *) depth_attachment->texture->driverdata; + mtlpassdesc.depthAttachment.texture = depthtexturedata.mtltexture; + mtlpassdesc.depthAttachment.loadAction = LoadActionToMetal(depth_attachment->depth_init); + mtlpassdesc.depthAttachment.clearDepth = depth_attachment->clear_depth; + } + + if (stencil_attachment) { + METAL_GpuTextureData *stenciltexturedata = (__bridge METAL_GpuTextureData *) stencil_attachment->texture->driverdata; + mtlpassdesc.stencilAttachment.texture = stenciltexturedata.mtltexture; + mtlpassdesc.stencilAttachment.loadAction = LoadActionToMetal(stencil_attachment->stencil_init); + mtlpassdesc.stencilAttachment.clearDepth = stencil_attachment->clear_stencil; + } + + METAL_GpuCommandBufferData *cmdbufdata = (__bridge METAL_GpuCommandBufferData *) pass->cmdbuf->driverdata; + passdata.mtlpass = [cmdbufdata.mtlcmdbuf renderCommandEncoderWithDescriptor:mtlpassdesc]; + if (passdata.mtlpass == nil) { + return SDL_SetError("Failed to create Metal render command encoder!"); + } + + const SDL_GpuTextureDescription *colatt0 = num_color_attachments ? &color_attachments[0].texture->desc : NULL; + + // set up defaults for things that are part of SDL_GpuPipeline, but not part of MTLRenderPipelineState + passdata.mtlpipeline = nil; + passdata.mtldepthstencil = nil; + passdata.mtlprimitive = MTLPrimitiveTypeTriangleStrip; + passdata.mtlfillmode = MTLTriangleFillModeFill; + passdata.mtlfrontface = MTLWindingClockwise; + passdata.mtlcullface = MTLCullModeNone; + passdata.depth_bias = 0.0f; + passdata.depth_bias_scale = 0.0f; + passdata.depth_bias_clamp = 0.0f; + passdata.front_stencil_reference = 0x00000000; + passdata.back_stencil_reference = 0x00000000; + passdata.viewport.originX = 0.0; + passdata.viewport.originY = 0.0; + passdata.viewport.width = colatt0 ? (double) colatt0->width : 0.0; + passdata.viewport.height = colatt0 ? (double) colatt0->height : 0.0; + passdata.viewport.znear = 0.0; + passdata.viewport.zfar = 1.0; + passdata.scissor.x = 0.0; + passdata.scissor.y = 0.0; + passdata.scissor.width = colatt0 ? colatt0->width : 0; + passdata.scissor.height = colatt0 ? 
colatt0->height : 0; + passdata.blend_constant_red = 0.0f; + passdata.blend_constant_green = 0.0f; + passdata.blend_constant_blue = 0.0f; + passdata.blend_constant_alpha = 0.0f; + + pass->driverdata = (void *) CFBridgingRetain(passdata); + + return 0; +} + +static int +METAL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuPipelineData *pipelinedata = (__bridge METAL_GpuPipelineData *) pipeline->driverdata; + + if (passdata.mtlpipeline != pipelinedata.mtlpipeline) { + [passdata.mtlpass setRenderPipelineState:pipelinedata.mtlpipeline]; + passdata.mtlpipeline = pipelinedata.mtlpipeline; + } + + if (passdata.mtldepthstencil != pipelinedata.mtldepthstencil) { + [passdata.mtlpass setDepthStencilState:pipelinedata.mtldepthstencil]; + passdata.mtldepthstencil = pipelinedata.mtldepthstencil; + } + + if (passdata.mtlfillmode != pipelinedata.mtlfillmode) { + [passdata.mtlpass setTriangleFillMode:pipelinedata.mtlfillmode]; + passdata.mtlfillmode = pipelinedata.mtlfillmode; + } + + if (passdata.mtlfrontface != pipelinedata.mtlfrontface) { + [passdata.mtlpass setFrontFaceWinding:pipelinedata.mtlfrontface]; + passdata.mtlfrontface = pipelinedata.mtlfrontface; + } + + if (passdata.mtlcullface != pipelinedata.mtlcullface) { + [passdata.mtlpass setCullMode:pipelinedata.mtlcullface]; + passdata.mtlcullface = pipelinedata.mtlcullface; + + } + + if ( (passdata.depth_bias != pipelinedata.depth_bias) || + (passdata.depth_bias_scale != pipelinedata.depth_bias_scale) || + (passdata.depth_bias_clamp != pipelinedata.depth_bias_clamp) ) { + passdata.depth_bias = pipelinedata.depth_bias; + passdata.depth_bias_scale = pipelinedata.depth_bias_scale; + passdata.depth_bias_clamp = pipelinedata.depth_bias_clamp; + [passdata.mtlpass setDepthBias:passdata.depth_bias slopeScale:passdata.depth_bias_scale clamp:passdata.depth_bias_clamp]; + } + + if ( (passdata.front_stencil_reference != pipelinedata.front_stencil_reference) || + (passdata.back_stencil_reference != pipelinedata.back_stencil_reference) ) { + passdata.front_stencil_reference = pipelinedata.front_stencil_reference; + passdata.back_stencil_reference = pipelinedata.back_stencil_reference; + [passdata.mtlpass setStencilFrontReferenceValue:passdata.front_stencil_reference backReferenceValue:passdata.back_stencil_reference]; + } + + passdata.mtlprimitive = pipelinedata.mtlprimitive; // for future draws. 
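+ // (The primitive type isn't part of MTLRenderPipelineState; Metal takes it per draw
+ // call, so it's only remembered here and then handed to drawPrimitives: /
+ // drawIndexedPrimitives: by the METAL_GpuDraw* functions.)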
+ + return 0; +} + +static int +METAL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + if ( (x != pass.viewport.originX) || (y != pass.viewport.originY) || + (width != pass.viewport.width) || (height != pass.viewport.height) || + (znear != pass.viewport.znear) || (zfar != pass.viewport.zfar) ) { + passdata.viewport.originX = x; + passdata.viewport.originY = y; + passdata.viewport.width = width; + passdata.viewport.height = height; + passdata.viewport.znear = znear; + passdata.viewport.zfar = zfar; + [passdata.mtlpass setViewport:pass.viewport]; + } + return 0; +} + +static int +METAL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, Uint32 x, Uint32 y, Uint32 width, Uint32 height) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + if ( (x != pass.scissor.x) || (y != pass.scissor.y) || + (width != pass.scissor.width) || (height != pass.scissor.height) ) { + passdata.scissor.x = x; + passdata.scissor.y = y; + passdata.scissor.width = width; + passdata.scissor.height = height; + [passdata.mtlpass setScissorRect:pass.scissor]; + } + return 0; +} + +static int +METAL_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, double dred, double dgreen, double dblue, double dalpha) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + const float red = (const float) dred; + const float green = (const float) dgreen; + const float blue = (const float) dblue; + const float alpha = (const float) dalpha; + if ( (red != pass.blend_constant_red) || (green != pass.blend_constant_green) || + (blue != pass.blend_constant_blue) || (height != pass.blend_constant_alpha) ) { + passdata.blend_constant_red = red; + passdata.blend_constant_green = green; + passdata.blend_constant_blue = blue; + passdata.blend_constant_alpha = alpha; + [passdata.mtlpass setBlendColorRed:red green:green blue:blue alpha:alpha]; + } + return 0; +} + +static int +METAL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuBufferData *bufdata = buffer ? (__bridge METAL_GpuBufferData *) buffer->driverdata : nil; + [passdata.mtlpass setVertexBuffer:((bufdata == nil) ? nil : bufdata.mltbuffer) offset:offset atIndex:index]; + return 0; +} + +static int +METAL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuSamplerData *samplerdata = sampler ? (__bridge METAL_GpuSamplerData *) sampler->driverdata : nil; + [passdata.mtlpass setVertexSamplerState:((samplerdata == nil) ? nil : samplerdata.mltsampler) atIndex:index]; + return 0; +} + +static int +METAL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, Uint32 index) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuTextureData *texturedata = texture ? (__bridge METAL_GpuTextureData *) texture->driverdata : nil; + [passdata.mtlpass setVertexTexture:((texturedata == nil) ? 
nil : texturedata.mlttexture) atIndex:index]; + return 0; +} + +static int +METAL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 index) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuBufferData *bufdata = buffer ? (__bridge METAL_GpuBufferData *) buffer->driverdata : nil; + [passdata.mtlpass setFragmentBuffer:((bufdata == nil) ? nil : bufdata.mltbuffer) offset:offset atIndex:index]; + return 0; +} + +static int +METAL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, Uint32 index) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuSamplerData *samplerdata = sampler ? (__bridge METAL_GpuSamplerData *) sampler->driverdata : nil; + [passdata.mtlpass setFragmentSampler:((samplerdata == nil) ? nil : samplerdata.mltsampler) atIndex:index]; + return 0; +} + +static int +METAL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, Uint32 index) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuTextureData *texturedata = texture ? (__bridge METAL_GpuTextureData *) texture->driverdata : nil; + [passdata.mtlpass setFragmentTexture:((texturedata == nil) ? nil : texturedata.mlttexture) atIndex:index]; + return 0; +} + +static int +METAL_GpuDraw(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + [passdata.mtlpass drawPrimitives:passdata.mtlprimitive vertexStart:vertex_start vertexCount:vertex_count]; + return 0; +} + +static int +METAL_GpuDrawIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuBufferData *idxbufdata = (__bridge METAL_GpuBufferData *) index_buffer->driverdata; + [passdata.mtlpass drawIndexedPrimitives:passdata.mtlprimitive indexCount:index_count indexType:IndexTypeToMetal(index_type) index_buffer:idxbufdata.mtlbuffer indexBufferOffset:index_offset]; + return 0; +} + +static int +METAL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 vertex_count, Uint32 instance_count, Uint32 base_instance) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + [passdata.mtlpass drawPrimitives:passdata.mtlprimitive vertexStart:vertex_start vertexCount:vertex_count instanceCount:instance_count baseInstance:base_instance]; + return 0; +} + +static int +METAL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance) +{ + METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; + METAL_GpuBufferData *idxbufdata = (__bridge METAL_GpuBufferData *) index_buffer->driverdata; + [passdata.mtlpass drawIndexedPrimitives:passdata.mtlprimitive indexCount:index_count indexType:IndexTypeToMetal(index_type) index_buffer:idxbufdata.mtlbuffer indexBufferOffset:index_offset instanceCount:instance_count baseVertex:base_vertex baseInstance:base_instance]; + return 0; +} + +static int +METAL_GpuEndRenderPass(SDL_GpuRenderPass *pass) +{ + METAL_GpuRenderPassData *passdata = (__bridge 
METAL_GpuRenderPassData *) pass->driverdata; + [passdata.mtlpass endEncoding]; + CFBridgingRelease(pass->driverdata); + return 0; +} + +static int +METAL_GpuStartBlitPass(SDL_GpuBlitPass *pass) +{ + METAL_GpuBlitPassData *passdata = [[METAL_GpuBlitPassData alloc] init]; + if (passdata == nil) { + return SDL_OutOfMemory(); + } + + METAL_GpuCommandBufferData *cmdbufdata = (__bridge METAL_GpuCommandBufferData *) pass->cmdbuf->driverdata; + passdata.mtlpass = [cmdbufdata.mtlcmdbuf blitCommandEncoder]; + if (passdata.mtlpass == nil) { + return SDL_SetError("Failed to create Metal blit command encoder!"); + } + + pass->driverdata = (void *) CFBridgingRetain(passdata); + + return 0; +} + +static int +METAL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, + Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, + SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) +{ + METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + METAL_GpuTextureData *srctexdata = (__bridge METAL_GpuTextureData *) srctex->driverdata; + METAL_GpuTextureData *dsttexdata = (__bridge METAL_GpuTextureData *) dsttex->driverdata; + [passdata.mtlpass copyFromTexture:srctexdata.mtltexture + sourceSlice:srcslice sourceLevel:srclevel sourceOrigin:MTLOriginMake(srcx, srcy, srcz) sourceSize:MTLSizeMake(srcw, srch, srcdepth) + toTexture:dsttexdata.mtltexture destinationSlice:dstslice destinationLevel:dstlevel destinationOrigin:MTLOriginMake(dstx, dsty, dstz)]; + return 0; +} + +static int +METAL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value) +{ + METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + METAL_GpuBufferData *bufferdata = (__bridge METAL_GpuBufferData *) buffer->driverdata; + [passdata.mtlpass fillBuffer:bufferdata.mtlbuffer range:NSMakeRange(offset, length) value:value]; + return 0; +} + +static int +METAL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) +{ + METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + METAL_GpuTextureData *texdata = (__bridge METAL_GpuTextureData *) texture->driverdata; + [passdata.mtlpass generateMipmapsForTexture:texdata.mtltexture]; + return 0; +} + +static int +BlitPassCopyBetweenBuffers(SDL_GpuBlitPass *pass, void *_srcbufdata, Uint32 srcoffset, void *_dstbufdata, Uint32 dstoffset, Uint32 length) +{ + // Metal abstract CPU and GPU buffers behind the same class, so we can copy either direction with the same code. 
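+ // (CPU buffers were created with shared storage and GPU buffers with private storage,
+ // but both are MTLBuffer objects, so the blit encoder's buffer-to-buffer copy below
+ // covers CPU-to-GPU, GPU-to-CPU, and GPU-to-GPU transfers alike.)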
+ METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + METAL_GpuBufferData *srcbufdata = (__bridge METAL_GpuBufferData *) _srcbufdata; + METAL_GpuBufferData *dstbufdata = (__bridge METAL_GpuBufferData *) _dstbufdata; + [passdata.mtlpass copyFromBuffer:srcbufdata.mtlbuffer sourceOffset:srcoffset toBuffer:dstbufdata.mtlbuffer, destinationOffset:dstoffset size:length]; + return 0; +} + +static int +METAL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +{ + return BlitPassCopyBetweenBuffers(pass, srcbuf->driverdata, srcoffset, dstbuf->driverdata, dstoffset, length); +} + +static int +METAL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +{ + return BlitPassCopyBetweenBuffers(pass, srcbuf->driverdata, srcoffset, dstbuf->driverdata, dstoffset, length); +} + +static int +METAL_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +{ + return BlitPassCopyBetweenBuffers(pass, srcbuf->driverdata, srcoffset, dstbuf->driverdata, dstoffset, length); +} + +static int +METAL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) +{ + METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + METAL_GpuBufferData *srcbufdata = (__bridge METAL_GpuBufferData *) srcbuf->driverddata; + METAL_GpuTextureData *dsttexdata = (__bridge METAL_GpuTextureData *) dsttex->driverdata; + [passdata.mtlpass copyFromBuffer:srcbufdata.mtlbuffer + sourceOffset:srcoffset sourceBytesPerRow:srcpitch sourceBytesPerImage:srcimgpitch sourceSize:MTLMakeSize(srcw, srch, srcdepth) + toTexture:dsttxtdata.mtltexture destinationSlice:dstslice destinationLevel:dstlevel destinationOrigin:MTLMakeOrigin(dstx, dsty, dstz)]; + return 0; +} + +static int +METAL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) +{ + METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + METAL_GpuTextureData *srctexdata = (__bridge METAL_GpuTextureData *) srctex->driverdata; + METAL_GpuBufferData *dstbufdata = (__bridge METAL_GpuBufferData *) dstbuf->driverddata; + [passdata.mtlpass copyFromTexture:srctexdata.mtltexture + sourceSlice:srcslice sourceLevel:srclevel sourceOrigin:MTLMakeOrigin(srcx, srcy, srcz) sourceSize:MTLMakeSize(srcw, srch, srcdepth) + toBuffer:dstbufdata.mtlbuffer destinationOffset:dstoffset destinationBytesPerRow:dstpitch destinationBytesPerImage:dstimgpitch]; + return 0; +} + +static int +METAL_GpuEndBlitPass(SDL_GpuBlitPass *pass) +{ + METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; + [passdata.mtlpass endEncoding]; + CFBridgingRelease(pass->driverdata); + return 0; +} + +static int +METAL_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence) +{ + METAL_GpuCommandBufferData *cmdbufdata = (__bridge METAL_GpuCommandBufferData *) cmdbuf->driverdata; + if (fence) { + [cmdbufdata.mtlcmdbuf 
addCompletedHandler:^(id buffer) { + METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; + SDL_LockMutex(fencedata.mutex); + SDL_AtomicSet(&fencedata.flag, 1); + SDL_CondBroadcast(fencedata.condition); + SDL_UnlockMutex(fencedata.mutex); + }]; + } + [cmdbufdata.mtlcmdbuf commit]; + CFBridgingRelease(cmdbuf->driverdata); + return 0; +} + +static void +METAL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) +{ + CFBridgingRelease(buffer->driverdata); // !!! FIXME: I guess maybe it abandons if reference count drops to zero...? +} + +static int +METAL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *texture) +{ + METAL_GpuWindowData *windata = (__bridge METAL_GpuWindowData *) window->gpu_driverdata; + METAL_GpuTextureData *texturedata = nil; + + SDL_assert(windata.mtldrawable == nil); // higher level should have checked this. + + texturedata = [[METAL_GpuTextureData alloc] init]; + if (texturedata == nil) { + return SDL_OutOfMemory(); + } + + texturedata.mtltexture = nil; + windata.mtldrawable = [windata.mtllayer nextDrawable]; + if (windata.mtldrawable == nil) { + return SDL_SetError("Failed to get next Metal drawable. Your window might be minimized?"); + } + texturedata.mtltexture = windata.mtldrawable.texture; + texturedata.mtltexture.label = [NSString stringWithUTF8String:texture->label]; + + texture->desc.width = texturedata.mtltexture.width; + texture->desc.height = texturedata.mtltexture.height; + texture->desc.pixel_format = PixelFormatFromMetal(texturedata.mtltexture.pixelFormat); + if (texture->pixel_format == SDL_GPUPIXELFMT_INVALID) { + SDL_assert(!"Uhoh, we might need to add a new pixel format to SDL_gpu.h"); + windata.mtldrawable = nil; + return -1; + } + + texture->driverdata = (void *) CFBridgingRetain(texturedata); + return 0; +} + +static int +METAL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, SDL_GpuTexture *backbuffer, int swapinterval) +{ + METAL_GpuWindowData *windata = (__bridge METAL_GpuWindowData *) window->gpu_driverdata; + METAL_GpuTextureData *texturedata = (__bridge METAL_GpuTextureData *) backbuffer->driverdata; + + SDL_assert(windata.mtldrawable != nil); // higher level should have checked this. + + [windata.mtldrawable present]; + + // let ARC clean things up. + texturedata.mtltexture = nil; + windata.mtldrawable = nil; + CFBridgingRelease(backbuffer->driverdata); + backbuffer->driverdata = NULL; // just in case. 
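+    // The backbuffer texture only lives between GetBackbuffer and Present: it is
+    // borrowed from the CAMetalDrawable, so once it has been presented we tear the
+    // wrapper down and the next frame has to request a fresh drawable again.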
+ return 0; +} + +static int +METAL_GpuCreateFence(SDL_GpuFence *fence) +{ + METAL_GpuFenceData *fencedata = [[METAL_GpuFenceData alloc] init]; + if (fencedata == nil) { + return SDL_OutOfMemory(); + } + + fencedata.mutex = SDL_CreateMutex(); + if (!fencedata.mutex) { + return -1; + } + + fencedata.condition = SDL_CreateCond(); + if (!fencedata.condition) { + return -1; + } + + SDL_AtomicSet(&fencedata.flag, 0); + + fence->driverdata = (void *) CFBridgingRetain(fencedata); + + return 0; +} + +static void +METAL_GpuDestroyFence(SDL_GpuFence *fence) +{ + CFBridgingRelease(fence->driverdata); +} + +static int +METAL_GpuQueryFence(SDL_GpuFence *fence) +{ + METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; + return SDL_AtomicGet(&fencedata.flag); +} + +static int +METAL_GpuResetFence(SDL_GpuFence *fence) +{ + METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; + SDL_AtomicSet(&fencedata.flag, 0); + return 0; +} + +static int +METAL_GpuWaitFence(SDL_GpuFence *fence) +{ + METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; + + if (SDL_LockMutex(fencedata.mutex) == -1) { + return -1; + } + + while (SDL_AtomicGet(&fencedata.flag) == 0) { + if (SDL_CondWait(fencedata.condition, fencedata.mutex) == -1) { + SDL_UnlockMutex(fencedata.mutex); + return -1; + } + } + + SDL_UnlockMutex(fencedata.mutex); + + return 0; +} + +static void +METAL_GpuDestroyDevice(SDL_GpuDevice *device) +{ + CFBridgingRelease(device->driverdata); +} + +static int +IsMetalAvailable(void) +{ + // !!! FIXME: iOS 8 has Metal but is missing a few small features they fixed in iOS 9. + // !!! FIXME: It can probably limp along, but we should probably just refuse to run there. It's ancient anyhow. + SDL_VideoDevice *viddev = SDL_GetVideoDevice(); + + SDL_assert(viddev != NULL); + + if ((SDL_strcmp(viddev, "cocoa") != 0) && (SDL_strcmp(viddev, "uikit") != 0)) { + return SDL_SetError("Metal GPU driver only supports Cocoa and UIKit video targets at the moment."); + } + + // this checks a weak symbol. +#if (defined(__MACOSX__) && (MAC_OS_X_VERSION_MIN_REQUIRED < 101100)) + if (MTLCreateSystemDefaultDevice == NULL) { // probably on 10.10 or lower. + return SDL_SetError("Metal framework not available on this system"); + } +#endif + + return 0; +} + +static int +METAL_GpuCreateDevice(SDL_GpuDevice *device) +{ + METAL_GpuDeviceData *devdata; + + if (IsMetalAvailable() == -1) { + return -1; + } + + devdata = [[METAL_GpuDeviceData alloc] init]; + if (!devdata) { + return SDL_OutOfMemory(); + } + + // !!! FIXME: MTLCopyAllDevices() can find other GPUs on macOS... 
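+    // Something along these lines could enumerate them and prefer a discrete GPU
+    // instead of just taking the system default below (MTLCopyAllDevices and
+    // isLowPower are macOS-only, so it would need an #ifdef):
+    //    for (id<MTLDevice> d in MTLCopyAllDevices()) {
+    //        if (!d.isLowPower) { devdata.mtldevice = d; break; }
+    //    }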
+ devdata.mtldevice = MTLCreateSystemDefaultDevice(); + if (devdata.mtldevice == nil) { + return SDL_SetError("Failed to obtain Metal device"); + } + + devdata.mtlcmdqueue = [devdata.mtldevice newCommandQueue]; + if (devdata.mtlcmdqueue == nil) { + return SDL_SetError("Failed to create Metal command queue"); + } + + if (device->label) { + devdata.mtlcmdqueue.label = [NSString stringWithUTF8String:device->label]; + } + + device->driverdata = (void *) CFBridgingRetain(devdata); + device->DestroyDevice = METAL_GpuDestroyDevice; + device->ClaimWindow = METAL_GpuClaimWindow; + device->CreateCpuBuffer = METAL_GpuCreateCpuBuffer; + device->DestroyCpuBuffer = METAL_GpuDestroyCpuBuffer; + device->LockCpuBuffer = METAL_GpuLockCpuBuffer; + device->UnlockCpuBuffer = METAL_GpuUnlockCpuBuffer; + device->CreateBuffer = METAL_GpuCreateBuffer; + device->DestroyBuffer = METAL_GpuDestroyBuffer; + device->CreateTexture = METAL_GpuCreateTexture; + device->DestroyTexture = METAL_GpuDestroyTexture; + device->CreateShader = METAL_GpuCreateShader; + device->DestroyShader = METAL_GpuDestroyShader; + device->CreatePipeline = METAL_GpuCreatePipeline; + device->DestroyPipeline = METAL_GpuDestroyPipeline; + device->CreateSampler = METAL_GpuCreateSampler; + device->DestroySampler = METAL_GpuDestroySampler; + device->CreateCommandBuffer = METAL_GpuCreateCommandBuffer; + device->StartRenderPass = METAL_GpuStartRenderPass; + device->SetRenderPassPipeline = METAL_GpuSetRenderPassPipeline; + device->SetRenderPassViewport = METAL_GpuSetRenderPassViewport; + device->SetRenderPassScissor = METAL_GpuSetRenderPassScissor; + device->SetRenderPassBlendConstant = METAL_GpuSetRenderPassBlendConstant; + device->SetRenderPassVertexBuffer = METAL_GpuSetRenderPassVertexBuffer; + device->SetRenderPassVertexSampler = METAL_GpuSetRenderPassVertexSampler; + device->SetRenderPassVertexTexture = METAL_GpuSetRenderPassVertexTexture; + device->SetRenderPassFragmentBuffer = METAL_GpuSetRenderPassFragmentBuffer; + device->SetRenderPassFragmentSampler = METAL_GpuSetRenderPassFragmentSampler; + device->SetRenderPassFragmentTexture = METAL_GpuSetRenderPassFragmentTexture; + device->Draw = METAL_GpuDraw; + device->DrawIndexed = METAL_GpuDrawIndexed; + device->DrawInstanced = METAL_GpuDrawInstanced; + device->DrawInstancedIndexed = METAL_GpuDrawInstancedIndexed; + device->EndRenderPass = METAL_GpuEndRenderPass; + device->StartBlitPass = METAL_GpuStartBlitPass; + device->CopyBetweenTextures = METAL_GpuCopyBetweenTextures; + device->FillBuffer = METAL_GpuFillBuffer; + device->GenerateMipmaps = METAL_GpuGenerateMipmaps; + device->CopyBufferCpuToGpu = METAL_GpuCopyBufferCpuToGpu; + device->CopyBufferGpuToCpu = METAL_GpuCopyBufferGpuToCpu; + device->CopyBufferGpuToGpu = METAL_GpuCopyBufferGpuToGpu; + device->CopyFromBufferToTexture = METAL_GpuCopyFromBufferToTexture; + device->CopyFromTextureToBuffer = METAL_GpuCopyFromTextureToBuffer; + device->EndBlitPass = METAL_GpuEndBlitPass; + device->SubmitCommandBuffer = METAL_GpuSubmitCommandBuffer; + device->AbandonCommandBuffer = METAL_GpuAbandonCommandBuffer; + device->GetBackbuffer = METAL_GpuGetBackbuffer; + device->Present = METAL_GpuPresent; + device->CreateFence = METAL_GpuCreateFence; + device->DestroyFence = METAL_GpuDestroyFence; + device->QueryFence = METAL_GpuQueryFence; + device->ResetFence = METAL_GpuResetFence; + device->WaitFence = METAL_GpuWaitFence; + + return 0; +} + +const SDL_GpuDriver METAL_GpuDriver = { + "metal", METAL_GpuCreateDevice +}; + +/* vi: set ts=4 sw=4 expandtab: */ diff --git 
a/src/video/SDL_sysvideo.h b/src/video/SDL_sysvideo.h index 3d7b414bdee85..6a50f73f8ebe8 100644 --- a/src/video/SDL_sysvideo.h +++ b/src/video/SDL_sysvideo.h @@ -110,6 +110,8 @@ struct SDL_Window void *gpu_device; /* this is only used by the GPU API, don't touch. */ void *gpu_backbuffer; /* this is only used by the GPU API, don't touch. */ + void *gpu_driverdata; /* this is only used by the GPU API, don't touch. */ + Uint64 gpu_framenum; /* this is only used by the GPU API, don't touch. */ SDL_Window *prev; SDL_Window *next; From 2bd76d48d13e4a05739fb1b1ded7fd67ba94baff Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 12 May 2022 13:05:05 -0400 Subject: [PATCH 39/54] gpu: Patched Metal backend to compile. Still doesn't _do_ anything, as we don't have shader support figured out at any level yet, or have the higher level API symbols exported from SDL itself yet. --- src/gpu/metal/SDL_gpu_metal.m | 268 +++++++++++++++++----------------- 1 file changed, 131 insertions(+), 137 deletions(-) diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index 037438d6b51fd..863a6e20bd72a 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -25,7 +25,9 @@ /* The Apple Metal driver for the GPU subsystem. */ #include "SDL.h" +#include "SDL_syswm.h" #include "../SDL_sysgpu.h" +#include "../../video/SDL_sysvideo.h" #include #import @@ -40,6 +42,7 @@ #error Please build with ARC support. #endif + @interface METAL_GpuDeviceData : NSObject @property (nonatomic, retain) id mtldevice; @property (nonatomic, retain) id mtlcmdqueue; @@ -60,10 +63,10 @@ @implementation METAL_GpuWindowData @interface METAL_GpuBufferData : NSObject // this covers CPU and GPU buffers. - @property (nonatomic, retain) id mtlbuf; + @property (nonatomic, retain) id mtlbuffer; @end -@implementation METAL_GpuWindowData +@implementation METAL_GpuBufferData @end @@ -84,18 +87,17 @@ @implementation METAL_GpuShaderData @interface METAL_GpuPipelineData : NSObject @property (nonatomic, retain) id mtlpipeline; @property (nonatomic, retain) id mtldepthstencil; - // these are part of the SDL GPU pipeline but not MTLRenderPipelineState, so we // keep a copy and set them when setting a new pipeline state. - MTLPrimitiveType mtlprimitive; - MTLTriangleFillMode mtlfillmode; - MTLWinding mtlfrontface; - MTLCullMode mtlcullface; - float depth_bias; - float depth_bias_scale; - float depth_bias_clamp; - Uint32 front_stencil_reference; - Uint32 back_stencil_reference; + @property (nonatomic, assign) MTLPrimitiveType mtlprimitive; + @property (nonatomic, assign) MTLTriangleFillMode mtlfillmode; + @property (nonatomic, assign) MTLWinding mtlfrontface; + @property (nonatomic, assign) MTLCullMode mtlcullface; + @property (nonatomic, assign) float depth_bias; + @property (nonatomic, assign) float depth_bias_scale; + @property (nonatomic, assign) float depth_bias_clamp; + @property (nonatomic, assign) Uint32 front_stencil_reference; + @property (nonatomic, assign) Uint32 back_stencil_reference; @end @implementation METAL_GpuPipelineData @@ -120,21 +122,21 @@ @interface METAL_GpuRenderPassData : NSObject // current state of things, so we don't re-set a currently set state. 
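+ // Each SetRenderPass* call below checks against these cached values and only
+ // talks to the MTLRenderCommandEncoder when something actually changed, so
+ // redundant state changes encoded by the app stay cheap.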
@property (nonatomic, retain) id mtlpipeline; @property (nonatomic, retain) id mtldepthstencil; - MTLPrimitiveType mtlprimitive; - MTLTriangleFillMode mtlfillmode; - MTLWinding mtlfrontface; - MTLCullMode mtlcullface; - float depth_bias; - float depth_bias_scale; - float depth_bias_clamp; - Uint32 front_stencil_reference; - Uint32 back_stencil_reference; - MTLViewport viewport; - MTLScissorRect scissor; - float blend_constant_red; - float blend_constant_green; - float blend_constant_blue; - float blend_constant_alpha; + @property (nonatomic, assign) MTLViewport viewport; + @property (nonatomic, assign) MTLScissorRect scissor; + @property (nonatomic, assign) MTLPrimitiveType mtlprimitive; + @property (nonatomic, assign) MTLTriangleFillMode mtlfillmode; + @property (nonatomic, assign) MTLWinding mtlfrontface; + @property (nonatomic, assign) MTLCullMode mtlcullface; + @property (nonatomic, assign) float depth_bias; + @property (nonatomic, assign) float depth_bias_scale; + @property (nonatomic, assign) float depth_bias_clamp; + @property (nonatomic, assign) Uint32 front_stencil_reference; + @property (nonatomic, assign) Uint32 back_stencil_reference; + @property (nonatomic, assign) float blend_constant_red; + @property (nonatomic, assign) float blend_constant_green; + @property (nonatomic, assign) float blend_constant_blue; + @property (nonatomic, assign) float blend_constant_alpha; @end @implementation METAL_GpuRenderPassData @@ -147,25 +149,17 @@ @interface METAL_GpuBlitPassData : NSObject @implementation METAL_GpuBlitPassData @end -@interface METAL_GpuFenceData : NSObject - @property (nonatomic, assign) SDL_atomic_t flag; - @property (nonatomic, assign) SDL_mutex *mutex; - @property (nonatomic, assign) SDL_cond *condition; -@end - -@implementation METAL_GpuFenceData -- (void)dealloc +// everything else is wrapped in an Objective-C object to let ARC +// handle memory management and object lifetimes, but this is all +// SDL objects that need to be manually destroyed and an atomic +// that doesn't play well with @properties, so I just went with +// a struct I malloc myself for this one. 
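+// The command buffer's completion handler (which Metal may invoke on one of its
+// own internal threads) locks the mutex, sets the flag and broadcasts the
+// condition, so WaitFence can block on it while QueryFence just polls the atomic.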
+typedef struct METAL_GpuFenceData { - if (self.mutex) { - SDL_DestroyMutex(self.mutex); - } - - if (self.condition) { - SDL_DestroyCondition(self.condition); - } -} -@end - + SDL_atomic_t flag; + SDL_mutex *mutex; + SDL_cond *condition; +} METAL_GpuFenceData; #define METAL_PIXFMT_MAPPINGS \ METAL_MAPPIXFMT(SDL_GPUPIXELFMT_B5G6R5, MTLPixelFormatB5G6R5Unorm) \ @@ -196,6 +190,7 @@ - (void)dealloc switch (fmt) { #define METAL_MAPPIXFMT(sdlfmt, mtlfmt) case mtlfmt: return sdlfmt; METAL_PIXFMT_MAPPINGS + default: break; #undef METAL_MAPPIXFMT } @@ -261,7 +256,7 @@ - (void)dealloc } SDL_assert(!"Unexpected blend operation"); - return MTLVertexFormatAdd; + return MTLBlendOperationAdd; } static MTLBlendFactor @@ -521,7 +516,7 @@ - (void)dealloc return SDL_OutOfMemory(); } - windata.backbuffer = nil; + windata.mtldrawable = nil; if (!(window_flags & SDL_WINDOW_METAL)) { changed_window = SDL_TRUE; @@ -550,7 +545,8 @@ - (void)dealloc layer = (CAMetalLayer *)[(__bridge UIView *)windata.mtlview layer]; #endif - layer.device = data.mtldevice; + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) device->driverdata; + layer.device = devdata.mtldevice; layer.framebufferOnly = NO; windata.mtllayer = layer; @@ -571,17 +567,17 @@ - (void)dealloc } if (data != NULL) { - bufferdata.mtlbuf = [devdata->mtldevice newBufferWithBytes:data length:buffer->buflen options:MTLResourceStorageModeShared]; + bufferdata.mtlbuffer = [devdata.mtldevice newBufferWithBytes:data length:buffer->buflen options:MTLResourceStorageModeShared]; } else { - bufferdata.mtlbuf = [devdata->mtldevice newBufferWithLength:buffer->buflen options:MTLResourceStorageModeShared]; + bufferdata.mtlbuffer = [devdata.mtldevice newBufferWithLength:buffer->buflen options:MTLResourceStorageModeShared]; } - if (bufferdata.mtlbuf == nil) { + if (bufferdata.mtlbuffer == nil) { SDL_SetError("Failed to create Metal buffer!"); } if (buffer->label) { - bufferdata.mtlbuf.label = [NSString stringWithUTF8String:buffer->label]; + bufferdata.mtlbuffer.label = [NSString stringWithUTF8String:buffer->label]; } buffer->driverdata = (void *) CFBridgingRetain(bufferdata); @@ -599,7 +595,7 @@ - (void)dealloc METAL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer) { METAL_GpuBufferData *bufdata = (__bridge METAL_GpuBufferData *) buffer->driverdata; - void *retval = [bufdata->mtlbuf contents]; + void *retval = [bufdata.mtlbuffer contents]; SDL_assert(retval != NULL); // should only return NULL for private (GPU-only) buffers. return retval; } @@ -621,13 +617,13 @@ - (void)dealloc return SDL_OutOfMemory(); } - bufferdata.mtlbuf = [devdata->mtldevice newBufferWithLength:buffer->buflen options:MTLResourceStorageModePrivate]; - if (bufferdata.mtlbuf == nil) { + bufferdata.mtlbuffer = [devdata.mtldevice newBufferWithLength:buffer->buflen options:MTLResourceStorageModePrivate]; + if (bufferdata.mtlbuffer == nil) { SDL_SetError("Failed to create Metal buffer!"); } if (buffer->label) { - bufferdata.mtlbuf.label = [NSString stringWithUTF8String:buffer->label]; + bufferdata.mtlbuffer.label = [NSString stringWithUTF8String:buffer->label]; } buffer->driverdata = (void *) CFBridgingRetain(bufferdata); @@ -666,12 +662,14 @@ - (void)dealloc default: return SDL_SetError("Unsupported texture type"); }; + (void) is_array; // (we don't actually use this at the moment, silence compiler warning.) 
+ MTLTextureUsage mtltexusage = (MTLTextureUsage) 0; if (desc->usage & SDL_GPUTEXUSAGE_SHADER_READ) { mtltexusage |= MTLTextureUsageShaderRead; } if (desc->usage & SDL_GPUTEXUSAGE_SHADER_WRITE) { - mtltexusage |= MTLTextureUsageShaderWite; + mtltexusage |= MTLTextureUsageShaderWrite; } if (desc->usage & SDL_GPUTEXUSAGE_RENDER_TARGET) { mtltexusage |= MTLTextureUsageRenderTarget; @@ -682,7 +680,7 @@ - (void)dealloc return SDL_OutOfMemory(); } - texturedata.mtldrawable = nil; + texturedata.mtltexture = nil; // !!! FIXME: does ARC know what to do with these, since it doesn't start with "alloc" or "new"? MTLTextureDescriptor *mtltexdesc; @@ -721,7 +719,7 @@ - (void)dealloc //mtltexdesc.swizzle = blahblahblah; METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) texture->device->driverdata; - texturedata.mtltexture = [devdata->device newTextureWithDescriptor:mtltexdesc]; + texturedata.mtltexture = [devdata.mtldevice newTextureWithDescriptor:mtltexdesc]; if (texturedata.mtltexture == nil) { SDL_SetError("Failed to create Metal texture!"); } @@ -824,7 +822,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) // Not available before iOS 12. if ([mtlpipedesc respondsToSelector:@selector(inputPrimitiveTopology)]) { - mtltexdesc.inputPrimitiveTopology = PrimitiveTopologyToMetal(desc->primitive); + mtlpipedesc.inputPrimitiveTopology = PrimitiveTopologyToMetal(desc->primitive); } // these arrived in later releases, but we _probably_ want the defaults anyhow (and/or we don't support it). @@ -879,8 +877,8 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) NSError *err = nil; METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) pipeline->device->driverdata; - pipelinedata.mtldepthstencil = [devdata->mtldevice newDepthStencilStateWithDescriptor:mtldepthstencildesc]; - pipelinedata.mtlpipeline = [devdata->mtldevice newRenderPipelineStateWithDescriptor:mtlpipedesc error:&err]; + pipelinedata.mtldepthstencil = [devdata.mtldevice newDepthStencilStateWithDescriptor:mtldepthstencildesc]; + pipelinedata.mtlpipeline = [devdata.mtldevice newRenderPipelineStateWithDescriptor:mtlpipedesc error:&err]; pipelinedata.mtlprimitive = PrimitiveToMetal(desc->primitive); pipelinedata.mtlfillmode = FillModeToMetal(desc->fill_mode); pipelinedata.mtlfrontface = FrontFaceToMetal(desc->front_face); @@ -934,7 +932,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) mtlsamplerdesc.minFilter = SamplerMinMagFilterToMetal(desc->min_filter); mtlsamplerdesc.magFilter = SamplerMinMagFilterToMetal(desc->mag_filter); mtlsamplerdesc.mipFilter = SamplerMipFilterToMetal(desc->mip_filter); - mtlsamplerdesc.maxAnistropy = desc->max_anisotropy; + mtlsamplerdesc.maxAnisotropy = desc->max_anisotropy; // !!! FIXME: add these? 
//mtlsamplerdesc.lodMinClamp @@ -943,8 +941,8 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) //mtlsamplerdesc.compareFunction //mtlsamplerdesc.supportArgumentBuffers - METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) pipeline->device->driverdata; - samplerdata.mtlsampler = [devdata->mtldevice newSamplerStateWithDescriptor:mtlsamplerdesc]; + METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) sampler->device->driverdata; + samplerdata.mtlsampler = [devdata.mtldevice newSamplerStateWithDescriptor:mtlsamplerdesc]; if (samplerdata.mtlsampler == nil) { return SDL_SetError("Failed to create Metal sampler!"); } @@ -1025,7 +1023,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) METAL_GpuTextureData *stenciltexturedata = (__bridge METAL_GpuTextureData *) stencil_attachment->texture->driverdata; mtlpassdesc.stencilAttachment.texture = stenciltexturedata.mtltexture; mtlpassdesc.stencilAttachment.loadAction = LoadActionToMetal(stencil_attachment->stencil_init); - mtlpassdesc.stencilAttachment.clearDepth = stencil_attachment->clear_stencil; + mtlpassdesc.stencilAttachment.clearStencil = stencil_attachment->clear_stencil; } METAL_GpuCommandBufferData *cmdbufdata = (__bridge METAL_GpuCommandBufferData *) pass->cmdbuf->driverdata; @@ -1048,16 +1046,10 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) passdata.depth_bias_clamp = 0.0f; passdata.front_stencil_reference = 0x00000000; passdata.back_stencil_reference = 0x00000000; - passdata.viewport.originX = 0.0; - passdata.viewport.originY = 0.0; - passdata.viewport.width = colatt0 ? (double) colatt0->width : 0.0; - passdata.viewport.height = colatt0 ? (double) colatt0->height : 0.0; - passdata.viewport.znear = 0.0; - passdata.viewport.zfar = 1.0; - passdata.scissor.x = 0.0; - passdata.scissor.y = 0.0; - passdata.scissor.width = colatt0 ? colatt0->width : 0; - passdata.scissor.height = colatt0 ? colatt0->height : 0; + const MTLViewport initialvp = { 0.0, 0.0, colatt0 ? (double) colatt0->width : 0.0, colatt0 ? (double) colatt0->height : 0.0, 0.0, 1.0 }; + passdata.viewport = initialvp; + const MTLScissorRect initialscis = { 0.0, 0.0, colatt0 ? (double) colatt0->width : 0, colatt0 ? 
(double) colatt0->height : 0 }; + passdata.scissor = initialscis; passdata.blend_constant_red = 0.0f; passdata.blend_constant_green = 0.0f; passdata.blend_constant_blue = 0.0f; @@ -1090,7 +1082,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) } if (passdata.mtlfrontface != pipelinedata.mtlfrontface) { - [passdata.mtlpass setFrontFaceWinding:pipelinedata.mtlfrontface]; + [passdata.mtlpass setFrontFacingWinding:pipelinedata.mtlfrontface]; passdata.mtlfrontface = pipelinedata.mtlfrontface; } @@ -1125,16 +1117,12 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) METAL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, double x, double y, double width, double height, double znear, double zfar) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; - if ( (x != pass.viewport.originX) || (y != pass.viewport.originY) || - (width != pass.viewport.width) || (height != pass.viewport.height) || - (znear != pass.viewport.znear) || (zfar != pass.viewport.zfar) ) { - passdata.viewport.originX = x; - passdata.viewport.originY = y; - passdata.viewport.width = width; - passdata.viewport.height = height; - passdata.viewport.znear = znear; - passdata.viewport.zfar = zfar; - [passdata.mtlpass setViewport:pass.viewport]; + if ( (x != passdata.viewport.originX) || (y != passdata.viewport.originY) || + (width != passdata.viewport.width) || (height != passdata.viewport.height) || + (znear != passdata.viewport.znear) || (zfar != passdata.viewport.zfar) ) { + const MTLViewport vp = { x, y, width, height, znear, zfar }; + passdata.viewport = vp; + [passdata.mtlpass setViewport:vp]; } return 0; } @@ -1143,13 +1131,11 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) METAL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, Uint32 x, Uint32 y, Uint32 width, Uint32 height) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; - if ( (x != pass.scissor.x) || (y != pass.scissor.y) || - (width != pass.scissor.width) || (height != pass.scissor.height) ) { - passdata.scissor.x = x; - passdata.scissor.y = y; - passdata.scissor.width = width; - passdata.scissor.height = height; - [passdata.mtlpass setScissorRect:pass.scissor]; + if ( (x != passdata.scissor.x) || (y != passdata.scissor.y) || + (width != passdata.scissor.width) || (height != passdata.scissor.height) ) { + const MTLScissorRect scis = { x, y, width, height }; + passdata.scissor = scis; + [passdata.mtlpass setScissorRect:scis]; } return 0; } @@ -1162,8 +1148,8 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) const float green = (const float) dgreen; const float blue = (const float) dblue; const float alpha = (const float) dalpha; - if ( (red != pass.blend_constant_red) || (green != pass.blend_constant_green) || - (blue != pass.blend_constant_blue) || (height != pass.blend_constant_alpha) ) { + if ( (red != passdata.blend_constant_red) || (green != passdata.blend_constant_green) || + (blue != passdata.blend_constant_blue) || (alpha != passdata.blend_constant_alpha) ) { passdata.blend_constant_red = red; passdata.blend_constant_green = green; passdata.blend_constant_blue = blue; @@ -1178,7 +1164,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuBufferData *bufdata = buffer ? (__bridge METAL_GpuBufferData *) buffer->driverdata : nil; - [passdata.mtlpass setVertexBuffer:((bufdata == nil) ? 
nil : bufdata.mltbuffer) offset:offset atIndex:index]; + [passdata.mtlpass setVertexBuffer:((bufdata == nil) ? nil : bufdata.mtlbuffer) offset:offset atIndex:index]; return 0; } @@ -1187,7 +1173,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuSamplerData *samplerdata = sampler ? (__bridge METAL_GpuSamplerData *) sampler->driverdata : nil; - [passdata.mtlpass setVertexSamplerState:((samplerdata == nil) ? nil : samplerdata.mltsampler) atIndex:index]; + [passdata.mtlpass setVertexSamplerState:((samplerdata == nil) ? nil : samplerdata.mtlsampler) atIndex:index]; return 0; } @@ -1196,7 +1182,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuTextureData *texturedata = texture ? (__bridge METAL_GpuTextureData *) texture->driverdata : nil; - [passdata.mtlpass setVertexTexture:((texturedata == nil) ? nil : texturedata.mlttexture) atIndex:index]; + [passdata.mtlpass setVertexTexture:((texturedata == nil) ? nil : texturedata.mtltexture) atIndex:index]; return 0; } @@ -1205,7 +1191,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuBufferData *bufdata = buffer ? (__bridge METAL_GpuBufferData *) buffer->driverdata : nil; - [passdata.mtlpass setFragmentBuffer:((bufdata == nil) ? nil : bufdata.mltbuffer) offset:offset atIndex:index]; + [passdata.mtlpass setFragmentBuffer:((bufdata == nil) ? nil : bufdata.mtlbuffer) offset:offset atIndex:index]; return 0; } @@ -1214,7 +1200,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuSamplerData *samplerdata = sampler ? (__bridge METAL_GpuSamplerData *) sampler->driverdata : nil; - [passdata.mtlpass setFragmentSampler:((samplerdata == nil) ? nil : samplerdata.mltsampler) atIndex:index]; + [passdata.mtlpass setFragmentSamplerState:((samplerdata == nil) ? nil : samplerdata.mtlsampler) atIndex:index]; return 0; } @@ -1223,7 +1209,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuTextureData *texturedata = texture ? (__bridge METAL_GpuTextureData *) texture->driverdata : nil; - [passdata.mtlpass setFragmentTexture:((texturedata == nil) ? nil : texturedata.mlttexture) atIndex:index]; + [passdata.mtlpass setFragmentTexture:((texturedata == nil) ? 
nil : texturedata.mtltexture) atIndex:index]; return 0; } @@ -1240,7 +1226,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuBufferData *idxbufdata = (__bridge METAL_GpuBufferData *) index_buffer->driverdata; - [passdata.mtlpass drawIndexedPrimitives:passdata.mtlprimitive indexCount:index_count indexType:IndexTypeToMetal(index_type) index_buffer:idxbufdata.mtlbuffer indexBufferOffset:index_offset]; + [passdata.mtlpass drawIndexedPrimitives:passdata.mtlprimitive indexCount:index_count indexType:IndexTypeToMetal(index_type) indexBuffer:idxbufdata.mtlbuffer indexBufferOffset:index_offset]; return 0; } @@ -1257,7 +1243,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuRenderPassData *passdata = (__bridge METAL_GpuRenderPassData *) pass->driverdata; METAL_GpuBufferData *idxbufdata = (__bridge METAL_GpuBufferData *) index_buffer->driverdata; - [passdata.mtlpass drawIndexedPrimitives:passdata.mtlprimitive indexCount:index_count indexType:IndexTypeToMetal(index_type) index_buffer:idxbufdata.mtlbuffer indexBufferOffset:index_offset instanceCount:instance_count baseVertex:base_vertex baseInstance:base_instance]; + [passdata.mtlpass drawIndexedPrimitives:passdata.mtlprimitive indexCount:index_count indexType:IndexTypeToMetal(index_type) indexBuffer:idxbufdata.mtlbuffer indexBufferOffset:index_offset instanceCount:instance_count baseVertex:base_vertex baseInstance:base_instance]; return 0; } @@ -1328,7 +1314,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; METAL_GpuBufferData *srcbufdata = (__bridge METAL_GpuBufferData *) _srcbufdata; METAL_GpuBufferData *dstbufdata = (__bridge METAL_GpuBufferData *) _dstbufdata; - [passdata.mtlpass copyFromBuffer:srcbufdata.mtlbuffer sourceOffset:srcoffset toBuffer:dstbufdata.mtlbuffer, destinationOffset:dstoffset size:length]; + [passdata.mtlpass copyFromBuffer:srcbufdata.mtlbuffer sourceOffset:srcoffset toBuffer:dstbufdata.mtlbuffer destinationOffset:dstoffset size:length]; return 0; } @@ -1354,11 +1340,11 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) METAL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; - METAL_GpuBufferData *srcbufdata = (__bridge METAL_GpuBufferData *) srcbuf->driverddata; + METAL_GpuBufferData *srcbufdata = (__bridge METAL_GpuBufferData *) srcbuf->driverdata; METAL_GpuTextureData *dsttexdata = (__bridge METAL_GpuTextureData *) dsttex->driverdata; [passdata.mtlpass copyFromBuffer:srcbufdata.mtlbuffer - sourceOffset:srcoffset sourceBytesPerRow:srcpitch sourceBytesPerImage:srcimgpitch sourceSize:MTLMakeSize(srcw, srch, srcdepth) - toTexture:dsttxtdata.mtltexture destinationSlice:dstslice destinationLevel:dstlevel destinationOrigin:MTLMakeOrigin(dstx, dsty, dstz)]; + sourceOffset:srcoffset sourceBytesPerRow:srcpitch sourceBytesPerImage:srcimgpitch sourceSize:MTLSizeMake(srcw, srch, srcdepth) + toTexture:dsttexdata.mtltexture destinationSlice:dstslice destinationLevel:dstlevel destinationOrigin:MTLOriginMake(dstx, dsty, dstz)]; return 0; } @@ -1367,9 +1353,9 @@ static int 
METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) { METAL_GpuBlitPassData *passdata = (__bridge METAL_GpuBlitPassData *) pass->driverdata; METAL_GpuTextureData *srctexdata = (__bridge METAL_GpuTextureData *) srctex->driverdata; - METAL_GpuBufferData *dstbufdata = (__bridge METAL_GpuBufferData *) dstbuf->driverddata; + METAL_GpuBufferData *dstbufdata = (__bridge METAL_GpuBufferData *) dstbuf->driverdata; [passdata.mtlpass copyFromTexture:srctexdata.mtltexture - sourceSlice:srcslice sourceLevel:srclevel sourceOrigin:MTLMakeOrigin(srcx, srcy, srcz) sourceSize:MTLMakeSize(srcw, srch, srcdepth) + sourceSlice:srcslice sourceLevel:srclevel sourceOrigin:MTLOriginMake(srcx, srcy, srcz) sourceSize:MTLSizeMake(srcw, srch, srcdepth) toBuffer:dstbufdata.mtlbuffer destinationOffset:dstoffset destinationBytesPerRow:dstpitch destinationBytesPerImage:dstimgpitch]; return 0; } @@ -1389,11 +1375,11 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) METAL_GpuCommandBufferData *cmdbufdata = (__bridge METAL_GpuCommandBufferData *) cmdbuf->driverdata; if (fence) { [cmdbufdata.mtlcmdbuf addCompletedHandler:^(id buffer) { - METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; - SDL_LockMutex(fencedata.mutex); - SDL_AtomicSet(&fencedata.flag, 1); - SDL_CondBroadcast(fencedata.condition); - SDL_UnlockMutex(fencedata.mutex); + METAL_GpuFenceData *fencedata = (METAL_GpuFenceData *) fence->driverdata; + SDL_LockMutex(fencedata->mutex); + SDL_AtomicSet(&fencedata->flag, 1); + SDL_CondBroadcast(fencedata->condition); + SDL_UnlockMutex(fencedata->mutex); }]; } [cmdbufdata.mtlcmdbuf commit]; @@ -1426,12 +1412,12 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) return SDL_SetError("Failed to get next Metal drawable. Your window might be minimized?"); } texturedata.mtltexture = windata.mtldrawable.texture; - texturedata.mtltexture.label = [NSString stringWithUTF8String:texture->label]; + texturedata.mtltexture.label = [NSString stringWithUTF8String:texture->desc.label]; texture->desc.width = texturedata.mtltexture.width; texture->desc.height = texturedata.mtltexture.height; texture->desc.pixel_format = PixelFormatFromMetal(texturedata.mtltexture.pixelFormat); - if (texture->pixel_format == SDL_GPUPIXELFMT_INVALID) { + if (texture->desc.pixel_format == SDL_GPUPIXELFMT_INVALID) { SDL_assert(!"Uhoh, we might need to add a new pixel format to SDL_gpu.h"); windata.mtldrawable = nil; return -1; @@ -1462,24 +1448,27 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) static int METAL_GpuCreateFence(SDL_GpuFence *fence) { - METAL_GpuFenceData *fencedata = [[METAL_GpuFenceData alloc] init]; - if (fencedata == nil) { + METAL_GpuFenceData *fencedata = (METAL_GpuFenceData *) SDL_calloc(1, sizeof (METAL_GpuFenceData)); + if (fencedata == NULL) { return SDL_OutOfMemory(); } - fencedata.mutex = SDL_CreateMutex(); - if (!fencedata.mutex) { + fencedata->mutex = SDL_CreateMutex(); + if (!fencedata->mutex) { + SDL_free(fencedata); return -1; } - fencedata.condition = SDL_CreateCond(); - if (!fencedata.condition) { + fencedata->condition = SDL_CreateCond(); + if (!fencedata->condition) { + SDL_DestroyMutex(fencedata->mutex); + SDL_free(fencedata); return -1; } - SDL_AtomicSet(&fencedata.flag, 0); + SDL_AtomicSet(&fencedata->flag, 0); - fence->driverdata = (void *) CFBridgingRetain(fencedata); + fence->driverdata = fencedata; return 0; } @@ -1487,41 +1476,44 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) static void METAL_GpuDestroyFence(SDL_GpuFence 
*fence) { - CFBridgingRelease(fence->driverdata); + METAL_GpuFenceData *fencedata = (METAL_GpuFenceData *) fence->driverdata; + SDL_DestroyMutex(fencedata->mutex); + SDL_DestroyCond(fencedata->condition); + SDL_free(fencedata); } static int METAL_GpuQueryFence(SDL_GpuFence *fence) { - METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; - return SDL_AtomicGet(&fencedata.flag); + METAL_GpuFenceData *fencedata = (METAL_GpuFenceData *) fence->driverdata; + return SDL_AtomicGet(&fencedata->flag); } static int METAL_GpuResetFence(SDL_GpuFence *fence) { - METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; - SDL_AtomicSet(&fencedata.flag, 0); + METAL_GpuFenceData *fencedata = (METAL_GpuFenceData *) fence->driverdata; + SDL_AtomicSet(&fencedata->flag, 0); return 0; } static int METAL_GpuWaitFence(SDL_GpuFence *fence) { - METAL_GpuFenceData *fencedata = (__bridge METAL_GpuFenceData *) fence->driverdata; + METAL_GpuFenceData *fencedata = (METAL_GpuFenceData *) fence->driverdata; - if (SDL_LockMutex(fencedata.mutex) == -1) { + if (SDL_LockMutex(fencedata->mutex) == -1) { return -1; } - while (SDL_AtomicGet(&fencedata.flag) == 0) { - if (SDL_CondWait(fencedata.condition, fencedata.mutex) == -1) { - SDL_UnlockMutex(fencedata.mutex); + while (SDL_AtomicGet(&fencedata->flag) == 0) { + if (SDL_CondWait(fencedata->condition, fencedata->mutex) == -1) { + SDL_UnlockMutex(fencedata->mutex); return -1; } } - SDL_UnlockMutex(fencedata.mutex); + SDL_UnlockMutex(fencedata->mutex); return 0; } @@ -1541,7 +1533,7 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) SDL_assert(viddev != NULL); - if ((SDL_strcmp(viddev, "cocoa") != 0) && (SDL_strcmp(viddev, "uikit") != 0)) { + if ((SDL_strcmp(viddev->name, "cocoa") != 0) && (SDL_strcmp(viddev->name, "uikit") != 0)) { return SDL_SetError("Metal GPU driver only supports Cocoa and UIKit video targets at the moment."); } @@ -1645,4 +1637,6 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) "metal", METAL_GpuCreateDevice }; +#endif + /* vi: set ts=4 sw=4 expandtab: */ From 50ac3b806d2990fbe8eb4e4e81ec0f170df89c74 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 12 May 2022 13:06:24 -0400 Subject: [PATCH 40/54] gpu: Assume we have ARC support when building the Metal backend. SDL started requiring ARC since I wrote the Metal code, so I've rebased this branch passed that point and removed the check. --- src/gpu/metal/SDL_gpu_metal.m | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index 863a6e20bd72a..f379d127f4cbe 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -38,11 +38,6 @@ #import #endif -#if !__has_feature(objc_arc) -#error Please build with ARC support. -#endif - - @interface METAL_GpuDeviceData : NSObject @property (nonatomic, retain) id mtldevice; @property (nonatomic, retain) id mtlcmdqueue; From a26b42ddad4b0a8ab49a02104b555d65e0c46d41 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 17 May 2022 23:47:01 -0400 Subject: [PATCH 41/54] gpu: Fixed incorrect comment. 
--- src/gpu/SDL_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 3fb920fbc4382..b86b360013991 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -503,7 +503,7 @@ crc32_append(Uint32 crc, const void *_buf, const size_t buflen) static Uint32 hash_pipeline(const void *key, void *data) { - /* this hashes most pointers; this hash is meant to be unique and contained in this process. As such, it also doesn't care about enum size or byte order. */ + /* this hashes the shader pointers; this hash is meant to be unique and contained in this process. As such, it also doesn't care about enum size or byte order. */ /* However, it _does_ care about uninitialized packing bytes, so it doesn't just hash the sizeof (object). */ const SDL_GpuPipelineDescription *desc = (const SDL_GpuPipelineDescription *) key; Uint32 crc = CRC32_INIT_VALUE; From b3e98d62972ac89817176cb2c87f036781bbf0ab Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Tue, 17 May 2022 23:47:13 -0400 Subject: [PATCH 42/54] gpu: Added vsync support for Metal backend. --- src/gpu/metal/SDL_gpu_metal.m | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index f379d127f4cbe..3d10ac845054b 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -51,6 +51,7 @@ @interface METAL_GpuWindowData : NSObject @property (nonatomic, assign) SDL_MetalView mtlview; @property (nonatomic, retain) CAMetalLayer *mtllayer; @property (nonatomic, retain) id mtldrawable; // current backbuffer + @property (nonatomic, assign) int swap_interval; @end @implementation METAL_GpuWindowData @@ -544,6 +545,7 @@ @implementation METAL_GpuBlitPassData layer.device = devdata.mtldevice; layer.framebufferOnly = NO; windata.mtllayer = layer; + windata.swap_interval = windata.mtllayer.displaySyncEnabled ? 1 : 0; window->gpu_driverdata = (void *) CFBridgingRetain(windata); @@ -1430,6 +1432,20 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) SDL_assert(windata.mtldrawable != nil); // higher level should have checked this. + // before 10.13 (and always on iOS?), this was always vsync. + #if (defined(__MACOSX__) && defined(MAC_OS_X_VERSION_10_13)) || TARGET_OS_MACCATALYST + if (@available(macOS 10.13, *)) { + if (windata.swap_interval != swapinterval) { + if (swapinterval >= 1) { + windata.mtllayer.displaySyncEnabled = YES; + } else { + windata.mtllayer.displaySyncEnabled = NO; + } + windata.swap_interval = swapinterval; + } + } + #endif + [windata.mtldrawable present]; // let ARC clean things up. From f988a25a9a447d734bc36b3b8b509159ed5b3783 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 18 May 2022 15:04:47 -0400 Subject: [PATCH 43/54] gpu: Add Metal to list of usable drivers. This actually works enough to run test/testgpu_simple_clear.c correctly on a Mac, if you statically link SDL (since none of the GPU API entry points are exported from the shared library or in the dynapi table yet), which is surprisingly motivating for something that does so little. Obviously we need to figure out the shader plan before anything else is going to work, though. 
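For reference, once this is in place an app can ask for the Metal backend by
passing the registered driver name ("metal") instead of NULL; a minimal sketch,
assuming the driver-name matching in SDL_GpuCreateDevice works as intended (the
"testgpu device" label is arbitrary):

    SDL_GpuDevice *device = SDL_GpuCreateDevice("testgpu device", "metal");
    if (device == NULL) {
        SDL_Log("Couldn't create Metal GPU device: %s", SDL_GetError());
    }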
--- src/gpu/SDL_gpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index b86b360013991..873da5969a8be 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -28,8 +28,12 @@ extern const SDL_GpuDriver DUMMY_GpuDriver; +extern const SDL_GpuDriver METAL_GpuDriver; static const SDL_GpuDriver *gpu_drivers[] = { +#ifdef SDL_GPU_METAL + &METAL_GpuDriver, +#endif &DUMMY_GpuDriver }; From 922f97e15505b0eb9a0d15a974876a36b8fa7696 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 12 Sep 2022 10:24:24 -0400 Subject: [PATCH 44/54] gpu: First shot at testgpu_spinning_cube.c using the new shader language. This won't actually work, because we're actually going to need bytecode here, not something we compile, but it's better than having the GLSL code there as a placeholder. --- test/testgpu_spinning_cube.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c index 61d619d7ef15f..45e5dcbe5cbdd 100644 --- a/test/testgpu_spinning_cube.c +++ b/test/testgpu_spinning_cube.c @@ -237,23 +237,17 @@ static const VertexData vertex_data[] = { { 0.5, -0.5, 0.5, 1.0, 0.0, 1.0 } /* magenta */ }; -/* !!! FIXME: these shaders need to change. This is just the GLES2 shaders right now. */ static const char* shader_vert_src = -" attribute vec4 av4position; " -" attribute vec3 av3color; " -" uniform mat4 mvp; " -" varying vec3 vv3color; " -" void main() { " -" vv3color = av3color; " -" gl_Position = mvp * av4position; " -" } "; - -static const char* shader_frag_src = -" precision lowp float; " -" varying vec3 vv3color; " -" void main() { " -" gl_FragColor = vec4(vv3color, 1.0); " -" } "; + "struct VertexInputs { float4 position @attribute(0); float3 color @attribute(1); };" + "struct VertexOutputs { float4 position @position; float4 color; };" + "struct VertexUniforms { float4x4 mvp; };" + "function VertexOutputs vertex_main(VertexInputs inputs @inputs, VertexUniforms uniforms @buffer(0)) @vertex {" + "return VertexOutputs(mvp * inputs.position, float4(inputs.color, 1.0));" + "}" + "struct FragmentOutputs { float4 color @color; }" + "function FragmentOutputs fragment_main(VertexOutputs inputs @inputs) @fragment {" + "return FragmentOutputs(inputs.color);" + "}"; static void Render(SDL_Window *window, const int windownum) @@ -402,8 +396,8 @@ init_render_state(void) gpu_device = SDL_GpuCreateDevice("The GPU device", NULL); CHECK_CREATE(gpu_device, "GPU device"); - vertex_shader = load_shader("Spinning cube vertex shader", shader_vert_src, "vertex"); - fragment_shader = load_shader("Spinning cube fragment shader", shader_frag_src, "fragment"); + vertex_shader = load_shader("Spinning cube vertex shader", shader_vert_src, "vertex_main"); + fragment_shader = load_shader("Spinning cube fragment shader", shader_frag_src, "fragment_main"); /* We just need to upload the static data once. */ render_state.gpubuf_static = SDL_GpuCreateAndInitBuffer("Static vertex data GPU buffer", gpu_device, sizeof (vertex_data), vertex_data); From a1b9104fde941d551b69d5369f568e071f70ea6e Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 Jan 2023 21:18:14 -0500 Subject: [PATCH 45/54] gpu: Use new SDL3 SDL_(begin|close)_code.h header names. 
--- include/SDL_gpu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/SDL_gpu.h b/include/SDL_gpu.h index dfa6181736006..9190657560783 100644 --- a/include/SDL_gpu.h +++ b/include/SDL_gpu.h @@ -31,7 +31,8 @@ #include "SDL_stdinc.h" #include "SDL_error.h" -#include "begin_code.h" +#include "SDL_begin_code.h" + /* Set up for C function definitions, even when using C++ */ #ifdef __cplusplus extern "C" { @@ -794,7 +795,8 @@ void SDL_GpuDestroyFenceCycle(SDL_GpuFenceCycle *cycle); #ifdef __cplusplus } #endif -#include "close_code.h" + +#include "SDL_close_code.h" #endif /* SDL_gpu_h_ */ From f3fe30928b43f35f8b7fde6c8298aed0ac818594 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 Jan 2023 21:18:54 -0500 Subject: [PATCH 46/54] gpu: Move the GPU API headers into the new SDL3 location. --- include/{ => SDL3}/SDL_gpu.h | 0 include/{ => SDL3}/SDL_gpu_compiler.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename include/{ => SDL3}/SDL_gpu.h (100%) rename include/{ => SDL3}/SDL_gpu_compiler.h (100%) diff --git a/include/SDL_gpu.h b/include/SDL3/SDL_gpu.h similarity index 100% rename from include/SDL_gpu.h rename to include/SDL3/SDL_gpu.h diff --git a/include/SDL_gpu_compiler.h b/include/SDL3/SDL_gpu_compiler.h similarity index 100% rename from include/SDL_gpu_compiler.h rename to include/SDL3/SDL_gpu_compiler.h From 0988d2b54e9e98cd4d9720780dc40062927e9c89 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 Jan 2023 21:26:40 -0500 Subject: [PATCH 47/54] gpu: The shader compiler isn't going to built into SDL directly. It's an external tool that can be used offline or embedded into an app for use at runtime, but SDL itself isn't going to embed a compiler. --- include/SDL3/SDL_gpu.h | 2 +- include/SDL3/SDL_gpu_compiler.h | 60 --------------------------------- test/testgpu_spinning_cube.c | 2 +- 3 files changed, 2 insertions(+), 62 deletions(-) delete mode 100644 include/SDL3/SDL_gpu_compiler.h diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 9190657560783..48f550a80f263 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -243,7 +243,7 @@ SDL_GpuTexture *SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTexture void SDL_GpuDestroyTexture(SDL_GpuTexture *texture); int SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc); -/* compiling shaders is a different (and optional at runtime) piece, in SDL_gpu_compiler.h */ +/* compiling shaders is a different (and optional at runtime) piece, in github.com/libsdl-org/SDL_shader_tools */ typedef struct SDL_GpuShader SDL_GpuShader; SDL_GpuShader *SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); /* !!! FIXME: bytecode type enum? */ /* !!! FIXME: add a query for platform/gpu specific blob that can be fed back next time for faster load times? */ diff --git a/include/SDL3/SDL_gpu_compiler.h b/include/SDL3/SDL_gpu_compiler.h deleted file mode 100644 index 5b0fdba8d73cb..0000000000000 --- a/include/SDL3/SDL_gpu_compiler.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - Simple DirectMedia Layer - Copyright (C) 1997-2022 Sam Lantinga - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. 
- - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef SDL_gpu_compiler_h_ -#define SDL_gpu_compiler_h_ - -/** - * \file SDL_gpu_compiler.h - * - * Header for the SDL GPU compiler routines. - */ - -#include "SDL_gpu.h" - -#include "begin_code.h" -/* Set up for C function definitions, even when using C++ */ -#ifdef __cplusplus -extern "C" { -#endif - -/* !!! FIXME: this all needs formal (and significantly more robust) documentation. */ - -/* - * This builds shader source code into bytecode. One could use this to cook - * shaders offline, or pass dynamic strings at runtime. This is meant to favor - * speed over optimization. If one really wants a strong optimizing compiler, - * one should build an external tool. :) - */ -/* !!! FIXME: don't use a string for type, just an enum */ -int SDL_GpuCompileShader(const char *src, const int srclen, const char *type, const char *mainfn, Uint8 **result, Uint32 *resultlen); - -/* !!! FIXME: There's probably a lot of other stuff we want to put in here. */ - -/* Ends C function definitions when using C++ */ -#ifdef __cplusplus -} -#endif -#include "close_code.h" - -#endif /* SDL_gpu_compiler_h_ */ - -/* vi: set ts=4 sw=4 expandtab: */ diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c index 45e5dcbe5cbdd..1d68eb7e55d82 100644 --- a/test/testgpu_spinning_cube.c +++ b/test/testgpu_spinning_cube.c @@ -20,7 +20,6 @@ #include "SDL_test_common.h" #include "SDL_gpu.h" -#include "SDL_gpu_compiler.h" typedef struct RenderState { @@ -365,6 +364,7 @@ static SDL_GpuShader *load_shader(const char *label, const char *src, const char SDL_GpuShader *retval = NULL; Uint8 *bytecode = NULL; Uint32 bytecodelen = 0; + /* !!! FIXME: this is broken right now, we need to compile this with the external tools and just keep the binary embedded in here. */ if (SDL_GpuCompileShader(src, -1, type, "main", &bytecode, &bytecodelen) == -1) { SDL_Log("Failed to compile %s shader: %s", type, SDL_GetError()); quit(2); From 720e7a274f5ced49d17e6d3c7eb318fd3d005980 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 Jan 2023 23:46:12 -0500 Subject: [PATCH 48/54] gpu: Renamed all the API functions to match SDL3 conventions. --- include/SDL3/SDL_gpu.h | 165 +++++++++++------------ src/gpu/SDL_gpu.c | 244 +++++++++++++++++----------------- src/gpu/SDL_sysgpu.h | 14 +- src/gpu/dummy/SDL_gpu_dummy.c | 12 +- src/gpu/metal/SDL_gpu_metal.m | 12 +- test/testgpu_simple_clear.c | 20 +-- test/testgpu_spinning_cube.c | 74 +++++------ 7 files changed, 271 insertions(+), 270 deletions(-) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 48f550a80f263..1e257f8abb4a9 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -159,23 +159,23 @@ extern "C" { * The list of drivers is static and based on what was compiled into SDL; it * does not change between calls to these functions. 
*/ -Uint32 SDL_GpuGetNumDrivers(void); -const char *SDL_GpuGetDriverName(Uint32 index); +int SDL_GetNumGpuDrivers(void); +const char *SDL_GetGpuDriver(int index); /* !!! FIXME: Enumerate physical devices. Right now this API doesn't allow it. */ typedef struct SDL_GpuDevice SDL_GpuDevice; -SDL_GpuDevice *SDL_GpuCreateDevice(const char *label, const char *driver); /* `label` is for debugging, not a specific device name to access. */ -void SDL_GpuDestroyDevice(SDL_GpuDevice *device); +SDL_GpuDevice *SDL_CreateGpuDevice(const char *label, const char *driver); /* `label` is for debugging, not a specific device name to access. */ +void SDL_DestroyGpuDevice(SDL_GpuDevice *device); /* !!! FIXME: device caps */ /* CPU buffers live in RAM and can be accessed by the CPU. */ -typedef struct SDL_GpuCpuBuffer SDL_GpuCpuBuffer; -SDL_GpuCpuBuffer *SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); -void SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer); -void *SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen); -int SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer); +typedef struct SDL_CpuBuffer SDL_CpuBuffer; +SDL_CpuBuffer *SDL_CreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); +void SDL_DestroyCpuBuffer(SDL_CpuBuffer *buffer); +void *SDL_LockCpuBuffer(SDL_CpuBuffer *buffer, Uint32 *_buflen); +int SDL_UnlockCpuBuffer(SDL_CpuBuffer *buffer); /* * GPU buffers live in GPU-specific memory and can not be accessed by the CPU. @@ -187,8 +187,8 @@ int SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer); * upload is complete: SDL_GpuCreateAndInitBuffer */ typedef struct SDL_GpuBuffer SDL_GpuBuffer; -SDL_GpuBuffer *SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen); -void SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer); +SDL_GpuBuffer *SDL_CreateGpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen); +void SDL_DestroyGpuBuffer(SDL_GpuBuffer *buffer); typedef enum SDL_GpuTextureType @@ -239,15 +239,15 @@ typedef struct SDL_GpuTextureDescription } SDL_GpuTextureDescription; typedef struct SDL_GpuTexture SDL_GpuTexture; -SDL_GpuTexture *SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc); -void SDL_GpuDestroyTexture(SDL_GpuTexture *texture); -int SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc); +SDL_GpuTexture *SDL_CreateGpuTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc); +void SDL_DestroyGpuTexture(SDL_GpuTexture *texture); +int SDL_GetGpuTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc); /* compiling shaders is a different (and optional at runtime) piece, in github.com/libsdl-org/SDL_shader_tools */ typedef struct SDL_GpuShader SDL_GpuShader; -SDL_GpuShader *SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); /* !!! FIXME: bytecode type enum? */ +SDL_GpuShader *SDL_CreateGpuShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen); /* !!! FIXME: bytecode type enum? */ /* !!! FIXME: add a query for platform/gpu specific blob that can be fed back next time for faster load times? */ -void SDL_GpuDestroyShader(SDL_GpuShader *shader); +void SDL_DestroyGpuShader(SDL_GpuShader *shader); /* PRECOOKED STATE OBJECTS... 
*/ @@ -443,14 +443,14 @@ typedef struct SDL_GpuPipelineDescription } SDL_GpuPipelineDescription; typedef struct SDL_GpuPipeline SDL_GpuPipeline; -SDL_GpuPipeline *SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc); -void SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline); +SDL_GpuPipeline *SDL_CreateGpuPipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc); +void SDL_DestroyGpuPipeline(SDL_GpuPipeline *pipeline); /* these make it easier to set up a Pipeline description; set the defaults (or start with an existing pipeline's state) then change what you like. Note that the `label` and shader fields are read-only; do not modify or free them! */ -void SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc); -int SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); +void SDL_GetDefaultGpuPipelineDescription(SDL_GpuPipelineDescription *desc); +int SDL_GetGpuPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc); @@ -498,8 +498,8 @@ typedef struct SDL_GpuSamplerDescription } SDL_GpuSamplerDescription; typedef struct SDL_GpuSampler SDL_GpuSampler; -SDL_GpuSampler *SDL_GpuCreateSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc); -void SDL_GpuDestroySampler(SDL_GpuSampler *sampler); +SDL_GpuSampler *SDL_CreateGpuSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc); +void SDL_DestroyGpuSampler(SDL_GpuSampler *sampler); @@ -518,15 +518,15 @@ void SDL_GpuDestroySampler(SDL_GpuSampler *sampler); * will be destroyed when their owning cache is destroyed. * * Thread safety: each type of cache (pipeline, sampler) has its own internal - * mutex, which it locks during SDL_GpuGetCached* calls. It is not safe to - * call SDL_GpuDestroyStateCache while that cache is being used by another + * mutex, which it locks during SDL_GetCachedGpu* calls. It is not safe to + * call SDL_DestroyGpuStateCache while that cache is being used by another * thread. */ typedef struct SDL_GpuStateCache SDL_GpuStateCache; -SDL_GpuStateCache *SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device); -SDL_GpuPipeline *SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc); -SDL_GpuSampler *SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc); -void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); +SDL_GpuStateCache *SDL_CreateGpuStateCache(const char *label, SDL_GpuDevice *device); +SDL_GpuPipeline *SDL_GetCachedGpuPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc); +SDL_GpuSampler *SDL_GetCachedGpuSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc); +void SDL_DestroyGpuStateCache(SDL_GpuStateCache *cache); // !!! FIXME: read/write state caches to disk? @@ -546,7 +546,7 @@ void SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache); * etc) into the same command buffer. */ typedef struct SDL_GpuCommandBuffer SDL_GpuCommandBuffer; -SDL_GpuCommandBuffer *SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device); +SDL_GpuCommandBuffer *SDL_CreateGpuCommandBuffer(const char *label, SDL_GpuDevice *device); /* !!! FIXME: push/pop debug groups? */ @@ -586,7 +586,7 @@ typedef struct SDL_GpuStencilAttachmentDescription /* start encoding a render pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. 
*/ typedef struct SDL_GpuRenderPass SDL_GpuRenderPass; -SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, +SDL_GpuRenderPass *SDL_StartGpuRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, @@ -601,19 +601,19 @@ SDL_GpuRenderPass *SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffe * was set to at the time. Try not to encode redundant state changes into a render pass * as they will take resources to do nothing. */ -int SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); +int SDL_SetGpuRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline); -int SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar); -int SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height); -int SDL_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha); +int SDL_SetGpuRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar); +int SDL_SetGpuRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height); +int SDL_SetGpuRenderPassBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha); -int SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); -int SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); -int SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); +int SDL_SetGpuRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); +int SDL_SetGpuRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); +int SDL_SetGpuRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); -int SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); -int SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); -int SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); +int SDL_SetGpuRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index); +int SDL_SetGpuRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index); +int SDL_SetGpuRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index); /* Drawing! */ @@ -630,38 +630,38 @@ int SDL_GpuDrawInstanced(SDL_GpuRenderPass *pass, Uint32 vertex_start, Uint32 ve int SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_GpuIndexType index_type, SDL_GpuBuffer *index_buffer, Uint32 index_offset, Uint32 instance_count, Uint32 base_vertex, Uint32 base_instance); /* Done encoding this render pass into the command buffer. 
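   Pulling the calls above together, encoding one pass might look roughly like this (a
   sketch using objects from the earlier sketches; `color_desc` is an
   SDL_GpuColorAttachmentDescription set up as in the attachment structs above, and the
   vertex count is made up):

       SDL_GpuCommandBuffer *cmd = SDL_CreateGpuCommandBuffer("frame", device);
       SDL_GpuRenderPass *pass = SDL_StartGpuRenderPass("scene", cmd, 1, &color_desc, NULL, NULL);
       SDL_SetGpuRenderPassPipeline(pass, pipeline);
       SDL_SetGpuRenderPassViewport(pass, 0.0, 0.0, 1280.0, 720.0, 0.0, 1.0);
       SDL_SetGpuRenderPassVertexBuffer(pass, vertex_buffer, 0, 0);    // offset 0, slot 0
       SDL_SetGpuRenderPassFragmentTexture(pass, texture, 0);
       SDL_SetGpuRenderPassFragmentSampler(pass, sampler, 0);
       SDL_GpuDraw(pass, 0, 36);                                       // 36 vertices, starting at vertex 0
       SDL_EndGpuRenderPass(pass);                                     // `pass` is invalid after this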
You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ -int SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass); +int SDL_EndGpuRenderPass(SDL_GpuRenderPass *pass); /* start encoding a blit pass to a command buffer. You can only encode one type of pass to a command buffer at a time. End this pass to start encoding another. */ typedef struct SDL_GpuBlitPass SDL_GpuBlitPass; -SDL_GpuBlitPass *SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf); -int SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, +SDL_GpuBlitPass *SDL_StartGpuBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf); +int SDL_CopyBetweenGpuTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); -int SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value); +int SDL_FillGpuBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value); -int SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); +int SDL_GenerateGpuMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); -int SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); -int SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); -int SDL_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +int SDL_CopyCpuBufferToGpu(SDL_GpuBlitPass *pass, SDL_CpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +int SDL_CopyGpuBufferToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_CpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); +int SDL_CopyBetweenGpuBuffers(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); -int SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, +int SDL_CopyGpuBufferToGpuTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); -int SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, +int SDL_CopyGpuTextureToGpuBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); /* Done encoding this blit pass into the command buffer. You can now commit the command buffer or start a new render (or whatever) pass. This `pass` pointer becomes invalid. */ -int SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass); +int SDL_EndGpuBlitPass(SDL_GpuBlitPass *pass); /* @@ -672,11 +672,11 @@ int SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass); * and if it's safe to touch resources that are no longer in-flight. 
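 *
 * Fences pair naturally with the upload path above. For the common data-upload case, a
 * blit-pass sketch (error checks omitted; the copy is only queued here and actually runs
 * when the command buffer is submitted):
 *
 *     SDL_GpuCommandBuffer *cmd = SDL_CreateGpuCommandBuffer("uploads", device);
 *     SDL_GpuBlitPass *blit = SDL_StartGpuBlitPass("copy vertices", cmd);
 *     SDL_CopyCpuBufferToGpu(blit, staging, 0, vbo, 0, buflen);   // slow path between CPU and GPU memory
 *     SDL_GenerateGpuMipmaps(blit, some_texture);                 // unrelated blit work can share the pass
 *     SDL_EndGpuBlitPass(blit);                                   // `blit` is invalid after this
 *
 * SDL_CreateAndInitGpuBuffer, declared further down, wraps exactly this pattern (plus a
 * fence wait) for one-shot static data.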
*/ typedef struct SDL_GpuFence SDL_GpuFence; -SDL_GpuFence *SDL_GpuCreateFence(const char *label, SDL_GpuDevice *device); -void SDL_GpuDestroyFence(SDL_GpuFence *fence); -int SDL_GpuQueryFence(SDL_GpuFence *fence); -int SDL_GpuResetFence(SDL_GpuFence *fence); -int SDL_GpuWaitFence(SDL_GpuFence *fence); +SDL_GpuFence *SDL_CreateGpuFence(const char *label, SDL_GpuDevice *device); +void SDL_DestroyGpuFence(SDL_GpuFence *fence); +int SDL_QueryGpuFence(SDL_GpuFence *fence); +int SDL_ResetGpuFence(SDL_GpuFence *fence); +int SDL_WaitGpuFence(SDL_GpuFence *fence); /* @@ -684,16 +684,16 @@ int SDL_GpuWaitFence(SDL_GpuFence *fence); * Command buffers are executed in the order they are submitted, and the commands in those buffers are executed in the order they were encoded. * Once a command buffer is submitted, its pointer becomes invalid. Create a new one for the next set of commands. */ -int SDL_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence); +int SDL_SubmitGpuCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence); /* If for some reason you've started encoding command buffers and decide _not_ to submit them to the GPU, you can abandon them, freeing their resources. This can be useful if something unrelated fails halfway through buffer encoding. */ -void SDL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer); +void SDL_AbandonGpuCommandBuffer(SDL_GpuCommandBuffer *buffer); /* * Get a texture that can be used for rendering to an SDL window. The SDL_Window * may be destroyed and recreated internally on first use if incompatible with - * the SDL_GpuDevice! As such, it does not need to be created with + * the SDL_GpuDevice! As such, it should not be created with * SDL_WINDOW_OPENGL or _VULKAN, etc, as this API will take care of it. * * SDL_Windows can not be used with more than one GPU device at a time (even if @@ -726,7 +726,7 @@ void SDL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer); * pixels at any time. A render pass with SDL_GPUPASSINIT_LOAD will not be * meaningful for this texture until you've initialized it in some form. */ -SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); +SDL_GpuTexture *SDL_GetGpuBackbuffer(SDL_GpuDevice *device, SDL_Window *window); /* Present a window's current backbuffer to the display. This will take the current SDL_GpuTexture returned by SDL_GpuGetBackbuffer * and queue it for presentation. The presentation request is queued after any submitted command buffers, so you should call this @@ -735,9 +735,9 @@ SDL_GpuTexture *SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window); * * As soon as this call completes, the backbuffer is considered "in-flight." * - * While the backbuffer is in-flight, the next call to SDL_GpuGetBackbuffer will return a different + * While the backbuffer is in-flight, the next call to SDL_GetGpuBackbuffer will return a different * texture and/or block. Do not use this backbuffer again after requesting presentation with it, as its pointer becomes - * invalid; you should request a new one from SDL_GpuGetBackbuffer for future rendering. Note that presenting a window + * invalid; you should request a new one from SDL_GetGpuBackbuffer for future rendering. Note that presenting a window * with vsync will not block here, as this just queues the request. * * You should call this once per frame after rendering to a new backbuffer. 
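 *
 * A typical end-of-frame sequence, as a sketch (the fence is optional; pass NULL to
 * SDL_SubmitGpuCommandBuffer if you don't need to know when the work completes):
 *
 *     SDL_GpuFence *fence = SDL_CreateGpuFence("frame fence", device);
 *     SDL_SubmitGpuCommandBuffer(cmd, fence);    // `cmd` becomes invalid here
 *     SDL_GpuPresent(device, window, 1);         // queue the presentation, swap interval 1
 *     SDL_WaitGpuFence(fence);                   // block until the GPU has drained the buffer...
 *     SDL_ResetGpuFence(fence);                  // ...then rearm it for the next frame
 *
 * In a real app you would usually poll with SDL_QueryGpuFence, or keep a fence cycle (see
 * below), instead of blocking right after submitting.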
If you haven't rendered to a backbuffer @@ -751,13 +751,14 @@ int SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval); /* This makes a GPU buffer, and uploads data to it. This is not a fast call! But it removes a bunch of boilerplate code if you just want to blast data to a GPU buffer. This will submit a command buffer with a blit pass to the device and wait for it to complete. Returns NULL on error, the new GPU buffer otherwise. */ -SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); +SDL_GpuBuffer *SDL_CreateAndInitGpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data); /* Make sure your depth texture matches the window's backbuffer dimensions, if you don't care about managing the depth buffer yourself. * This assumes the depth texture is not still in-flight from a previous frame! If the depth texture needs to be resized, previous contents * will be lost. */ -SDL_GpuTexture *SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depth); +/* !!! FIXME: decide if this is a good name for this function */ +SDL_GpuTexture *SDL_MatchingGpuDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depth); /* Since you need to leave a buffer untouched until the GPU is done with it, you often need to keep several buffers of uniforms that you cycle through as the GPU processes prior frames. If you don't want to manage this yourself, you can use a buffer cycle @@ -766,30 +767,30 @@ SDL_GpuTexture *SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *de The "Ptr" version gives you the address of the item in the cycle, in case you need to rebuild it: for example, if you have a cycle of depth textures and the window gets resized, you'd use the Ptr version to destroy and recreate the object in the cycle. In normal use, you want the non-Ptr version, though. 
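   A per-frame sketch (assumes the cycles were created at startup with enough entries to
   cover however many frames can be in flight at once):

       SDL_CpuBuffer *ucpu = SDL_GetNextCpuBufferInCycle(cpu_uniform_cycle);
       SDL_GpuBuffer *ugpu = SDL_GetNextGpuBufferInCycle(gpu_uniform_cycle);
       // ...write this frame's matrices into ucpu, then blit them into ugpu as usual...

       // The Ptr variant is for objects you may have to recreate mid-cycle, e.g. a depth
       // texture that has to track window resizes:
       SDL_GpuTexture **depth = SDL_GetNextGpuTexturePtrInCycle(depth_cycle);
       SDL_MatchingGpuDepthTexture("depth buffer", device, backbuffer, depth);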
*/ -typedef struct SDL_GpuCpuBufferCycle SDL_GpuCpuBufferCycle; -SDL_GpuCpuBufferCycle *SDL_GpuCreateCpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const void *data, const Uint32 numbuffers); -SDL_GpuCpuBuffer *SDL_GpuNextCpuBufferCycle(SDL_GpuCpuBufferCycle *cycle); -SDL_GpuCpuBuffer **SDL_GpuNextCpuBufferPtrCycle(SDL_GpuCpuBufferCycle *cycle); -void SDL_GpuDestroyCpuBufferCycle(SDL_GpuCpuBufferCycle *cycle); +typedef struct SDL_CpuBufferCycle SDL_CpuBufferCycle; +SDL_CpuBufferCycle *SDL_CreateCpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const void *data, const Uint32 numbuffers); +SDL_CpuBuffer *SDL_GetNextCpuBufferInCycle(SDL_CpuBufferCycle *cycle); +SDL_CpuBuffer **SDL_GetNextCpuBufferPtrInCycle(SDL_CpuBufferCycle *cycle); +void SDL_DestroyCpuBufferCycle(SDL_CpuBufferCycle *cycle); typedef struct SDL_GpuBufferCycle SDL_GpuBufferCycle; -SDL_GpuBufferCycle *SDL_GpuCreateBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const Uint32 numbuffers); -SDL_GpuBuffer *SDL_GpuNextBufferCycle(SDL_GpuBufferCycle *cycle); -SDL_GpuBuffer **SDL_GpuNextBufferPtrCycle(SDL_GpuBufferCycle *cycle); -void SDL_GpuDestroyBufferCycle(SDL_GpuBufferCycle *cycle); +SDL_GpuBufferCycle *SDL_CreateGpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const Uint32 numbuffers); +SDL_GpuBuffer *SDL_GetNextGpuBufferInCycle(SDL_GpuBufferCycle *cycle); +SDL_GpuBuffer **SDL_GetNextGpuBufferPtrInCycle(SDL_GpuBufferCycle *cycle); +void SDL_DestroyGpuBufferCycle(SDL_GpuBufferCycle *cycle); -/* if the texdesc is NULL, you will get a cycle of NULL textures that you can create later with SDL_GpuNextTexturePtrCycle */ +/* if the texdesc is NULL, you will get a cycle of NULL textures that you can create later with SDL_GetNextGpuTexturePtrInCycle */ typedef struct SDL_GpuTextureCycle SDL_GpuTextureCycle; -SDL_GpuTextureCycle *SDL_GpuCreateTextureCycle(const char *label, SDL_GpuDevice *device, const SDL_GpuTextureDescription *texdesc, const Uint32 numtextures); -SDL_GpuTexture *SDL_GpuNextTextureCycle(SDL_GpuTextureCycle *cycle); -SDL_GpuTexture **SDL_GpuNextTexturePtrCycle(SDL_GpuTextureCycle *cycle); -void SDL_GpuDestroyTextureCycle(SDL_GpuTextureCycle *cycle); +SDL_GpuTextureCycle *SDL_CreateGpuTextureCycle(const char *label, SDL_GpuDevice *device, const SDL_GpuTextureDescription *texdesc, const Uint32 numtextures); +SDL_GpuTexture *SDL_GetNextGpuTextureInCycle(SDL_GpuTextureCycle *cycle); +SDL_GpuTexture **SDL_GetNextGpuTexturePtrInCycle(SDL_GpuTextureCycle *cycle); +void SDL_DestroyGpuTextureCycle(SDL_GpuTextureCycle *cycle); typedef struct SDL_GpuFenceCycle SDL_GpuFenceCycle; -SDL_GpuFenceCycle *SDL_GpuCreateFenceCycle(const char *label, SDL_GpuDevice *device, const Uint32 numfences); -SDL_GpuFence *SDL_GpuNextFenceCycle(SDL_GpuFenceCycle *cycle); -SDL_GpuFence **SDL_GpuNextFencePtrCycle(SDL_GpuFenceCycle *cycle); -void SDL_GpuDestroyFenceCycle(SDL_GpuFenceCycle *cycle); +SDL_GpuFenceCycle *SDL_CreateGpuFenceCycle(const char *label, SDL_GpuDevice *device, const Uint32 numfences); +SDL_GpuFence *SDL_GetNextGpuFenceInCycle(SDL_GpuFenceCycle *cycle); +SDL_GpuFence **SDL_GetNextGpuFencePtrInCycle(SDL_GpuFenceCycle *cycle); +void SDL_DestroyGpuFenceCycle(SDL_GpuFenceCycle *cycle); /* Ends C function definitions when using C++ */ #ifdef __cplusplus diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 873da5969a8be..5d0fe071e388e 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -37,17 +37,17 @@ static 
const SDL_GpuDriver *gpu_drivers[] = { &DUMMY_GpuDriver }; -Uint32 -SDL_GpuGetNumDrivers(void) +int +SDL_GetNumGpuDrivers(void) { - return (Uint32) SDL_arraysize(gpu_drivers); + return (int) SDL_arraysize(gpu_drivers); } const char * -SDL_GpuGetDriverName(Uint32 index) +SDL_GetGpuDriver(int index) { - const Uint32 numdrivers = (Uint32) SDL_arraysize(gpu_drivers); - if (index >= numdrivers) { + const int numdrivers = (int) SDL_arraysize(gpu_drivers); + if ((index < 0) || (index >= numdrivers)) { SDL_SetError("index must be in the range of 0 - %u", (unsigned int) (numdrivers ? (numdrivers - 1) : 0)); return NULL; } @@ -55,7 +55,7 @@ SDL_GpuGetDriverName(Uint32 index) } /* helper function since lots of things need an object and a label allocated. */ -static void *allocate_obj_and_string(const size_t objlen, const char *str, char **allocatedstr) +static void *AllocObjAndString(const size_t objlen, const char *str, char **allocatedstr) { void *retval; @@ -84,7 +84,7 @@ static void *allocate_obj_and_string(const size_t objlen, const char *str, char #define ALLOC_OBJ_WITH_LABEL(typ, var, str) { \ char *cpystr; \ - var = (typ *) allocate_obj_and_string(sizeof (typ), str, &cpystr); \ + var = (typ *) AllocObjAndString(sizeof (typ), str, &cpystr); \ if (var != NULL) { \ var->label = cpystr; \ } \ @@ -98,7 +98,7 @@ static void *allocate_obj_and_string(const size_t objlen, const char *str, char #define ALLOC_OBJ_WITH_DESC(typ, var, dsc) { \ char *cpystr; \ - var = (typ *) allocate_obj_and_string(sizeof (typ), (dsc)->label, &cpystr); \ + var = (typ *) AllocObjAndString(sizeof (typ), (dsc)->label, &cpystr); \ if (var != NULL) { \ SDL_memcpy(&var->desc, dsc, sizeof (*(dsc)));\ var->desc.label = cpystr; \ @@ -114,7 +114,7 @@ static void *allocate_obj_and_string(const size_t objlen, const char *str, char /* !!! FIXME: change this API to allow selection of a specific GPU? 
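   For now the only selection hook is the driver name: an app can list the available
   backends with the two functions above and pass one of the names (or NULL for "pick for
   me") to SDL_CreateGpuDevice, roughly:

       int i;
       for (i = 0; i < SDL_GetNumGpuDrivers(); i++) {
           SDL_Log("GPU driver #%d: %s", i, SDL_GetGpuDriver(i));
       }
       device = SDL_CreateGpuDevice("my device", SDL_GetGpuDriver(0));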
*/ static int -GpuCreateDeviceInternal(SDL_GpuDevice *device, const char *driver) +CreateGpuDeviceInternal(SDL_GpuDevice *device, const char *driver) { size_t i; @@ -156,13 +156,13 @@ GpuCreateDeviceInternal(SDL_GpuDevice *device, const char *driver) } SDL_GpuDevice * -SDL_GpuCreateDevice(const char *label, const char *driver) +SDL_CreateGpuDevice(const char *label, const char *driver) { SDL_GpuDevice *device; ALLOC_OBJ_WITH_LABEL(SDL_GpuDevice, device, label); if (device != NULL) { - if (GpuCreateDeviceInternal(device, driver) == -1) { + if (CreateGpuDeviceInternal(device, driver) == -1) { FREE_AND_NULL_OBJ_WITH_LABEL(device); } } @@ -170,7 +170,7 @@ SDL_GpuCreateDevice(const char *label, const char *driver) } void -SDL_GpuDestroyDevice(SDL_GpuDevice *device) +SDL_DestroyGpuDevice(SDL_GpuDevice *device) { if (device) { device->DestroyDevice(device); @@ -178,16 +178,16 @@ SDL_GpuDestroyDevice(SDL_GpuDevice *device) } } -SDL_GpuCpuBuffer * -SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) +SDL_CpuBuffer * +SDL_CreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) { - SDL_GpuCpuBuffer *buffer = NULL; + SDL_CpuBuffer *buffer = NULL; if (!device) { SDL_InvalidParamError("device"); } else if (buflen == 0) { SDL_InvalidParamError("buflen"); } else { - ALLOC_OBJ_WITH_LABEL(SDL_GpuCpuBuffer, buffer, label); + ALLOC_OBJ_WITH_LABEL(SDL_CpuBuffer, buffer, label); if (buffer != NULL) { buffer->device = device; buffer->buflen = buflen; @@ -200,7 +200,7 @@ SDL_GpuCreateCpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 bu } void -SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) +SDL_DestroyCpuBuffer(SDL_CpuBuffer *buffer) { if (buffer) { buffer->device->DestroyCpuBuffer(buffer); @@ -209,7 +209,7 @@ SDL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) } void * -SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen) +SDL_LockCpuBuffer(SDL_CpuBuffer *buffer, Uint32 *_buflen) { void *retval = NULL; if (!buffer) { @@ -225,7 +225,7 @@ SDL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer, Uint32 *_buflen) } int -SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) +SDL_UnlockCpuBuffer(SDL_CpuBuffer *buffer) { if (!buffer) { return SDL_InvalidParamError("buffer"); @@ -234,7 +234,7 @@ SDL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) } SDL_GpuBuffer * -SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen) +SDL_CreateGpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen) { SDL_GpuBuffer *buffer = NULL; @@ -256,7 +256,7 @@ SDL_GpuCreateBuffer(const char *label, SDL_GpuDevice *device, const Uint32 bufle } void -SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer) +SDL_DestroyGpuBuffer(SDL_GpuBuffer *buffer) { if (buffer) { buffer->device->DestroyBuffer(buffer); @@ -265,7 +265,7 @@ SDL_GpuDestroyBuffer(SDL_GpuBuffer *buffer) } SDL_GpuTexture * -SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc) +SDL_CreateGpuTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *desc) { SDL_GpuTexture *texture = NULL; @@ -298,7 +298,7 @@ SDL_GpuCreateTexture(SDL_GpuDevice *device, const SDL_GpuTextureDescription *des } int -SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc) +SDL_GetGpuTextureDescription(SDL_GpuTexture *texture, SDL_GpuTextureDescription *desc) { if (!texture) { return SDL_InvalidParamError("pipeline"); @@ -310,7 +310,7 @@ SDL_GpuGetTextureDescription(SDL_GpuTexture *texture, 
SDL_GpuTextureDescription } void -SDL_GpuDestroyTexture(SDL_GpuTexture *texture) +SDL_DestroyGpuTexture(SDL_GpuTexture *texture) { if (texture) { texture->device->DestroyTexture(texture); @@ -319,7 +319,7 @@ SDL_GpuDestroyTexture(SDL_GpuTexture *texture) } SDL_GpuShader * -SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen) +SDL_CreateGpuShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytecode, const Uint32 bytecodelen) { SDL_GpuShader *shader = NULL; @@ -343,7 +343,7 @@ SDL_GpuCreateShader(const char *label, SDL_GpuDevice *device, const Uint8 *bytec } void -SDL_GpuDestroyShader(SDL_GpuShader *shader) +SDL_DestroyGpuShader(SDL_GpuShader *shader) { if (shader) { if (SDL_AtomicDecRef(&shader->refcount)) { @@ -354,7 +354,7 @@ SDL_GpuDestroyShader(SDL_GpuShader *shader) } SDL_GpuPipeline * -SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc) +SDL_CreateGpuPipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *desc) { SDL_GpuPipeline *pipeline = NULL; if (!device) { @@ -385,7 +385,7 @@ SDL_GpuCreatePipeline(SDL_GpuDevice *device, const SDL_GpuPipelineDescription *d } void -SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) +SDL_DestroyGpuPipeline(SDL_GpuPipeline *pipeline) { if (pipeline) { SDL_GpuShader *vshader = pipeline->desc.vertex_shader; @@ -395,13 +395,13 @@ SDL_GpuDestroyPipeline(SDL_GpuPipeline *pipeline) FREE_AND_NULL_OBJ_WITH_DESC(pipeline); /* decrement reference counts (and possibly destroy) the shaders. */ - SDL_GpuDestroyShader(vshader); - SDL_GpuDestroyShader(fshader); + SDL_DestroyGpuShader(vshader); + SDL_DestroyGpuShader(fshader); } } void -SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc) +SDL_GetDefaultGpuPipelineDescription(SDL_GpuPipelineDescription *desc) { /* !!! FIXME: decide if these are reasonable defaults. 
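   Note on the lifetime handling above: SDL_DestroyGpuShader only drops a reference, and
   SDL_DestroyGpuPipeline drops the pipeline's references to its two shaders. Assuming
   SDL_CreateGpuPipeline takes those references when it succeeds, app code can release its
   own shader handles as soon as the pipeline exists, for example:

       pipeline = SDL_CreateGpuPipeline(device, &desc);
       SDL_DestroyGpuShader(desc.vertex_shader);    // just drops the app's reference
       SDL_DestroyGpuShader(desc.fragment_shader);  // the pipeline still holds its own
       // ...much later...
       SDL_DestroyGpuPipeline(pipeline);            // now the shaders can really be freed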
*/ SDL_zerop(desc); @@ -436,7 +436,7 @@ SDL_GpuDefaultPipelineDescription(SDL_GpuPipelineDescription *desc) } int -SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc) +SDL_GetGpuPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescription *desc) { if (!pipeline) { return SDL_InvalidParamError("pipeline"); @@ -448,7 +448,7 @@ SDL_GpuGetPipelineDescription(SDL_GpuPipeline *pipeline, SDL_GpuPipelineDescript } SDL_GpuSampler * -SDL_GpuCreateSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc) +SDL_CreateGpuSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *desc) { SDL_GpuSampler *sampler = NULL; if (!device) { @@ -468,7 +468,7 @@ SDL_GpuCreateSampler(SDL_GpuDevice *device, const SDL_GpuSamplerDescription *des } void -SDL_GpuDestroySampler(SDL_GpuSampler *sampler) +SDL_DestroyGpuSampler(SDL_GpuSampler *sampler) { if (sampler) { sampler->device->DestroySampler(sampler); @@ -648,7 +648,7 @@ void nuke_pipeline(const void *key, const void *value, void *data) { SDL_GpuPipeline *pipeline = (SDL_GpuPipeline *) value; SDL_assert(key == &pipeline->desc); - SDL_GpuDestroyPipeline(pipeline); + SDL_DestroyGpuPipeline(pipeline); } @@ -687,11 +687,11 @@ void nuke_sampler(const void *key, const void *value, void *data) { SDL_GpuSampler *sampler = (SDL_GpuSampler *) value; SDL_assert(key == &sampler->desc); - SDL_GpuDestroySampler(sampler); + SDL_DestroyGpuSampler(sampler); } SDL_GpuStateCache * -SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device) +SDL_CreateGpuStateCache(const char *label, SDL_GpuDevice *device) { SDL_GpuStateCache *cache = NULL; if (!device) { @@ -706,7 +706,7 @@ SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device) cache->pipeline_cache = SDL_NewHashTable(NULL, 128, hash_pipeline, keymatch_pipeline, nuke_pipeline, SDL_FALSE); cache->sampler_cache = SDL_NewHashTable(NULL, 16, hash_sampler, keymatch_sampler, nuke_sampler, SDL_FALSE); if (!cache->pipeline_mutex || !cache->sampler_mutex || !cache->pipeline_cache || !cache->sampler_cache) { - SDL_GpuDestroyStateCache(cache); /* can clean up half-created objects. */ + SDL_DestroyGpuStateCache(cache); /* can clean up half-created objects. */ cache = NULL; } } @@ -728,10 +728,10 @@ SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device) if (SDL_FindInHashTable(cache->typ##_cache, desc, &val)) { \ retval = (SDL_Gpu##ctyp *) val; \ } else { /* not cached yet, make a new one and cache it. 
*/ \ - retval = SDL_GpuCreate##ctyp(cache->device, desc); \ + retval = SDL_CreateGpu##ctyp(cache->device, desc); \ if (retval) { \ if (!SDL_InsertIntoHashTable(cache->typ##_cache, &retval->desc, retval)) { \ - SDL_GpuDestroy##ctyp(retval); \ + SDL_DestroyGpu##ctyp(retval); \ retval = NULL; \ } \ } \ @@ -740,19 +740,19 @@ SDL_GpuCreateStateCache(const char *label, SDL_GpuDevice *device) return retval SDL_GpuPipeline * -SDL_GpuGetCachedPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc) +SDL_GetCachedGpuPipeline(SDL_GpuStateCache *cache, const SDL_GpuPipelineDescription *desc) { GETCACHEDOBJIMPL(Pipeline, pipeline); } SDL_GpuSampler * -SDL_GpuGetCachedSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc) +SDL_GetCachedGpuSampler(SDL_GpuStateCache *cache, const SDL_GpuSamplerDescription *desc) { GETCACHEDOBJIMPL(Sampler, sampler); } void -SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache) +SDL_DestroyGpuStateCache(SDL_GpuStateCache *cache) { if (cache) { SDL_DestroyMutex(cache->pipeline_mutex); @@ -764,7 +764,7 @@ SDL_GpuDestroyStateCache(SDL_GpuStateCache *cache) } SDL_GpuCommandBuffer * -SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device) +SDL_CreateGpuCommandBuffer(const char *label, SDL_GpuDevice *device) { SDL_GpuCommandBuffer *cmdbuf = NULL; if (!device) { @@ -782,7 +782,7 @@ SDL_GpuCreateCommandBuffer(const char *label, SDL_GpuDevice *device) } SDL_GpuRenderPass * -SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, +SDL_StartGpuRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, Uint32 num_color_attachments, const SDL_GpuColorAttachmentDescription *color_attachments, const SDL_GpuDepthAttachmentDescription *depth_attachment, @@ -824,7 +824,7 @@ SDL_GpuStartRenderPass(const char *label, SDL_GpuCommandBuffer *cmdbuf, } int -SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) +SDL_SetGpuRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) { if (!pass) { return SDL_InvalidParamError("pass"); @@ -834,55 +834,55 @@ SDL_GpuSetRenderPassPipeline(SDL_GpuRenderPass *pass, SDL_GpuPipeline *pipeline) } int -SDL_GpuSetRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar) +SDL_SetGpuRenderPassViewport(SDL_GpuRenderPass *pass, const double x, const double y, const double width, const double height, const double znear, const double zfar) { return pass ? pass->device->SetRenderPassViewport(pass, x, y, width, height, znear, zfar) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height) +SDL_SetGpuRenderPassScissor(SDL_GpuRenderPass *pass, const Uint32 x, const Uint32 y, const Uint32 width, const Uint32 height) { return pass ? pass->device->SetRenderPassScissor(pass, x, y, width, height) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha) +SDL_SetGpuRenderPassBlendConstant(SDL_GpuRenderPass *pass, const double red, const double green, const double blue, const double alpha) { return pass ? 
pass->device->SetRenderPassBlendConstant(pass, red, green, blue, alpha) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) +SDL_SetGpuRenderPassVertexBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) { return pass ? pass->device->SetRenderPassVertexBuffer(pass, buffer, offset, index) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) +SDL_SetGpuRenderPassVertexSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) { return pass ? pass->device->SetRenderPassVertexSampler(pass, sampler, index) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) +SDL_SetGpuRenderPassVertexTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) { return pass ? pass->device->SetRenderPassVertexTexture(pass, texture, index) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) +SDL_SetGpuRenderPassFragmentBuffer(SDL_GpuRenderPass *pass, SDL_GpuBuffer *buffer, const Uint32 offset, const Uint32 index) { return pass ? pass->device->SetRenderPassFragmentBuffer(pass, buffer, offset, index) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) +SDL_SetGpuRenderPassFragmentSampler(SDL_GpuRenderPass *pass, SDL_GpuSampler *sampler, const Uint32 index) { return pass ? pass->device->SetRenderPassFragmentSampler(pass, sampler, index) : SDL_InvalidParamError("pass"); } int -SDL_GpuSetRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) +SDL_SetGpuRenderPassFragmentTexture(SDL_GpuRenderPass *pass, SDL_GpuTexture *texture, const Uint32 index) { return pass ? 
pass->device->SetRenderPassFragmentTexture(pass, texture, index) : SDL_InvalidParamError("pass"); } @@ -912,7 +912,7 @@ SDL_GpuDrawInstancedIndexed(SDL_GpuRenderPass *pass, Uint32 index_count, SDL_Gpu } int -SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass) +SDL_EndGpuRenderPass(SDL_GpuRenderPass *pass) { int retval; if (!pass) { @@ -929,7 +929,7 @@ SDL_GpuEndRenderPass(SDL_GpuRenderPass *pass) SDL_GpuBlitPass * -SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) +SDL_StartGpuBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) { SDL_GpuBlitPass *pass = NULL; if (!cmdbuf) { @@ -952,7 +952,7 @@ SDL_GpuStartBlitPass(const char *label, SDL_GpuCommandBuffer *cmdbuf) } int -SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, +SDL_CopyBetweenGpuTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, @@ -970,7 +970,7 @@ SDL_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 } int -SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value) +SDL_FillGpuBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value) { if (!pass) { return SDL_InvalidParamError("pass"); @@ -983,7 +983,7 @@ SDL_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, U } int -SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) +SDL_GenerateGpuMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) { if (!pass) { return SDL_InvalidParamError("pass"); @@ -994,7 +994,7 @@ SDL_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) } int -SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +SDL_CopyCpuBufferToGpu(SDL_GpuBlitPass *pass, SDL_CpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { if (!pass) { return SDL_InvalidParamError("pass"); @@ -1011,7 +1011,7 @@ SDL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint3 } int -SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +SDL_CopyGpuBufferToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_CpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { if (!pass) { return SDL_InvalidParamError("pass"); @@ -1028,7 +1028,7 @@ SDL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 s } int -SDL_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +SDL_CopyBetweenGpuBuffers(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { if (!pass) { return SDL_InvalidParamError("pass"); @@ -1045,7 +1045,7 @@ SDL_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 s } int -SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, +SDL_CopyGpuBufferToGpuTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, @@ -1063,7 +1063,7 @@ 
SDL_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uin
 }
 
 int
-SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel,
+SDL_CopyGpuTextureToGpuBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel,
                                Uint32 srcx, Uint32 srcy, Uint32 srcz,
                                Uint32 srcw, Uint32 srch, Uint32 srcdepth,
                                SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch)
@@ -1080,7 +1080,7 @@ SDL_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Ui
 }
 
 int
-SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass)
+SDL_EndGpuBlitPass(SDL_GpuBlitPass *pass)
 {
     int retval;
     if (!pass) {
@@ -1096,7 +1096,7 @@ SDL_GpuEndBlitPass(SDL_GpuBlitPass *pass)
 }
 
 SDL_GpuFence *
-SDL_GpuCreateFence(const char *label, SDL_GpuDevice *device)
+SDL_CreateGpuFence(const char *label, SDL_GpuDevice *device)
 {
     SDL_GpuFence *fence = NULL;
     if (!device) {
@@ -1114,7 +1114,7 @@ SDL_GpuCreateFence(const char *label, SDL_GpuDevice *device)
 }
 
 void
-SDL_GpuDestroyFence(SDL_GpuFence *fence)
+SDL_DestroyGpuFence(SDL_GpuFence *fence)
 {
     if (fence) {
         fence->device->DestroyFence(fence);
@@ -1123,25 +1123,25 @@ SDL_GpuDestroyFence(SDL_GpuFence *fence)
 }
 
 int
-SDL_GpuQueryFence(SDL_GpuFence *fence)
+SDL_QueryGpuFence(SDL_GpuFence *fence)
 {
     return fence ? fence->device->QueryFence(fence) : SDL_InvalidParamError("fence");
 }
 
 int
-SDL_GpuResetFence(SDL_GpuFence *fence)
+SDL_ResetGpuFence(SDL_GpuFence *fence)
 {
     return fence ? fence->device->ResetFence(fence) : SDL_InvalidParamError("fence");
 }
 
 int
-SDL_GpuWaitFence(SDL_GpuFence *fence)
+SDL_WaitGpuFence(SDL_GpuFence *fence)
 {
     return fence ? fence->device->WaitFence(fence) : SDL_InvalidParamError("fence");
 }
 
 int
-SDL_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence)
+SDL_SubmitGpuCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence)
 {
     int retval;
 
@@ -1160,7 +1160,7 @@ SDL_GpuSubmitCommandBuffer(SDL_GpuCommandBuffer *cmdbuf, SDL_GpuFence *fence)
 }
 
 void
-SDL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer)
+SDL_AbandonGpuCommandBuffer(SDL_GpuCommandBuffer *buffer)
 {
     if (buffer) {
         /* !!! 
FIXME: deal with buffer->currently_encoding */ @@ -1170,7 +1170,7 @@ SDL_GpuAbandonCommandBuffer(SDL_GpuCommandBuffer *buffer) } SDL_GpuTexture * -SDL_GpuGetBackbuffer(SDL_GpuDevice *device, SDL_Window *window) +SDL_GetGpuBackbuffer(SDL_GpuDevice *device, SDL_Window *window) { SDL_GpuTexture *retval = NULL; if (!device) { @@ -1222,9 +1222,9 @@ SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval) } else if (!window) { return SDL_InvalidParamError("window"); } else if (window->gpu_device != device) { - return SDL_SetError("Window is not claimed by this GPU device (call SDL_GpuGetBackbuffer first!)"); + return SDL_SetError("Window is not claimed by this GPU device (call SDL_GetGpuBackbuffer first!)"); } else if (!window->gpu_backbuffer) { - return SDL_SetError("Window does not have a prepared backbuffer (call SDL_GpuGetBackbuffer first!)"); + return SDL_SetError("Window does not have a prepared backbuffer (call SDL_GetGpuBackbuffer first!)"); } else if (device->Present(device, window, (SDL_GpuTexture *) window->gpu_backbuffer, swapinterval) == -1) { return -1; } else { @@ -1238,10 +1238,10 @@ SDL_GpuPresent(SDL_GpuDevice *device, SDL_Window *window, int swapinterval) return 0; } -SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) +SDL_GpuBuffer *SDL_CreateAndInitGpuBuffer(const char *label, SDL_GpuDevice *device, const Uint32 buflen, const void *data) { SDL_GpuFence *fence = NULL; - SDL_GpuCpuBuffer *staging = NULL; + SDL_CpuBuffer *staging = NULL; SDL_GpuBuffer *gpubuf = NULL; SDL_GpuBuffer *retval = NULL; SDL_GpuCommandBuffer *cmd = NULL; @@ -1255,32 +1255,32 @@ SDL_GpuBuffer *SDL_GpuCreateAndInitBuffer(const char *label, SDL_GpuDevice *devi return NULL; } - if ( ((fence = SDL_GpuCreateFence("Temporary fence for SDL_GpuCreateAndInitBuffer", device)) != NULL) && - ((staging = SDL_GpuCreateCpuBuffer("Staging buffer for SDL_GpuCreateAndInitBuffer", device, buflen, data)) != NULL) && - ((gpubuf = SDL_GpuCreateBuffer(label, device, buflen)) != NULL) && - ((cmd = SDL_GpuCreateCommandBuffer("Command buffer for SDL_GpuCreateAndInitBuffer", device)) != NULL) && - ((blit = SDL_GpuStartBlitPass("Blit pass for SDL_GpuCreateAndInitBuffer", cmd)) != NULL) ) { - SDL_GpuCopyBufferCpuToGpu(blit, staging, 0, gpubuf, 0, buflen); - SDL_GpuEndBlitPass(blit); - SDL_GpuSubmitCommandBuffer(cmd, fence); - SDL_GpuWaitFence(fence); /* so we know it's definitely uploaded */ + if ( ((fence = SDL_CreateGpuFence("Temporary fence for SDL_GpuCreateAndInitBuffer", device)) != NULL) && + ((staging = SDL_CreateCpuBuffer("Staging buffer for SDL_GpuCreateAndInitBuffer", device, buflen, data)) != NULL) && + ((gpubuf = SDL_CreateGpuBuffer(label, device, buflen)) != NULL) && + ((cmd = SDL_CreateGpuCommandBuffer("Command buffer for SDL_GpuCreateAndInitBuffer", device)) != NULL) && + ((blit = SDL_StartGpuBlitPass("Blit pass for SDL_GpuCreateAndInitBuffer", cmd)) != NULL) ) { + SDL_CopyCpuBufferToGpu(blit, staging, 0, gpubuf, 0, buflen); + SDL_EndGpuBlitPass(blit); + SDL_SubmitGpuCommandBuffer(cmd, fence); + SDL_WaitGpuFence(fence); /* so we know it's definitely uploaded */ retval = gpubuf; } if (!retval) { - SDL_GpuEndBlitPass(blit); /* assume this might be un-ended. */ - SDL_GpuAbandonCommandBuffer(cmd); - SDL_GpuDestroyBuffer(gpubuf); + SDL_EndGpuBlitPass(blit); /* assume this might be un-ended. 
*/
+        SDL_AbandonGpuCommandBuffer(cmd);
+        SDL_DestroyGpuBuffer(gpubuf);
     }
 
-    SDL_GpuDestroyCpuBuffer(staging);
-    SDL_GpuDestroyFence(fence);
+    SDL_DestroyCpuBuffer(staging);
+    SDL_DestroyGpuFence(fence);
 
     return retval;
 }
 
 /* !!! FIXME: SDL_GpuCreateAndInitTexture */
 
 SDL_GpuTexture *
-SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depthtex)
+SDL_MatchingGpuDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTexture *backbuffer, SDL_GpuTexture **depthtex)
 {
     SDL_GpuTextureDescription bbtexdesc, depthtexdesc;
 
@@ -1295,10 +1295,10 @@ SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTex
         return NULL;
     }
 
-    SDL_GpuGetTextureDescription(backbuffer, &bbtexdesc);
+    SDL_GetGpuTextureDescription(backbuffer, &bbtexdesc);
 
     if (*depthtex) {
-        SDL_GpuGetTextureDescription(*depthtex, &depthtexdesc);
+        SDL_GetGpuTextureDescription(*depthtex, &depthtexdesc);
     }
 
     /* !!! FIXME: check texture_type, pixel_format, etc? */
@@ -1310,52 +1310,52 @@ SDL_GpuMatchingDepthTexture(const char *label, SDL_GpuDevice *device, SDL_GpuTex
         depthtexdesc.usage = SDL_GPUTEXUSAGE_RENDER_TARGET;  /* !!! FIXME: does this need shader read or write to be the depth buffer? */
         depthtexdesc.width = bbtexdesc.width;
         depthtexdesc.height = bbtexdesc.width;
-        SDL_GpuDestroyTexture(*depthtex);
-        *depthtex = SDL_GpuCreateTexture(device, &depthtexdesc);
+        SDL_DestroyGpuTexture(*depthtex);
+        *depthtex = SDL_CreateGpuTexture(device, &depthtexdesc);
     }
 
     return *depthtex;
 }
 
 /* various object cycle APIs ... */
 
-#define SDL_GPUCYCLETYPE SDL_GpuCpuBufferCycle
-#define SDL_GPUCYCLEITEMTYPE SDL_GpuCpuBuffer
-#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateCpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const void *data, const Uint32 numitems)
-#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextCpuBufferCycle
-#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextCpuBufferPtrCycle
-#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyCpuBufferCycle
-#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_GpuCreateCpuBuffer(lbl, device, bufsize, data); failvar = (itemvar == NULL); }
-#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyCpuBuffer
+#define SDL_GPUCYCLETYPE SDL_CpuBufferCycle
+#define SDL_GPUCYCLEITEMTYPE SDL_CpuBuffer
+#define SDL_GPUCYCLECREATEFNSIG SDL_CreateCpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const void *data, const Uint32 numitems)
+#define SDL_GPUCYCLENEXTFNNAME SDL_GetNextCpuBufferInCycle
+#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GetNextCpuBufferPtrInCycle
+#define SDL_GPUCYCLEDESTROYFNNAME SDL_DestroyCpuBufferCycle
+#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_CreateCpuBuffer(lbl, device, bufsize, data); failvar = (itemvar == NULL); }
+#define SDL_GPUCYCLEDESTROY SDL_DestroyCpuBuffer
 #include "SDL_gpu_cycle_impl.h"
 
 #define SDL_GPUCYCLETYPE SDL_GpuBufferCycle
 #define SDL_GPUCYCLEITEMTYPE SDL_GpuBuffer
-#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const Uint32 numitems)
-#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextBufferCycle
-#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextBufferPtrCycle
-#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyBufferCycle
-#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_GpuCreateBuffer(lbl, device, bufsize); failvar = (itemvar == NULL); }
-#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyBuffer
+#define SDL_GPUCYCLECREATEFNSIG SDL_CreateGpuBufferCycle(const char *label, SDL_GpuDevice *device, const Uint32 bufsize, const Uint32 numitems)
+#define SDL_GPUCYCLENEXTFNNAME SDL_GetNextGpuBufferInCycle
+#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GetNextGpuBufferPtrInCycle
+#define SDL_GPUCYCLEDESTROYFNNAME SDL_DestroyGpuBufferCycle
+#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_CreateGpuBuffer(lbl, device, bufsize); failvar = (itemvar == NULL); }
+#define SDL_GPUCYCLEDESTROY SDL_DestroyGpuBuffer
 #include "SDL_gpu_cycle_impl.h"
 
 #define SDL_GPUCYCLETYPE SDL_GpuTextureCycle
 #define SDL_GPUCYCLEITEMTYPE SDL_GpuTexture
-#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateTextureCycle(const char *label, SDL_GpuDevice *device, const SDL_GpuTextureDescription *texdesc, const Uint32 numitems)
-#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextTextureCycle
-#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextTexturePtrCycle
-#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyTextureCycle
-#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { if (texdesc) { SDL_GpuTextureDescription td; SDL_memcpy(&td, texdesc, sizeof (td)); td.label = lbl; itemvar = SDL_GpuCreateTexture(device, &td); failvar = (itemvar == NULL); } else { itemvar = NULL; failvar = SDL_FALSE; } }
-#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyTexture
+#define SDL_GPUCYCLECREATEFNSIG SDL_CreateGpuTextureCycle(const char *label, SDL_GpuDevice *device, const SDL_GpuTextureDescription *texdesc, const Uint32 numitems)
+#define SDL_GPUCYCLENEXTFNNAME SDL_GetNextGpuTextureInCycle
+#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GetNextGpuTexturePtrInCycle
+#define SDL_GPUCYCLEDESTROYFNNAME SDL_DestroyGpuTextureCycle
+#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { if (texdesc) { SDL_GpuTextureDescription td; SDL_memcpy(&td, texdesc, sizeof (td)); td.label = lbl; itemvar = SDL_CreateGpuTexture(device, &td); failvar = (itemvar == NULL); } else { itemvar = NULL; failvar = SDL_FALSE; } }
+#define SDL_GPUCYCLEDESTROY SDL_DestroyGpuTexture
 #include "SDL_gpu_cycle_impl.h"
 
 #define SDL_GPUCYCLETYPE SDL_GpuFenceCycle
 #define SDL_GPUCYCLEITEMTYPE SDL_GpuFence
-#define SDL_GPUCYCLECREATEFNSIG SDL_GpuCreateFenceCycle(const char *label, SDL_GpuDevice *device, const Uint32 numitems)
-#define SDL_GPUCYCLENEXTFNNAME SDL_GpuNextFenceCycle
-#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GpuNextFencePtrCycle
-#define SDL_GPUCYCLEDESTROYFNNAME SDL_GpuDestroyFenceCycle
-#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_GpuCreateFence(lbl, device); failvar = (itemvar == NULL); }
-#define SDL_GPUCYCLEDESTROY SDL_GpuDestroyFence
+#define SDL_GPUCYCLECREATEFNSIG SDL_CreateGpuFenceCycle(const char *label, SDL_GpuDevice *device, const Uint32 numitems)
+#define SDL_GPUCYCLENEXTFNNAME SDL_GetNextGpuFenceInCycle
+#define SDL_GPUCYCLENEXTPTRFNNAME SDL_GetNextGpuFencePtrInCycle
+#define SDL_GPUCYCLEDESTROYFNNAME SDL_DestroyGpuFenceCycle
+#define SDL_GPUCYCLECREATE(lbl, failvar, itemvar) { itemvar = SDL_CreateGpuFence(lbl, device); failvar = (itemvar == NULL); }
+#define SDL_GPUCYCLEDESTROY SDL_DestroyGpuFence
 #include "SDL_gpu_cycle_impl.h"
 
 /* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h
index f9ecb979cc59e..cc3a0b36e5cc4 100644
--- a/src/gpu/SDL_sysgpu.h
+++ b/src/gpu/SDL_sysgpu.h
@@ -28,7 +28,7 @@
 
 #include "../SDL_hashtable.h"
 
-struct SDL_GpuCpuBuffer
+struct SDL_CpuBuffer
 {
     SDL_GpuDevice *device;
     const char *label;
@@ -115,10 +115,10 @@ struct SDL_GpuDevice
     /* !!! FIXME: we need an UnclaimWindow for when the device (or window!) 
is being destroyed */ int (*ClaimWindow)(SDL_GpuDevice *device, SDL_Window *window); - int (*CreateCpuBuffer)(SDL_GpuCpuBuffer *buffer, const void *data); - void (*DestroyCpuBuffer)(SDL_GpuCpuBuffer *buffer); - void *(*LockCpuBuffer)(SDL_GpuCpuBuffer *buffer); - int (*UnlockCpuBuffer)(SDL_GpuCpuBuffer *buffer); + int (*CreateCpuBuffer)(SDL_CpuBuffer *buffer, const void *data); + void (*DestroyCpuBuffer)(SDL_CpuBuffer *buffer); + void *(*LockCpuBuffer)(SDL_CpuBuffer *buffer); + int (*UnlockCpuBuffer)(SDL_CpuBuffer *buffer); int (*CreateBuffer)(SDL_GpuBuffer *buffer); void (*DestroyBuffer)(SDL_GpuBuffer *buffer); @@ -159,8 +159,8 @@ struct SDL_GpuDevice int (*CopyBetweenTextures)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); int (*FillBuffer)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value); int (*GenerateMipmaps)(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture); - int (*CopyBufferCpuToGpu)(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); - int (*CopyBufferGpuToCpu)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); + int (*CopyBufferCpuToGpu)(SDL_GpuBlitPass *pass, SDL_CpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); + int (*CopyBufferGpuToCpu)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_CpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); int (*CopyBufferGpuToGpu)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length); int (*CopyFromBufferToTexture)(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz); int (*CopyFromTextureToBuffer)(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch); diff --git a/src/gpu/dummy/SDL_gpu_dummy.c b/src/gpu/dummy/SDL_gpu_dummy.c index f41f3bce5e1f2..f9ca12d0cfb70 100644 --- a/src/gpu/dummy/SDL_gpu_dummy.c +++ b/src/gpu/dummy/SDL_gpu_dummy.c @@ -30,7 +30,7 @@ static void DUMMY_GpuDestroyDevice(SDL_GpuDevice *device) { /* no-op */ } static int DUMMY_GpuClaimWindow(SDL_GpuDevice *device, SDL_Window *window) { return 0; } -static int DUMMY_GpuCreateCpuBuffer(SDL_GpuCpuBuffer *buffer, const void *data) +static int DUMMY_GpuCreateCpuBuffer(SDL_CpuBuffer *buffer, const void *data) { /* have to save off buffer data so we can provide it for locking, etc. 
*/ buffer->driverdata = SDL_calloc(1, buffer->buflen); @@ -43,19 +43,19 @@ static int DUMMY_GpuCreateCpuBuffer(SDL_GpuCpuBuffer *buffer, const void *data) return 0; } -static void DUMMY_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) +static void DUMMY_GpuDestroyCpuBuffer(SDL_CpuBuffer *buffer) { SDL_free(buffer->driverdata); } -static void *DUMMY_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer) +static void *DUMMY_GpuLockCpuBuffer(SDL_CpuBuffer *buffer) { return buffer->driverdata; } /* we could get fancier and manage imaginary GPU buffers and textures, but I don't think it's worth it atm. */ -static int DUMMY_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) { return 0; } +static int DUMMY_GpuUnlockCpuBuffer(SDL_CpuBuffer *buffer) { return 0; } static int DUMMY_GpuCreateBuffer(SDL_GpuBuffer *buffer) { return 0; } static void DUMMY_GpuDestroyBuffer(SDL_GpuBuffer *buffer) {} static int DUMMY_GpuCreateTexture(SDL_GpuTexture *texture) { return 0; } @@ -87,8 +87,8 @@ static int DUMMY_GpuStartBlitPass(SDL_GpuBlitPass *pass) { return 0; } static int DUMMY_GpuCopyBetweenTextures(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } static int DUMMY_GpuFillBuffer(SDL_GpuBlitPass *pass, SDL_GpuBuffer *buffer, Uint32 offset, Uint32 length, Uint8 value) { return 0; } static int DUMMY_GpuGenerateMipmaps(SDL_GpuBlitPass *pass, SDL_GpuTexture *texture) { return 0; } -static int DUMMY_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } -static int DUMMY_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } +static int DUMMY_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_CpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } +static int DUMMY_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_CpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } static int DUMMY_GpuCopyBufferGpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return 0; } static int DUMMY_GpuCopyFromBufferToTexture(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, Uint32 srcpitch, Uint32 srcimgpitch, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuTexture *dsttex, Uint32 dstslice, Uint32 dstlevel, Uint32 dstx, Uint32 dsty, Uint32 dstz) { return 0; } static int DUMMY_GpuCopyFromTextureToBuffer(SDL_GpuBlitPass *pass, SDL_GpuTexture *srctex, Uint32 srcslice, Uint32 srclevel, Uint32 srcx, Uint32 srcy, Uint32 srcz, Uint32 srcw, Uint32 srch, Uint32 srcdepth, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 dstpitch, Uint32 dstimgpitch) { return 0; } diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index 3d10ac845054b..d2f0ce5d9c01e 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -553,7 +553,7 @@ @implementation METAL_GpuBlitPassData } static int -METAL_GpuCreateCpuBuffer(SDL_GpuCpuBuffer *buffer, const void *data) +METAL_GpuCreateCpuBuffer(SDL_CpuBuffer *buffer, const void *data) { METAL_GpuDeviceData *devdata = (__bridge METAL_GpuDeviceData *) buffer->device->driverdata; 
METAL_GpuBufferData *bufferdata; @@ -583,13 +583,13 @@ @implementation METAL_GpuBlitPassData } static void -METAL_GpuDestroyCpuBuffer(SDL_GpuCpuBuffer *buffer) +METAL_GpuDestroyCpuBuffer(SDL_CpuBuffer *buffer) { CFBridgingRelease(buffer->driverdata); } static void * -METAL_GpuLockCpuBuffer(SDL_GpuCpuBuffer *buffer) +METAL_GpuLockCpuBuffer(SDL_CpuBuffer *buffer) { METAL_GpuBufferData *bufdata = (__bridge METAL_GpuBufferData *) buffer->driverdata; void *retval = [bufdata.mtlbuffer contents]; @@ -598,7 +598,7 @@ @implementation METAL_GpuBlitPassData } static int -METAL_GpuUnlockCpuBuffer(SDL_GpuCpuBuffer *buffer) +METAL_GpuUnlockCpuBuffer(SDL_CpuBuffer *buffer) { return 0; } @@ -1316,13 +1316,13 @@ static int METAL_GpuCreatePipeline(SDL_GpuPipeline *pipeline) } static int -METAL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_GpuCpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +METAL_GpuCopyBufferCpuToGpu(SDL_GpuBlitPass *pass, SDL_CpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return BlitPassCopyBetweenBuffers(pass, srcbuf->driverdata, srcoffset, dstbuf->driverdata, dstoffset, length); } static int -METAL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_GpuCpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) +METAL_GpuCopyBufferGpuToCpu(SDL_GpuBlitPass *pass, SDL_GpuBuffer *srcbuf, Uint32 srcoffset, SDL_CpuBuffer *dstbuf, Uint32 dstoffset, Uint32 length) { return BlitPassCopyBetweenBuffers(pass, srcbuf->driverdata, srcoffset, dstbuf->driverdata, dstoffset, length); } diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index a6bd2940297dc..58f4323aa0024 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -24,7 +24,7 @@ SDL_GpuDevice *gpuDevice = NULL; static void shutdownGpu(void) { - SDL_GpuDestroyDevice(gpuDevice); + SDL_DestroyGpuDevice(gpuDevice); gpuDevice = NULL; } @@ -38,7 +38,7 @@ static void quit(int rc) static void initGpu(void) { - gpuDevice = SDL_GpuCreateDevice("The GPU device", NULL); + gpuDevice = SDL_CreateGpuDevice("The GPU device", NULL); if (!gpuDevice) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to create GPU device: %s", SDL_GetError()); quit(2); @@ -56,32 +56,32 @@ static void render(SDL_Window *window) return; } - cmd = SDL_GpuCreateCommandBuffer("empty command buffer", gpuDevice); + cmd = SDL_CreateGpuCommandBuffer("empty command buffer", gpuDevice); if (!cmd) { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_GpuCreateCommandBuffer(): %s\n", SDL_GetError()); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_CreateGpuCommandBuffer(): %s\n", SDL_GetError()); quit(2); } currentTime = (double)SDL_GetPerformanceCounter() / SDL_GetPerformanceFrequency(); SDL_zero(color_desc); - color_desc.texture = SDL_GpuGetBackbuffer(gpuDevice, window); + color_desc.texture = SDL_GetGpuBackbuffer(gpuDevice, window); color_desc.color_init = SDL_GPUPASSINIT_CLEAR; color_desc.clear_red = (float)(0.5 + 0.5 * SDL_sin(currentTime)); color_desc.clear_green = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 2 / 3)); color_desc.clear_blue = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 4 / 3)); color_desc.clear_alpha = 1.0f; - pass = SDL_GpuStartRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL); + pass = SDL_StartGpuRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL); if (!pass) { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_GpuStartRenderPass(): 
%s\n", SDL_GetError()); + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "SDL_StartGpuRenderPass(): %s\n", SDL_GetError()); quit(2); } - SDL_GpuEndRenderPass(pass); + SDL_EndGpuRenderPass(pass); /* literally nothing to do, we just start a pass to say "clear the framebuffer to this color," present, and we're done. */ - SDL_GpuSubmitCommandBuffer(cmd, NULL); + SDL_SubmitGpuCommandBuffer(cmd, NULL); SDL_GpuPresent(gpuDevice, window, 1); } @@ -117,7 +117,7 @@ int main(int argc, char **argv) SDL_Log("Screen BPP : %d\n", SDL_BITSPERPIXEL(mode.format)); SDL_GetWindowSize(state->windows[0], &dw, &dh); SDL_Log("Window Size : %d,%d\n", dw, dh); - SDL_GpuGetTextureDescription(SDL_GpuGetBackbuffer(gpuDevice, state->windows[0]), &texdesc); /* !!! FIXME: probably shouldn't do this. */ + SDL_GetGpuTextureDescription(SDL_GetGpuBackbuffer(gpuDevice, state->windows[0]), &texdesc); /* !!! FIXME: probably shouldn't do this. */ SDL_Log("Draw Size : %d,%d\n", (int) texdesc.width, (int) texdesc.height); /* Main render loop */ diff --git a/test/testgpu_spinning_cube.c b/test/testgpu_spinning_cube.c index 1d68eb7e55d82..f25dc26cfb921 100644 --- a/test/testgpu_spinning_cube.c +++ b/test/testgpu_spinning_cube.c @@ -47,17 +47,17 @@ static void shutdownGpu(void) int i; for (i = 0; i < state->num_windows; i++) { WindowState *winstate = &window_states[i]; - SDL_GpuDestroyTextureCycle(winstate->texcycle_depth); - SDL_GpuDestroyCpuBufferCycle(winstate->cpubufcycle_uniforms); - SDL_GpuDestroyBufferCycle(winstate->gpubufcycle_uniforms); + SDL_DestroyGpuTextureCycle(winstate->texcycle_depth); + SDL_DestroyGpuCpuBufferCycle(winstate->cpubufcycle_uniforms); + SDL_DestroyGpuBufferCycle(winstate->gpubufcycle_uniforms); } SDL_free(window_states); window_states = NULL; } - SDL_GpuDestroyBuffer(render_state.gpubuf_static); - SDL_GpuDestroyPipeline(render_state.pipeline); - SDL_GpuDestroyDevice(gpu_device); + SDL_DestroyGpuBuffer(render_state.gpubuf_static); + SDL_DestroyGpuPipeline(render_state.pipeline); + SDL_DestroyGpuDevice(gpu_device); SDL_zero(render_state); gpu_device = NULL; @@ -252,12 +252,12 @@ static void Render(SDL_Window *window, const int windownum) { WindowState *winstate = &window_states[windownum]; - SDL_GpuTexture *backbuffer = SDL_GpuGetBackbuffer(gpu_device, window); + SDL_GpuTexture *backbuffer = SDL_GetGpuBackbuffer(gpu_device, window); SDL_GpuColorAttachmentDescription color_attachment; SDL_GpuDepthAttachmentDescription depth_attachment; SDL_GpuTexture **depth_texture_ptr; - SDL_GpuCpuBuffer *cpubuf_uniforms = SDL_GpuNextCpuBufferCycle(winstate->cpubufcycle_uniforms); - SDL_GpuBuffer *gpubuf_uniforms = SDL_GpuNextBufferCycle(winstate->gpubufcycle_uniforms); + SDL_GpuCpuBuffer *cpubuf_uniforms = SDL_GetNextCpuBufferCycle(winstate->cpubufcycle_uniforms); + SDL_GpuBuffer *gpubuf_uniforms = SDL_GetNextGpuBufferCycle(winstate->gpubufcycle_uniforms); SDL_GpuTextureDescription texdesc; float matrix_rotate[16], matrix_modelview[16], matrix_perspective[16]; Uint32 drawablew, drawableh; @@ -271,7 +271,7 @@ Render(SDL_Window *window, const int windownum) return; } - SDL_GpuGetTextureDescription(backbuffer, &texdesc); + SDL_GetGpuTextureDescription(backbuffer, &texdesc); drawablew = texdesc.width; drawableh = texdesc.height; @@ -293,8 +293,8 @@ Render(SDL_Window *window, const int windownum) perspective_matrix(45.0f, (float)drawablew/drawableh, 0.01f, 100.0f, matrix_perspective); - multiply_matrix(matrix_perspective, matrix_modelview, (float *) SDL_GpuLockCpuBuffer(cpubuf_uniforms, NULL)); - 
SDL_GpuUnlockCpuBuffer(cpubuf_uniforms); + multiply_matrix(matrix_perspective, matrix_modelview, (float *) SDL_LockCpuBuffer(cpubuf_uniforms, NULL)); + SDL_UnlockCpuBuffer(cpubuf_uniforms); winstate->angle_x += 3; winstate->angle_y += 2; @@ -308,20 +308,20 @@ Render(SDL_Window *window, const int windownum) if(winstate->angle_z < 0) winstate->angle_z += 360; /* Copy the new uniform data to the GPU */ - cmd = SDL_GpuCreateCommandBuffer("Render new frame", gpu_device); + cmd = SDL_CreateGpuCommandBuffer("Render new frame", gpu_device); if (!cmd) { SDL_Log("Failed to create command buffer: %s\n", SDL_GetError()); quit(2); } - blit = SDL_GpuStartBlitPass("Copy mvp matrix to GPU pass", cmd); + blit = SDL_StartGpuBlitPass("Copy mvp matrix to GPU pass", cmd); if (!blit) { SDL_Log("Failed to create blit pass: %s\n", SDL_GetError()); quit(2); } - SDL_GpuCopyBufferCpuToGpu(blit, cpubuf_uniforms, 0, gpubuf_uniforms, 0, sizeof (float) * 16); - SDL_GpuEndBlitPass(blit); + SDL_CopyGpuBufferToCpu(blit, cpubuf_uniforms, 0, gpubuf_uniforms, 0, sizeof (float) * 16); + SDL_EndGpuBlitPass(blit); SDL_zero(color_attachment); color_attachment.texture = backbuffer; @@ -330,8 +330,8 @@ Render(SDL_Window *window, const int windownum) /* resize the depth texture if the window size changed */ SDL_snprintf(label, sizeof (label), "Depth buffer for window #%d", windownum); - depth_texture_ptr = SDL_GpuNextTexturePtrCycle(winstate->texcycle_depth); - if (SDL_GpuMatchingDepthTexture(label, gpu_device, color_attachment.texture, depth_texture_ptr) == NULL) { + depth_texture_ptr = SDL_GetNextGpuTexturePtrInCycle(winstate->texcycle_depth); + if (SDL_MatchingGpuDepthTexture(label, gpu_device, color_attachment.texture, depth_texture_ptr) == NULL) { SDL_Log("Failed to prepare depth buffer for window #%d: %s\n", windownum, SDL_GetError()); quit(2); } @@ -345,17 +345,17 @@ Render(SDL_Window *window, const int windownum) /* Draw the cube! */ /* !!! FIXME: does viewport/scissor default to the texture size? Because that would be nice. */ - render = SDL_GpuStartRenderPass("Spinning cube render pass", cmd, 1, &color_attachment, &depth_attachment, NULL); - SDL_GpuSetRenderPassPipeline(render, render_state.pipeline); - SDL_GpuSetRenderPassViewport(render, 0.0, 0.0, (double) drawablew, (double) drawableh, 0.0, 1.0); /* !!! FIXME near and far are wrong */ - SDL_GpuSetRenderPassScissor(render, 0.0, 0.0, (double) drawablew, (double) drawableh); - SDL_GpuSetRenderPassVertexBuffer(render, render_state.gpubuf_static, 0, 0); - SDL_GpuSetRenderPassVertexBuffer(render, gpubuf_uniforms, 0, 1); + render = SDL_StartGpuRenderPass("Spinning cube render pass", cmd, 1, &color_attachment, &depth_attachment, NULL); + SDL_SetGpuRenderPassPipeline(render, render_state.pipeline); + SDL_SetGpuRenderPassViewport(render, 0.0, 0.0, (double) drawablew, (double) drawableh, 0.0, 1.0); /* !!! FIXME near and far are wrong */ + SDL_SetGpuRenderPassScissor(render, 0.0, 0.0, (double) drawablew, (double) drawableh); + SDL_SetGpuRenderPassVertexBuffer(render, render_state.gpubuf_static, 0, 0); + SDL_SetGpuRenderPassVertexBuffer(render, gpubuf_uniforms, 0, 1); SDL_GpuDraw(render, 0, SDL_arraysize(vertex_data)); - SDL_GpuEndRenderPass(render); + SDL_EndGpuRenderPass(render); /* push work to the GPU and tell it to present to the window when done. 
*/ - SDL_GpuSubmitCommandBuffer(cmd, NULL); + SDL_SubmitGpuCommandBuffer(cmd, NULL); SDL_GpuPresent(gpu_device, window, 1); } @@ -365,11 +365,11 @@ static SDL_GpuShader *load_shader(const char *label, const char *src, const char Uint8 *bytecode = NULL; Uint32 bytecodelen = 0; /* !!! FIXME: this is broken right now, we need to compile this with the external tools and just keep the binary embedded in here. */ - if (SDL_GpuCompileShader(src, -1, type, "main", &bytecode, &bytecodelen) == -1) { + if (SDL_CompileGpuShader(src, -1, type, "main", &bytecode, &bytecodelen) == -1) { SDL_Log("Failed to compile %s shader: %s", type, SDL_GetError()); quit(2); } - retval = SDL_GpuCreateShader(label, gpu_device, bytecode, bytecodelen); + retval = SDL_CreateGpuShader(label, gpu_device, bytecode, bytecodelen); if (!retval) { SDL_Log("Failed to load %s shader bytecode: %s", type, SDL_GetError()); quit(2); @@ -393,17 +393,17 @@ init_render_state(void) #define CHECK_CREATE(var, thing) { if (!(var)) { SDL_Log("Failed to create %s: %s\n", thing, SDL_GetError()); quit(2); } } - gpu_device = SDL_GpuCreateDevice("The GPU device", NULL); + gpu_device = SDL_CreateGpuDevice("The GPU device", NULL); CHECK_CREATE(gpu_device, "GPU device"); vertex_shader = load_shader("Spinning cube vertex shader", shader_vert_src, "vertex_main"); fragment_shader = load_shader("Spinning cube fragment shader", shader_frag_src, "fragment_main"); /* We just need to upload the static data once. */ - render_state.gpubuf_static = SDL_GpuCreateAndInitBuffer("Static vertex data GPU buffer", gpu_device, sizeof (vertex_data), vertex_data); + render_state.gpubuf_static = SDL_CreateAndInitGpuBuffer("Static vertex data GPU buffer", gpu_device, sizeof (vertex_data), vertex_data); CHECK_CREATE(render_state.gpubuf_static, "static vertex GPU buffer"); - SDL_GpuDefaultPipelineDescription(&pipelinedesc); + SDL_GetDefaultGpuPipelineDescription(&pipelinedesc); pipelinedesc.label = "The spinning cube pipeline"; pipelinedesc.primitive = SDL_GPUPRIM_TRIANGLESTRIP; pipelinedesc.vertex_shader = vertex_shader; @@ -417,15 +417,15 @@ init_render_state(void) pipelinedesc.color_attachments[0].blending_enabled = SDL_FALSE; pipelinedesc.depth_format = SDL_GPUPIXELFMT_Depth24_Stencil8; - render_state.pipeline = SDL_GpuCreatePipeline(gpu_device, &pipelinedesc); + render_state.pipeline = SDL_CreateGpuPipeline(gpu_device, &pipelinedesc); if (!render_state.pipeline) { SDL_Log("Failed to create render pipeline: %s\n", SDL_GetError()); quit(2); } /* These are reference-counted; once the pipeline is created, you don't need to keep these. 
*/ - SDL_GpuDestroyShader(vertex_shader); - SDL_GpuDestroyShader(fragment_shader); + SDL_DestroyGpuShader(vertex_shader); + SDL_DestroyGpuShader(fragment_shader); window_states = (WindowState *) SDL_calloc(state->num_windows, sizeof (WindowState)); if (!window_states) { @@ -440,15 +440,15 @@ init_render_state(void) char label[32]; SDL_snprintf(label, sizeof (label), "Window #%d uniform staging buffer", i); - winstate->cpubufcycle_uniforms = SDL_GpuCreateCpuBufferCycle(label, gpu_device, sizeof (float) * 16, NULL, 3); + winstate->cpubufcycle_uniforms = SDL_CreateCpuBufferCycle(label, gpu_device, sizeof (float) * 16, NULL, 3); CHECK_CREATE(winstate->cpubufcycle_uniforms, label); SDL_snprintf(label, sizeof (label), "Window #%d uniform GPU buffer", i); - winstate->gpubufcycle_uniforms = SDL_GpuCreateBufferCycle(label, gpu_device, sizeof (float) * 16, 3); + winstate->gpubufcycle_uniforms = SDL_CreateGpuBufferCycle(label, gpu_device, sizeof (float) * 16, 3); CHECK_CREATE(winstate->gpubufcycle_uniforms, label); SDL_snprintf(label, sizeof (label), "Window #%d depth texture", i); /* NULL texdesc, so we'll build them as we need them. */ - winstate->texcycle_depth = SDL_GpuCreateTextureCycle(label, gpu_device, NULL, 3); + winstate->texcycle_depth = SDL_CreateGpuTextureCycle(label, gpu_device, NULL, 3); CHECK_CREATE(winstate->texcycle_depth, label); /* make each window different */ From 0c9401291bcf5448c17acd4724ffdd35fb5fbac9 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 12 Jan 2023 09:53:15 -0500 Subject: [PATCH 49/54] gpu: Removed API warning. This is still in flux, but then again, so is SDL3, and the warnings make the buildbots (with -Werror) fail. --- include/SDL3/SDL_gpu.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 1e257f8abb4a9..38ba352c34103 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -38,13 +38,6 @@ extern "C" { #endif -#ifndef SDL_SUPPRESS_GPU_API_UNSTABLE_WARNING -#warning The SDL GPU API is still in development. Expect things to change! -#warning DO NOT SHIP BUILDS OF SDL TO THE PUBLIC WITH THIS CODE IN IT. -#warning DO NOT SHIP _ANYTHING_ THAT USES THIS API. -#warning This warning will be removed when the API stabilizes. -#endif - /* !!! FIXME: this all needs formal (and significantly more robust) documentation. */ /* From aa05de3e191c4ec7a3e27484802a0075e220dd10 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 12 Jan 2023 17:12:58 -0500 Subject: [PATCH 50/54] gpu: Fix more build errors from SDL3 migration. 
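(Aside, not from the patch itself: the testgpu_simple_clear.c hunk below swaps the libc M_PI macro for SDL's own SDL_PI_D constant in the clear-color animation. A minimal sketch of what that computation does, assuming only SDL_PI_D, SDL_sin(), SDL_GetPerformanceCounter(), and SDL_GetPerformanceFrequency() from SDL3:)

    #include <SDL3/SDL.h>

    /* Cycle the clear color through a rainbow: one sine wave per channel,
       each offset by a third of a circle. SDL_PI_D is SDL's double-precision
       pi, so there is no dependency on M_PI, which standard C doesn't guarantee. */
    static void rainbow_clear_color(float *r, float *g, float *b)
    {
        const double now = (double)SDL_GetPerformanceCounter() / SDL_GetPerformanceFrequency();
        *r = (float)(0.5 + 0.5 * SDL_sin(now));
        *g = (float)(0.5 + 0.5 * SDL_sin(now + SDL_PI_D * 2 / 3));
        *b = (float)(0.5 + 0.5 * SDL_sin(now + SDL_PI_D * 4 / 3));
    }

Each channel swings through [0, 1] with a 120-degree phase offset, which is why the cleared window fades smoothly through the spectrum.
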
--- CMakeLists.txt | 1 + test/testgpu_simple_clear.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bdfe6e3637c6..0a05c263e4123 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2205,6 +2205,7 @@ elseif(APPLE) endif() if(SDL_RENDER_METAL) sdl_glob_sources("${SDL3_SOURCE_DIR}/src/render/metal/*.m") + sdl_glob_sources("${SDL3_SOURCE_DIR}/src/gpu/metal/*.m") set(SDL_VIDEO_RENDER_METAL 1) set(SDL_GPU_METAL 1) set(HAVE_RENDER_METAL TRUE) diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index 58f4323aa0024..957ff40c07b43 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -68,8 +68,8 @@ static void render(SDL_Window *window) color_desc.texture = SDL_GetGpuBackbuffer(gpuDevice, window); color_desc.color_init = SDL_GPUPASSINIT_CLEAR; color_desc.clear_red = (float)(0.5 + 0.5 * SDL_sin(currentTime)); - color_desc.clear_green = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 2 / 3)); - color_desc.clear_blue = (float)(0.5 + 0.5 * SDL_sin(currentTime + M_PI * 4 / 3)); + color_desc.clear_green = (float)(0.5 + 0.5 * SDL_sin(currentTime + SDL_PI_D * 2 / 3)); + color_desc.clear_blue = (float)(0.5 + 0.5 * SDL_sin(currentTime + SDL_PI_D * 4 / 3)); color_desc.clear_alpha = 1.0f; pass = SDL_StartGpuRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL); From 52f61cef8a0192fb3aded4d98fd342e420210ea1 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 12 Jan 2023 17:26:33 -0500 Subject: [PATCH 51/54] gpu: testgpu_spinning_cube.c needs the shader compiler, disable for now. --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 89766daa8e0b1..73cc0c349cdac 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -361,7 +361,7 @@ add_sdl_test_executable(testvulkan NO_C90 SOURCES testvulkan.c) add_sdl_test_executable(testoffscreen SOURCES testoffscreen.c) add_sdl_test_executable(testpopup SOURCES testpopup.c) add_sdl_test_executable(testgpu_simple_clear TESTUTILS SOURCES testgpu_simple_clear.c) -add_sdl_test_executable(testgpu_spinning_cube TESTUTILS SOURCES testgpu_spinning_cube.c) +#add_sdl_test_executable(testgpu_spinning_cube TESTUTILS SOURCES testgpu_spinning_cube.c) check_c_compiler_flag(-Wformat-overflow HAVE_WFORMAT_OVERFLOW) if(HAVE_WFORMAT_OVERFLOW) From 4e3fc89c2cfe0ad606b1a2f854d69c74d139a881 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 12 Jan 2023 17:36:17 -0500 Subject: [PATCH 52/54] gpu: Turn off the tests for now. We're not wired into the dynapi yet, so they'll fail to link. 
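(Aside, not from the patch itself: besides commenting the remaining GPU test out of the build, the hunk below widens then/now/frames from Uint32 to Uint64, matching SDL3's 64-bit tick values. A minimal sketch of the frame-rate bookkeeping those variables exist for, assuming SDL_GetTicks() returns Uint64 milliseconds as it does in SDL3; the test's actual reporting code isn't quoted in this series:)

    #include <SDL3/SDL.h>

    /* Average frames-per-second between two Uint64 millisecond timestamps.
       With 32-bit ticks this arithmetic would wrap after roughly 49 days. */
    static void report_fps(Uint64 then, Uint64 now, Uint64 frames)
    {
        if (now > then) {
            SDL_Log("%2.2f frames per second\n", ((double)frames * 1000) / (now - then));
        }
    }
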
--- test/CMakeLists.txt | 2 +- test/testgpu_simple_clear.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 73cc0c349cdac..02d671da8bb58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -360,7 +360,7 @@ add_sdl_test_executable(testcustomcursor SOURCES testcustomcursor.c) add_sdl_test_executable(testvulkan NO_C90 SOURCES testvulkan.c) add_sdl_test_executable(testoffscreen SOURCES testoffscreen.c) add_sdl_test_executable(testpopup SOURCES testpopup.c) -add_sdl_test_executable(testgpu_simple_clear TESTUTILS SOURCES testgpu_simple_clear.c) +#add_sdl_test_executable(testgpu_simple_clear TESTUTILS SOURCES testgpu_simple_clear.c) #add_sdl_test_executable(testgpu_spinning_cube TESTUTILS SOURCES testgpu_spinning_cube.c) check_c_compiler_flag(-Wformat-overflow HAVE_WFORMAT_OVERFLOW) diff --git a/test/testgpu_simple_clear.c b/test/testgpu_simple_clear.c index 957ff40c07b43..ce541f75d38f3 100644 --- a/test/testgpu_simple_clear.c +++ b/test/testgpu_simple_clear.c @@ -91,7 +91,7 @@ int main(int argc, char **argv) int done; SDL_DisplayMode mode; SDL_Event event; - Uint32 then, now, frames; + Uint64 then, now, frames; SDL_GpuTextureDescription texdesc; int dw, dh; From b1fb03d304437611c59c72fbee02c13b6cd49df0 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Fri, 13 Jan 2023 00:58:24 -0500 Subject: [PATCH 53/54] gpu: More patches for SDL3 migration. --- src/gpu/metal/SDL_gpu_metal.m | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index d2f0ce5d9c01e..dcde30f3d5e66 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -468,28 +468,27 @@ @implementation METAL_GpuBlitPassData } -static SDL_MetalView -GetWindowView(SDL_Window *window) +static SDL_MetalView GetWindowView(SDL_Window *window) { SDL_SysWMinfo info; - SDL_VERSION(&info.version); - if (SDL_GetWindowWMInfo(window, &info)) { -#ifdef __MACOSX__ + if (SDL_GetWindowWMInfo(window, &info, SDL_SYSWM_CURRENT_VERSION) == 0) { +#ifdef SDL_ENABLE_SYSWM_COCOA if (info.subsystem == SDL_SYSWM_COCOA) { NSView *view = info.info.cocoa.window.contentView; if (view.subviews.count > 0) { view = view.subviews[0]; if (view.tag == SDL_METALVIEW_TAG) { - return (SDL_MetalView) CFBridgingRetain(view); + return (SDL_MetalView)CFBridgingRetain(view); } } } -#else +#endif +#ifdef SDL_ENABLE_SYSWM_UIKIT if (info.subsystem == SDL_SYSWM_UIKIT) { UIView *view = info.info.uikit.window.rootViewController.view; if (view.tag == SDL_METALVIEW_TAG) { - return (SDL_MetalView) CFBridgingRetain(view); + return (SDL_MetalView)CFBridgingRetain(view); } } #endif From 5db1fd7491ccf1d9b209c91ba163da0b2fb54f6f Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Wed, 11 Oct 2023 00:11:46 -0400 Subject: [PATCH 54/54] gpu: Updated for SDL3 interface changes. 
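(Aside, not from the patch itself: the final patch below is pure SDL3 renaming — SDL_mutex becomes SDL_Mutex, SDL_atomic_t becomes SDL_AtomicInt, and the internal hash table moves from SDL_NewHashTable/SDL_FreeHashTable to SDL_CreateHashTable/SDL_DestroyHashTable. The SDL_AtomicInt field is the refcount behind the earlier "these are reference-counted" note; a rough sketch of that idiom, assuming the SDL_AtomicIncRef()/SDL_AtomicDecRef() helpers that operate on SDL_AtomicInt:)

    #include <SDL3/SDL.h>

    /* Reference-counted object, SDL3 style: SDL_AtomicInt instead of SDL_atomic_t. */
    typedef struct Refcounted
    {
        SDL_AtomicInt refcount;
        /* ...the real structs also carry a label, a device pointer, and driverdata... */
    } Refcounted;

    static void RetainObject(Refcounted *obj)
    {
        SDL_AtomicIncRef(&obj->refcount);
    }

    static void ReleaseObject(Refcounted *obj)
    {
        if (SDL_AtomicDecRef(&obj->refcount)) {  /* true once the count reaches zero */
            SDL_free(obj);
        }
    }
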
---
 src/gpu/SDL_gpu.c    | 8 ++++----
 src/gpu/SDL_sysgpu.h | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c
index 5d0fe071e388e..49343796f9684 100644
--- a/src/gpu/SDL_gpu.c
+++ b/src/gpu/SDL_gpu.c
@@ -703,8 +703,8 @@ SDL_CreateGpuStateCache(const char *label, SDL_GpuDevice *device)
     cache->device = device;
     cache->pipeline_mutex = SDL_CreateMutex();
     cache->sampler_mutex = SDL_CreateMutex();
-    cache->pipeline_cache = SDL_NewHashTable(NULL, 128, hash_pipeline, keymatch_pipeline, nuke_pipeline, SDL_FALSE);
-    cache->sampler_cache = SDL_NewHashTable(NULL, 16, hash_sampler, keymatch_sampler, nuke_sampler, SDL_FALSE);
+    cache->pipeline_cache = SDL_CreateHashTable(NULL, 128, hash_pipeline, keymatch_pipeline, nuke_pipeline, SDL_FALSE);
+    cache->sampler_cache = SDL_CreateHashTable(NULL, 16, hash_sampler, keymatch_sampler, nuke_sampler, SDL_FALSE);
     if (!cache->pipeline_mutex || !cache->sampler_mutex || !cache->pipeline_cache || !cache->sampler_cache) {
         SDL_DestroyGpuStateCache(cache);  /* can clean up half-created objects. */
         cache = NULL;
@@ -756,9 +756,9 @@ SDL_DestroyGpuStateCache(SDL_GpuStateCache *cache)
 {
     if (cache) {
         SDL_DestroyMutex(cache->pipeline_mutex);
-        SDL_FreeHashTable(cache->pipeline_cache);
+        SDL_DestroyHashTable(cache->pipeline_cache);
         SDL_DestroyMutex(cache->sampler_mutex);
-        SDL_FreeHashTable(cache->sampler_cache);
+        SDL_DestroyHashTable(cache->sampler_cache);
         FREE_AND_NULL_OBJ_WITH_LABEL(cache);
     }
 }
diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h
index cc3a0b36e5cc4..41200518a1671 100644
--- a/src/gpu/SDL_sysgpu.h
+++ b/src/gpu/SDL_sysgpu.h
@@ -55,7 +55,7 @@ struct SDL_GpuShader
 {
     SDL_GpuDevice *device;
     const char *label;
-    SDL_atomic_t refcount;
+    SDL_AtomicInt refcount;
     void *driverdata;
 };
 
@@ -182,9 +182,9 @@ struct SDL_GpuStateCache
 {
     const char *label;
     SDL_GpuDevice *device;
-    SDL_mutex *pipeline_mutex;
+    SDL_Mutex *pipeline_mutex;
     SDL_HashTable *pipeline_cache;
-    SDL_mutex *sampler_mutex;
+    SDL_Mutex *sampler_mutex;
     SDL_HashTable *sampler_cache;
 };
 
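(Aside, not from the patch series: pulling the renames from the test diffs above together, the per-frame flow of testgpu_simple_clear.c now reads roughly as below. This is a condensed sketch with error handling removed, not the file's exact contents; the SDL_GpuCommandBuffer and SDL_GpuRenderPass type names for cmd and pass are assumed from context, since their declarations fall outside the quoted hunks:)

    SDL_GpuColorAttachmentDescription color_desc;
    SDL_GpuCommandBuffer *cmd;   /* type name assumed; declaration not quoted above */
    SDL_GpuRenderPass *pass;     /* type name assumed; declaration not quoted above */

    cmd = SDL_CreateGpuCommandBuffer("empty command buffer", gpuDevice);

    SDL_zero(color_desc);
    color_desc.texture = SDL_GetGpuBackbuffer(gpuDevice, window);
    color_desc.color_init = SDL_GPUPASSINIT_CLEAR;
    color_desc.clear_red = 1.0f;     /* the test animates these with SDL_sin() */
    color_desc.clear_alpha = 1.0f;

    /* Nothing is drawn; starting and ending the pass is enough to clear. */
    pass = SDL_StartGpuRenderPass("just-clear-the-screen render pass", cmd, 1, &color_desc, NULL, NULL);
    SDL_EndGpuRenderPass(pass);

    SDL_SubmitGpuCommandBuffer(cmd, NULL);
    SDL_GpuPresent(gpuDevice, window, 1);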
