diff --git a/.appveyor.yml b/.appveyor.yml
index 300979e..3a08426 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -19,8 +19,5 @@ install:
   - go version
   - go env
 
-before_build:
-  - bash -lc "pacman --noconfirm --needed -Sy mingw-w64-i686-openal"
-
 build_script:
   - bash -lc "cd /c/gopath/src/github.com/gen2brain/raylib-go && go get -t ./... && make"
diff --git a/.travis.yml b/.travis.yml
index f757d1c..d8aa708 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,5 +5,5 @@ go:
 
 install:
   - sudo apt-get update -y
-  - sudo apt-get install libopenal-dev libxi-dev libxinerama-dev libxcursor-dev libxxf86vm-dev libxrandr-dev -y
+  - sudo apt-get install libxi-dev libxinerama-dev libxcursor-dev libxxf86vm-dev libxrandr-dev -y
   - go get -t ./...
diff --git a/README.md b/README.md
index 387cd86..4fd2650 100644
--- a/README.md
+++ b/README.md
@@ -12,28 +12,24 @@ Golang bindings for [raylib](http://www.raylib.com/), a simple and easy-to-use l
 
 ### Requirements
 
-* [OpenAL Soft](http://kcat.strangesoft.net/openal.html)
-NOTE: if you don't need audio you can use `-tags noaudio` during build, OpenAL will not be linked to binary, though none of the audio functions will be available.
-
 * [GLFW](http://www.glfw.org/) is included as part of the Go package, but you need to make sure you have dependencies installed, see below.
 
 ##### Ubuntu
 
-    apt-get install libopenal-dev libgl1-mesa-dev libxi-dev libxinerama-dev libxcursor-dev libxxf86vm-dev libxrandr-dev
+    apt-get install libgl1-mesa-dev libxi-dev libxinerama-dev libxcursor-dev libxxf86vm-dev libxrandr-dev
 
 ##### Fedora
 
-    dnf install openal-soft-devel mesa-libGL-devel libXi-devel libXcursor-devel libXrandr-devel libXinerama-devel
+    dnf install mesa-libGL-devel libXi-devel libXcursor-devel libXrandr-devel libXinerama-devel
 
-##### OS X
+##### macOS
 
-On OS X system OpenAL framework is used, you need Xcode or Command Line Tools for Xcode.
+On macOS you need Xcode or Command Line Tools for Xcode.
 
-##### Windows ([MSYS2](https://msys2.github.io/))
+##### Windows
 
-    pacman -S mingw-w64-x86_64-openal mingw-w64-x86_64-gcc mingw-w64-x86_64-go git
-
-On Windows, build binary in MSYS2 shell.
+On Windows you need a C compiler, like [Mingw-w64](https://mingw-w64.org) or [TDM-GCC](http://tdm-gcc.tdragon.net/).
+You can also build binary in [MSYS2](https://msys2.github.io/) shell.
 
 ##### Android
 
@@ -49,11 +45,10 @@ On Windows, build binary in MSYS2 shell.
 
 ### Build tags
 
-* `noaudio` - disables audio functions and doesn't link against OpenAL libraries
+* `noaudio` - disables audio functions
 * `opengl21` - uses OpenGL 2.1 backend (default is 3.3 on desktop)
 * `opengl11` - uses OpenGL 1.1 backend (pseudo OpenGL 1.1 style)
 * `wayland` - builds against Wayland libraries
-* `static` - links against OpenAL static libraries
 
 ### Documentation
 
diff --git a/examples/android/example/README.md b/examples/android/example/README.md
index d79cde6..a0a17c2 100644
--- a/examples/android/example/README.md
+++ b/examples/android/example/README.md
@@ -4,13 +4,13 @@ To compile example to shared library you will need [Android NDK](https://develop
 To build Android apk you will need [Android SDK](http://developer.android.com/sdk/index.html#Other).
 Download and unpack archives somewhere.
 
-Go must be cross compiled for android. There is a bootstrap.sh script that you can use to compile Go and OpenAL for android/arm and android/arm64.
+Go must be cross compiled for android. There is a bootstrap.sh script that you can use to compile Go for android/arm and android/arm64.
 
 Export path to Android NDK, point to location where you have unpacked archive:
 
     export ANDROID_NDK_HOME=/opt/android-ndk
 
-Compile Go, OpenAL and android_native_app_glue, /usr/local is prefix where Go and Android toolchains will be installed:
+Compile Go and android_native_app_glue, /usr/local is prefix where Go and Android toolchains will be installed:
 
     ./bootstrap.sh /usr/local
 
diff --git a/examples/android/example/bootstrap.sh b/examples/android/example/bootstrap.sh
index 033aa9f..01f0525 100755
--- a/examples/android/example/bootstrap.sh
+++ b/examples/android/example/bootstrap.sh
@@ -37,10 +37,6 @@ if [[ -z "$GO_VERSION" ]]; then
     # go1.9.2
     GO_VERSION=`curl -s https://golang.org/dl/ | grep 'id="go' | head -n1 | awk -F'"' '{print $4}'`
 fi
-if [[ -z "$OPENAL_VERSION" ]]; then
-    # 1.18.2
-    OPENAL_VERSION=`curl -s http://kcat.strangesoft.net/openal.html | grep 'tar.bz2' | awk -F'"' '{print $2}' | awk -F'-' '{print $4}' | sed 's/.tar.bz2//'`
-fi
 if [[ -z "$NDK_VERSION" ]]; then
     # r15c
     NDK_VERSION=`curl -s https://developer.android.com/ndk/downloads/index.html | grep 'id="stable-downloads"' | awk -F'(' '{print $2}' | awk -F')' '{print $1}'`
@@ -114,50 +110,6 @@ cp -r -f ${BUILD_DIR}/go ${INSTALL_PREFIX}
 
 ###################################################
 
-echo; echo "##### Download OpenAL ${OPENAL_VERSION}"
-
-cd ${BUILD_DIR} && curl -L --progress-bar http://kcat.strangesoft.net/openal-releases/openal-soft-${OPENAL_VERSION}.tar.bz2 | tar -xj || exit 1
-
-echo; echo "##### Compile OpenAL ${OPENAL_VERSION}"
-
-cat << EOF > ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/android-arm.cmake
-set(CMAKE_SYSTEM_NAME Android)
-set(CMAKE_ANDROID_ARCH arm)
-set(CMAKE_ANDROID_ARCH_ABI armeabi-v7a)
-set(TOOLCHAIN_PREFIX arm-linux-androideabi)
-set(CMAKE_C_COMPILER \${TOOLCHAIN_PREFIX}-${MYCC})
-set(CMAKE_CXX_COMPILER \${TOOLCHAIN_PREFIX}-${MYCXX})
-set(CMAKE_FIND_ROOT_PATH \${INSTALL_PREFIX}/android-arm)
-set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
-set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
-set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
-EOF
-
-cat << EOF > ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/android-arm64.cmake
-set(CMAKE_SYSTEM_NAME Android)
-set(CMAKE_ANDROID_ARCH arm64)
-set(CMAKE_ANDROID_ARCH_ABI arm64-v8a)
-set(TOOLCHAIN_PREFIX aarch64-linux-android)
-set(CMAKE_C_COMPILER \${TOOLCHAIN_PREFIX}-${MYCC})
-set(CMAKE_CXX_COMPILER \${TOOLCHAIN_PREFIX}-${MYCXX})
-set(CMAKE_FIND_ROOT_PATH \${INSTALL_PREFIX}/android-arm64)
-set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
-set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
-set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
-EOF
-
-mkdir -p ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/build-arm
-cd ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/build-arm
-cmake -DLIBTYPE=STATIC -DCMAKE_TOOLCHAIN_FILE=../android-arm.cmake -DCMAKE_C_FLAGS="-DANDROID -D__ANDROID_API__=${API_VERSION_ARM}" -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}/android-arm -DCMAKE_ANDROID_STANDALONE_TOOLCHAIN=${INSTALL_PREFIX}/android-arm -DALSOFT_NO_CONFIG_UTIL=ON -DALSOFT_UTILS=OFF -DALSOFT_EXAMPLES=OFF -DALSOFT_TESTS=OFF -DALSOFT_CONFIG=OFF -DALSOFT_HRTF_DEFS=OFF -DALSOFT_AMBDEC_PRESETS=OFF .. || exit 1
-make -j $(nproc) VERBOSE=1 && make install || exit 1
-
-mkdir -p ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/build-arm64
-cd ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/build-arm64
-cmake -DLIBTYPE=STATIC -DCMAKE_TOOLCHAIN_FILE=../android-arm64.cmake -DCMAKE_C_FLAGS="-DANDROID -D__ANDROID_API__=${API_VERSION_ARM64}" -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}/android-arm64 -DCMAKE_ANDROID_STANDALONE_TOOLCHAIN=${INSTALL_PREFIX}/android-arm64 -DALSOFT_NO_CONFIG_UTIL=ON -DALSOFT_UTILS=OFF -DALSOFT_EXAMPLES=OFF -DALSOFT_TESTS=OFF -DALSOFT_CONFIG=OFF -DALSOFT_HRTF_DEFS=OFF -DALSOFT_AMBDEC_PRESETS=OFF .. || exit 1
-make -j $(nproc) VERBOSE=1 && make install || exit 1
-
-###################################################
-
 echo; echo "##### Compile android_native_app_glue"
 mkdir -p ${BUILD_DIR}/native_app_glue/jni
 cp -r ${ANDROID_NDK_HOME}/sources/android/native_app_glue/* ${BUILD_DIR}/native_app_glue/jni/
diff --git a/examples/rpi/basic_window/bootstrap.sh b/examples/rpi/basic_window/bootstrap.sh
index 024c5c1..4355e0f 100755
--- a/examples/rpi/basic_window/bootstrap.sh
+++ b/examples/rpi/basic_window/bootstrap.sh
@@ -9,8 +9,6 @@ GO_OS="linux"
 GO_ARCH="amd64"
 GO_VERSION=`curl -s https://golang.org/dl/ | grep 'id="go' | head -n1 | awk -F'"' '{print $4}'`
 
-OPENAL_VERSION="1.17.2"
-
 INSTALL_PREFIX="$1"
 export PATH=${INSTALL_PREFIX}/gcc-linaro-arm-linux-gnueabihf-raspbian-x64/bin:${PATH}
 
@@ -39,23 +37,5 @@ GOROOT_BOOTSTRAP=${BUILD_DIR}/bootstrap/go CC_FOR_TARGET=arm-linux-gnueabihf-gcc
 
 cp -r -f ${BUILD_DIR}/go ${INSTALL_PREFIX}
 
-echo "##### Compile OpenAL"
-
-cd ${BUILD_DIR} && curl -s -L  http://kcat.strangesoft.net/openal-releases/openal-soft-${OPENAL_VERSION}.tar.bz2 | tar -xj
-
-cat << EOF > ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/linux-rpi.cmake
-set(TOOLCHAIN_PREFIX arm-linux-gnueabihf)
-set(CMAKE_C_COMPILER \${TOOLCHAIN_PREFIX}-gcc)
-set(CMAKE_FIND_ROOT_PATH \${INSTALL_PREFIX}/gcc-linaro-arm-linux-gnueabihf-raspbian-x64)
-set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
-set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
-set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
-EOF
-
-mkdir -p ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/build-rpi
-cd ${BUILD_DIR}/openal-soft-${OPENAL_VERSION}/build-rpi
-cmake -DLIBTYPE=STATIC -DCMAKE_TOOLCHAIN_FILE=../linux-rpi.cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}/gcc-linaro-arm-linux-gnueabihf-raspbian-x64 ..
-make -j $(nproc) && make install
-
 echo "##### Remove build directory"
 rm -rf ${BUILD_DIR}
diff --git a/raylib/audio.c b/raylib/audio.c
index a7cd665..5efca74 100644
--- a/raylib/audio.c
+++ b/raylib/audio.c
@@ -74,6 +74,10 @@
 #define SUPPORT_FILEFORMAT_MOD
 //-------------------------------------------------
 
+#ifndef USE_MINI_AL
+#define USE_MINI_AL 1           // Set to 1 to use mini_al; 0 to use OpenAL.
+#endif
+
 #if defined(AUDIO_STANDALONE)
     #include "audio.h"
     #include <stdarg.h>         // Required for: va_list, va_start(), vfprintf(), va_end()
@@ -82,17 +86,21 @@
     #include "utils.h"          // Required for: fopen() Android mapping
 #endif
 
-#if defined(__APPLE__)
-    #include "OpenAL/al.h"          // OpenAL basic header
-    #include "OpenAL/alc.h"         // OpenAL context header (like OpenGL, OpenAL requires a context to work)
-#else
-    #include "AL/al.h"              // OpenAL basic header
-    #include "AL/alc.h"             // OpenAL context header (like OpenGL, OpenAL requires a context to work)
-    //#include "AL/alext.h"         // OpenAL extensions header, required for AL_EXT_FLOAT32 and AL_EXT_MCFORMATS
-#endif
+#include "external/mini_al.h"   // Implemented in mini_al.c. Cannot implement this here because it conflicts with Win32 APIs such as CloseWindow(), etc.
 
-// OpenAL extension: AL_EXT_FLOAT32 - Support for 32bit float samples
-// OpenAL extension: AL_EXT_MCFORMATS - Support for multi-channel formats (Quad, 5.1, 6.1, 7.1)
+#if !defined(USE_MINI_AL) || USE_MINI_AL == 0
+    #if defined(__APPLE__)
+        #include "OpenAL/al.h"          // OpenAL basic header
+        #include "OpenAL/alc.h"         // OpenAL context header (like OpenGL, OpenAL requires a context to work)
+    #else
+        #include "AL/al.h"              // OpenAL basic header
+        #include "AL/alc.h"             // OpenAL context header (like OpenGL, OpenAL requires a context to work)
+        //#include "AL/alext.h"         // OpenAL extensions header, required for AL_EXT_FLOAT32 and AL_EXT_MCFORMATS
+    #endif
+
+    // OpenAL extension: AL_EXT_FLOAT32 - Support for 32bit float samples
+    // OpenAL extension: AL_EXT_MCFORMATS - Support for multi-channel formats (Quad, 5.1, 6.1, 7.1)
+#endif
 
 #include <stdlib.h>             // Required for: malloc(), free()
 #include <string.h>             // Required for: strcmp(), strncmp()
@@ -202,10 +210,242 @@ void TraceLog(int msgType, const char *text, ...);              // Show trace lo
 //----------------------------------------------------------------------------------
 // Module Functions Definition - Audio Device initialization and Closing
 //----------------------------------------------------------------------------------
+#if USE_MINI_AL
+#define DEVICE_FORMAT       mal_format_f32
+#define DEVICE_CHANNELS     2
+#define DEVICE_SAMPLE_RATE  44100
+
+typedef enum { AUDIO_BUFFER_USAGE_STATIC = 0, AUDIO_BUFFER_USAGE_STREAM } AudioBufferUsage;
+
+typedef struct AudioBuffer AudioBuffer; // One playable buffer; instances are chained into the list walked by the mixing callback.
+struct AudioBuffer
+{
+    mal_dsp dsp;                        // For format conversion.
+    float volume;                       // Per-buffer gain; passed to MixFrames() as localVolume.
+    float pitch;                        // Last value given to SetAudioBufferPitch().
+    bool playing;                       // Set by PlayAudioBuffer(), cleared by StopAudioBuffer(); the mixer skips buffers where this is false.
+    bool paused;                        // The mixer also skips paused buffers; the cursor position is left untouched.
+    bool looping;                       // Always true for AudioStreams.
+    AudioBufferUsage usage;             // Slightly different logic is used when feeding data to the playback device depending on whether or not data is streamed.
+    bool isSubBufferProcessed[2];       // One flag per half of `buffer`; set once the read cursor has consumed that half.
+    unsigned int frameCursorPos;        // Read position in frames; wraps at bufferSizeInFrames.
+    unsigned int bufferSizeInFrames;    // Capacity of `buffer`, in frames.
+    AudioBuffer* next;                  // Next node in the tracked list (NULL at the tail).
+    AudioBuffer* prev;                  // Previous node in the tracked list (NULL at the head).
+    unsigned char buffer[1];            // First byte of the sample data; CreateAudioBuffer() allocates the remaining bytes directly after the struct.
+};
+
+void StopAudioBuffer(AudioBuffer* audioBuffer);
+
+
+static mal_context context;
+static mal_device device;
+static mal_bool32 isAudioInitialized = MAL_FALSE;
+static float masterVolume = 1;
+static mal_mutex audioLock;
+static AudioBuffer* firstAudioBuffer = NULL;   // Audio buffers are tracked in a linked list.
+static AudioBuffer* lastAudioBuffer = NULL;
+
+static void TrackAudioBuffer(AudioBuffer* audioBuffer)
+{
+    // Append the buffer to the tail of the tracked list while holding audioLock.
+    mal_mutex_lock(&audioLock);
+    if (firstAudioBuffer != NULL) {
+        // Non-empty list: link the new node behind the current tail.
+        lastAudioBuffer->next = audioBuffer;
+        audioBuffer->prev = lastAudioBuffer;
+    } else {
+        firstAudioBuffer = audioBuffer;     // Empty list: the new node is also the head.
+    }
+    lastAudioBuffer = audioBuffer;
+
+    mal_mutex_unlock(&audioLock);
+}
+
+static void UntrackAudioBuffer(AudioBuffer* audioBuffer)
+{
+    // Unlink the buffer from the tracked list while holding audioLock.
+    mal_mutex_lock(&audioLock);
+
+    AudioBuffer* before = audioBuffer->prev;
+    AudioBuffer* after  = audioBuffer->next;
+
+    // Patch the forward link; a node without a predecessor is the head.
+    if (before != NULL) before->next = after;
+    else firstAudioBuffer = after;
+
+    // Patch the backward link; a node without a successor is the tail.
+    if (after != NULL) after->prev = before;
+    else lastAudioBuffer = before;
+
+    // Clear the node's own links so it no longer points into the list.
+    audioBuffer->prev = NULL;
+    audioBuffer->next = NULL;
+    mal_mutex_unlock(&audioLock);
+}
+
+static void OnLog_MAL(mal_context* pContext, mal_device* pDevice, const char* message)
+{
+    (void)pContext;                     // Unused; this function is handed to mal_context_config_init() as the log callback.
+    (void)pDevice;                      // Unused.
+    TraceLog(LOG_ERROR, "%s", message); // All mini_al messages are errors. Use "%s" so a '%' inside the message is never parsed as a conversion.
+}
+
+// This is the main mixing function. Mixing is pretty simple in this project - it's just an accumulation.
+//
+// framesOut is both an input and an output. It will be initially filled with zeros outside of this function.
+static void MixFrames(float* framesOut, const float* framesIn, mal_uint32 frameCount, float localVolume)
+{
+    // Accumulate every input sample, scaled by the master and per-buffer volume, onto the output.
+    for (mal_uint32 frame = 0; frame < frameCount; ++frame) {
+        // Offset of this frame's first sample in the interleaved buffers.
+        mal_uint32 base = frame * device.channels;
+        for (mal_uint32 ch = 0; ch < device.channels; ++ch) {
+            framesOut[base + ch] += framesIn[base + ch] * masterVolume * localVolume;
+        }
+    }
+}
+
+static mal_uint32 OnSendAudioDataToDevice(mal_device* pDevice, mal_uint32 frameCount, void* pFramesOut)
+{
+    // Playback callback given to mal_device_config_init(): mixes every playing, unpaused buffer into pFramesOut.
+    (void)pDevice;   // NOTE(review): redundant - pDevice is read by the memset() below.
+
+    // Mixing is basically just an accumulation. We need to initialize the output buffer to 0.
+    memset(pFramesOut, 0, frameCount*pDevice->channels*mal_get_sample_size_in_bytes(pDevice->format));
+
+    // Using a mutex here for thread-safety which makes things not real-time. This is unlikely to be necessary for this project, but may
+    // want to consider how you might want to avoid this.
+    mal_mutex_lock(&audioLock);
+    {
+        for (AudioBuffer* audioBuffer = firstAudioBuffer; audioBuffer != NULL; audioBuffer = audioBuffer->next)
+        {
+            // Ignore stopped or paused sounds.
+            if (!audioBuffer->playing || audioBuffer->paused) {
+                continue;
+            }
+
+            mal_uint32 framesRead = 0;   // Frames of this buffer already mixed into the output during this callback.
+            for (;;) {
+                if (framesRead > frameCount) {
+                    TraceLog(LOG_DEBUG, "Mixed too many frames from audio buffer");
+                    break;
+                }
+                if (framesRead == frameCount) {
+                    break;
+                }
+
+                // Just read as much data as we can from the stream.
+                mal_uint32 framesToRead = (frameCount - framesRead);
+                while (framesToRead > 0) {
+                    float tempBuffer[1024]; // 512 frames for stereo.
+
+                    mal_uint32 framesToReadRightNow = framesToRead;   // Clamp each read to what tempBuffer can hold.
+                    if (framesToReadRightNow > sizeof(tempBuffer)/sizeof(tempBuffer[0])/DEVICE_CHANNELS) {
+                        framesToReadRightNow = sizeof(tempBuffer)/sizeof(tempBuffer[0])/DEVICE_CHANNELS;
+                    }
+
+                    // If we're not looping, we need to make sure we flush the internal buffers of the DSP pipeline to ensure we get the
+                    // last few samples.
+                    mal_bool32 flushDSP = !audioBuffer->looping;
+
+                    mal_uint32 framesJustRead = mal_dsp_read_frames_ex(&audioBuffer->dsp, framesToReadRightNow, tempBuffer, flushDSP);
+                    if (framesJustRead > 0) {
+                        float* framesOut = (float*)pFramesOut + (framesRead * device.channels);   // Treats the output as float samples; DEVICE_FORMAT is mal_format_f32.
+                        float* framesIn  = tempBuffer;
+                        MixFrames(framesOut, framesIn, framesJustRead, audioBuffer->volume);
+
+                        framesToRead -= framesJustRead;
+                        framesRead += framesJustRead;
+                    }
+
+                    // If we weren't able to read all the frames we requested, break.
+                    if (framesJustRead < framesToReadRightNow) {
+                        if (!audioBuffer->looping) {
+                            StopAudioBuffer(audioBuffer);   // A non-looping buffer that ran out of data is stopped and rewound.
+                            break;
+                        } else {
+                            // Should never get here, but just for safety, move the cursor position back to the start and continue the loop.
+                            audioBuffer->frameCursorPos = 0;
+                            continue;   // NOTE(review): this repeats until the DSP read returns frames; confirm it cannot keep returning short reads.
+                        }
+                    }
+                }
+
+                // If for some reason we weren't able to read every frame we'll need to break from the loop. Not doing this could
+                // theoretically put us into an infinite loop.
+                if (framesToRead > 0) {
+                    break;
+                }
+            }
+        }
+    }
+    mal_mutex_unlock(&audioLock);
+
+    return frameCount;  // We always output the same number of frames that were originally requested.
+}
+#endif
 
 // Initialize audio device
 void InitAudioDevice(void)
 {
+#if USE_MINI_AL
+    // Context. mini_al log messages are routed through OnLog_MAL.
+    mal_context_config contextConfig = mal_context_config_init(OnLog_MAL);
+    mal_result result = mal_context_init(NULL, 0, &contextConfig, &context);
+    if (result != MAL_SUCCESS)
+    {
+        TraceLog(LOG_ERROR, "Failed to initialize audio context");
+        return;
+    }
+
+    // Device. Using the default device. Format is floating point because it simplifies mixing.
+    mal_device_config deviceConfig = mal_device_config_init(DEVICE_FORMAT, DEVICE_CHANNELS, DEVICE_SAMPLE_RATE, NULL, OnSendAudioDataToDevice);
+
+    // Special case for PLATFORM_RPI.
+//#if defined(PLATFORM_RPI)
+//    deviceConfig.alsa.noMMap = MAL_TRUE;
+//    deviceConfig.bufferSizeInFrames = 2048;
+//#endif
+
+    result = mal_device_init(&context, mal_device_type_playback, NULL, &deviceConfig, NULL, &device);
+    if (result != MAL_SUCCESS)
+    {
+        TraceLog(LOG_ERROR, "Failed to initialize audio playback device");
+        mal_context_uninit(&context);
+        return;
+    }
+
+    // Mixing happens on a separate thread, so shared state is guarded by a mutex. Create it BEFORE starting the device:
+    // OnSendAudioDataToDevice() locks audioLock, so the mutex must already exist when the first callback can fire.
+    if (mal_mutex_init(&context, &audioLock) != MAL_SUCCESS)
+    {
+        TraceLog(LOG_ERROR, "Failed to create mutex for audio mixing");
+        mal_device_uninit(&device);
+        mal_context_uninit(&context);
+        return;
+    }
+
+    // Keep the device running the whole time; running it only while a sound is playing is a possible later refinement.
+    result = mal_device_start(&device);
+    if (result != MAL_SUCCESS)
+    {
+        TraceLog(LOG_ERROR, "Failed to start audio playback device");
+        mal_device_uninit(&device);
+        mal_mutex_uninit(&audioLock);   // Release the mutex created above so a failed start leaks nothing.
+        mal_context_uninit(&context);
+        return;
+    }
+
+    TraceLog(LOG_INFO, "Audio device initialized successfully: %s", device.name);
+    TraceLog(LOG_INFO, "Audio backend: mini_al / %s", mal_get_backend_name(context.backend));
+    TraceLog(LOG_INFO, "Audio format: %s -> %s", mal_get_format_name(device.format), mal_get_format_name(device.internalFormat));
+    TraceLog(LOG_INFO, "Audio channels: %d -> %d", device.channels, device.internalChannels);
+    TraceLog(LOG_INFO, "Audio sample rate: %d -> %d", device.sampleRate, device.internalSampleRate);
+    TraceLog(LOG_INFO, "Audio buffer size: %d", device.bufferSizeInFrames);
+
+    isAudioInitialized = MAL_TRUE;      // Read by IsAudioDeviceReady() and CloseAudioDevice().
+#else
     // Open and initialize a device with default settings
     ALCdevice *device = alcOpenDevice(NULL);
 
@@ -232,13 +472,30 @@ void InitAudioDevice(void)
             alListener3f(AL_ORIENTATION, 0.0f, 0.0f, -1.0f);
             
             alListenerf(AL_GAIN, 1.0f);
+
+            if (alIsExtensionPresent("AL_EXT_float32")) {
+                TraceLog(LOG_INFO, "AL_EXT_float32 supported");
+            } else {
+                TraceLog(LOG_INFO, "AL_EXT_float32 not supported");
+            }
         }
     }
+#endif
 }
 
 // Close the audio device for all contexts
 void CloseAudioDevice(void)
 {
+#if USE_MINI_AL
+    if (!isAudioInitialized) {
+        TraceLog(LOG_WARNING, "Could not close audio device because it is not currently initialized");
+        return;
+    }
+    isAudioInitialized = MAL_FALSE;     // Mark closed so IsAudioDeviceReady() stops reporting true and a second close is rejected above.
+    mal_device_uninit(&device);         // Tear the device down first: its callback locks audioLock (presumably uninit ends the callbacks - confirm).
+    mal_mutex_uninit(&audioLock);
+    mal_context_uninit(&context);
+#else
     ALCdevice *device;
     ALCcontext *context = alcGetCurrentContext();
 
@@ -249,6 +506,7 @@ void CloseAudioDevice(void)
     alcMakeContextCurrent(NULL);
     alcDestroyContext(context);
     alcCloseDevice(device);
+#endif
 
     TraceLog(LOG_INFO, "Audio device closed successfully");
 }
@@ -256,6 +514,9 @@ void CloseAudioDevice(void)
 // Check if device has been initialized successfully
 bool IsAudioDeviceReady(void)
 {
+#if USE_MINI_AL
+    return isAudioInitialized;
+#else
     ALCcontext *context = alcGetCurrentContext();
 
     if (context == NULL) return false;
@@ -266,6 +527,7 @@ bool IsAudioDeviceReady(void)
         if (device == NULL) return false;
         else return true;
     }
+#endif
 }
 
 // Set master volume (listener)
@@ -273,10 +535,270 @@ void SetMasterVolume(float volume)
 {
     if (volume < 0.0f) volume = 0.0f;
     else if (volume > 1.0f) volume = 1.0f;
-    
+ 
+#if USE_MINI_AL
+    masterVolume = volume;      // Store the value clamped to [0, 1] above; MixFrames() multiplies every sample by it.
+#else
     alListenerf(AL_GAIN, volume);
+#endif
 }
 
+
+//----------------------------------------------------------------------------------
+// Audio Buffer
+//----------------------------------------------------------------------------------
+#if USE_MINI_AL
+static mal_uint32 AudioBuffer_OnDSPRead(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, void* pUserData)
+{
+    AudioBuffer* audioBuffer = (AudioBuffer*)pUserData;   // CreateAudioBuffer() passes the buffer as user data to mal_dsp_init().
+
+    mal_uint32 subBufferSizeInFrames = audioBuffer->bufferSizeInFrames / 2;   // The buffer is handled as two equal halves.
+    mal_uint32 currentSubBufferIndex = audioBuffer->frameCursorPos / subBufferSizeInFrames;   // NOTE(review): divides by zero when bufferSizeInFrames < 2.
+    if (currentSubBufferIndex > 1) {
+        TraceLog(LOG_DEBUG, "Frame cursor position moved too far forward in audio stream");
+        return 0;
+    }
+
+    // Another thread can update the processed state of buffers so we just take a copy here to try and avoid potential synchronization problems.
+    bool isSubBufferProcessed[2];
+    isSubBufferProcessed[0] = audioBuffer->isSubBufferProcessed[0];
+    isSubBufferProcessed[1] = audioBuffer->isSubBufferProcessed[1];
+
+    mal_uint32 frameSizeInBytes = mal_get_sample_size_in_bytes(audioBuffer->dsp.config.formatIn) * audioBuffer->dsp.config.channelsIn;
+
+    // Fill out every frame until we find a buffer that's marked as processed. Then fill the remainder with 0.
+    mal_uint32 framesRead = 0;
+    for (;;)
+    {
+        // We break from this loop differently depending on the buffer's usage. For static buffers, we simply fill as much data as we can. For
+        // streaming buffers we only fill the halves of the buffer that are processed. Unprocessed halves must keep their audio data in-tact.
+        if (audioBuffer->usage == AUDIO_BUFFER_USAGE_STATIC) {
+            if (framesRead >= frameCount) {
+                break;
+            }
+        } else {
+            if (isSubBufferProcessed[currentSubBufferIndex]) {
+                break;
+            }
+        }
+
+        mal_uint32 totalFramesRemaining = (frameCount - framesRead);   // Frames the caller still wants.
+        if (totalFramesRemaining == 0) {
+            break;
+        }
+
+        mal_uint32 framesRemainingInOutputBuffer;   // Frames left before the end of the buffer (static) or of the current half (stream).
+        if (audioBuffer->usage == AUDIO_BUFFER_USAGE_STATIC) {
+            framesRemainingInOutputBuffer = audioBuffer->bufferSizeInFrames - audioBuffer->frameCursorPos;
+        } else {
+            mal_uint32 firstFrameIndexOfThisSubBuffer = subBufferSizeInFrames * currentSubBufferIndex;
+            framesRemainingInOutputBuffer = subBufferSizeInFrames - (audioBuffer->frameCursorPos - firstFrameIndexOfThisSubBuffer);
+        }
+
+
+
+        mal_uint32 framesToRead = totalFramesRemaining;   // Copy the smaller of "still wanted" and "still available".
+        if (framesToRead > framesRemainingInOutputBuffer) {
+            framesToRead = framesRemainingInOutputBuffer;
+        }
+
+        memcpy((unsigned char*)pFramesOut + (framesRead*frameSizeInBytes), audioBuffer->buffer + (audioBuffer->frameCursorPos*frameSizeInBytes), framesToRead*frameSizeInBytes);
+        audioBuffer->frameCursorPos = (audioBuffer->frameCursorPos + framesToRead) % audioBuffer->bufferSizeInFrames;   // Wrap back to frame 0 at the end.
+        framesRead += framesToRead;
+
+        // If we've read to the end of the buffer, mark it as processed.
+        if (framesToRead == framesRemainingInOutputBuffer) {
+            audioBuffer->isSubBufferProcessed[currentSubBufferIndex] = true;
+            isSubBufferProcessed[currentSubBufferIndex] = true;
+
+            currentSubBufferIndex = (currentSubBufferIndex + 1) % 2;   // Move on to the other half.
+
+            // We need to break from this loop if we're not looping.
+            if (!audioBuffer->looping) {
+                StopAudioBuffer(audioBuffer);
+                break;
+            }
+        }
+    }
+
+    // Zero-fill excess.
+    mal_uint32 totalFramesRemaining = (frameCount - framesRead);
+    if (totalFramesRemaining > 0) {
+        memset((unsigned char*)pFramesOut + (framesRead*frameSizeInBytes), 0, totalFramesRemaining*frameSizeInBytes);
+
+        // For static buffers we can fill the remaining frames with silence for safety, but we don't want
+        // to report those frames as "read". The reason for this is that the caller uses the return value
+        // to know whether or not a non-looping sound has finished playback.
+        if (audioBuffer->usage != AUDIO_BUFFER_USAGE_STATIC) {
+            framesRead += totalFramesRemaining;
+        }
+    }
+
+    return framesRead;   // Frames delivered to the DSP; for streams this includes the zero-filled frames added above.
+}
+
+// Create a new audio buffer. Initially filled with silence.
+AudioBuffer* CreateAudioBuffer(mal_format format, mal_uint32 channels, mal_uint32 sampleRate, mal_uint32 bufferSizeInFrames, AudioBufferUsage usage)
+{
+    AudioBuffer* audioBuffer = (AudioBuffer*)calloc(sizeof(*audioBuffer) + (bufferSizeInFrames*channels*mal_get_sample_size_in_bytes(format)), 1);   // Struct and sample data share one zeroed allocation.
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "CreateAudioBuffer() : Failed to allocate memory for audio buffer");
+        return NULL;
+    }
+
+    // We run audio data through a format converter: the buffer's own format in, the fixed device format out.
+    mal_dsp_config dspConfig;
+    memset(&dspConfig, 0, sizeof(dspConfig));
+    dspConfig.formatIn = format;
+    dspConfig.formatOut = DEVICE_FORMAT;
+    dspConfig.channelsIn = channels;
+    dspConfig.channelsOut = DEVICE_CHANNELS;
+    dspConfig.sampleRateIn = sampleRate;
+    dspConfig.sampleRateOut = DEVICE_SAMPLE_RATE;
+    mal_result resultMAL = mal_dsp_init(&dspConfig, AudioBuffer_OnDSPRead, audioBuffer, &audioBuffer->dsp);   // The buffer is the callback's user data.
+    if (resultMAL != MAL_SUCCESS) {
+        TraceLog(LOG_ERROR, "CreateAudioBuffer() : Failed to create data conversion pipeline");
+        free(audioBuffer);
+        return NULL;
+    }
+
+    audioBuffer->volume = 1;
+    audioBuffer->pitch = 1;
+    audioBuffer->playing = false;
+    audioBuffer->paused = false;
+    audioBuffer->looping = false;
+    audioBuffer->usage = usage;
+    audioBuffer->bufferSizeInFrames = bufferSizeInFrames;
+    audioBuffer->frameCursorPos = 0;
+
+    // Buffers should be marked as processed by default so that a call to UpdateAudioStream() immediately after initialization works correctly.
+    audioBuffer->isSubBufferProcessed[0] = true;
+    audioBuffer->isSubBufferProcessed[1] = true;
+
+    TrackAudioBuffer(audioBuffer);      // Add the buffer to the list walked by the mixing callback.
+
+    return audioBuffer;                 // Caller releases it with DeleteAudioBuffer().
+}
+
+// Delete an audio buffer.
+void DeleteAudioBuffer(AudioBuffer* audioBuffer)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "DeleteAudioBuffer() : No audio buffer");
+        return;
+    }
+
+    UntrackAudioBuffer(audioBuffer);    // Remove it from the mixer's list before the memory goes away.
+    free(audioBuffer);                  // The sample data lives in the same allocation as the struct.
+}
+
+// Check if an audio buffer is playing.
+bool IsAudioBufferPlaying(AudioBuffer* audioBuffer)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "IsAudioBufferPlaying() : No audio buffer");
+        return false;
+    }
+
+    return audioBuffer->playing && !audioBuffer->paused;   // A paused buffer is reported as not playing.
+}
+
+// Play an audio buffer.
+//
+// This will restart the buffer from the start. Use PauseAudioBuffer() and ResumeAudioBuffer() if the playback position
+// should be maintained.
+void PlayAudioBuffer(AudioBuffer* audioBuffer)
+{
+    if (audioBuffer != NULL)
+    {
+        // Flag the buffer as playing, clear any pause, and rewind to the first frame.
+        audioBuffer->playing = true;
+        audioBuffer->paused = false;
+        audioBuffer->frameCursorPos = 0;
+        return;
+    }
+    TraceLog(LOG_ERROR, "PlayAudioBuffer() : No audio buffer");
+}
+
+// Stop an audio buffer.
+void StopAudioBuffer(AudioBuffer* audioBuffer)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "StopAudioBuffer() : No audio buffer");
+        return;
+    }
+
+    // Nothing to do if already stopped. Test `playing` directly: IsAudioBufferPlaying() is also false while paused, and a paused buffer must still be stoppable.
+    if (!audioBuffer->playing)
+    {
+        return;
+    }
+
+    audioBuffer->playing = false;
+    audioBuffer->paused = false;
+    audioBuffer->frameCursorPos = 0;                // Rewind so the next play starts from the first frame.
+    audioBuffer->isSubBufferProcessed[0] = true;    // Mark both halves as processed, as CreateAudioBuffer() does for a new buffer.
+    audioBuffer->isSubBufferProcessed[1] = true;
+}
+
+// Pause an audio buffer.
+void PauseAudioBuffer(AudioBuffer* audioBuffer)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "PauseAudioBuffer() : No audio buffer");
+        return;
+    }
+
+    audioBuffer->paused = true;     // The mixer skips paused buffers; the cursor position is kept.
+}
+
+// Resume an audio buffer.
+void ResumeAudioBuffer(AudioBuffer* audioBuffer)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "ResumeAudioBuffer() : No audio buffer");
+        return;
+    }
+
+    audioBuffer->paused = false;    // Only clears the pause flag; `playing` and the cursor are not touched.
+}
+
+// Set volume for an audio buffer.
+void SetAudioBufferVolume(AudioBuffer* audioBuffer, float volume)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "SetAudioBufferVolume() : No audio buffer");
+        return;
+    }
+
+    audioBuffer->volume = volume;   // Stored as given (no clamping); the mixer passes it to MixFrames() as the per-buffer gain.
+}
+
+// Set pitch for an audio buffer.
+void SetAudioBufferPitch(AudioBuffer* audioBuffer, float pitch)
+{
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "SetAudioBufferPitch() : No audio buffer");
+        return;
+    }
+    if (!(pitch > 0.0f)) return;    // Reject zero, negative and NaN: the division below would hand an out-of-range float to the unsigned cast.
+    audioBuffer->pitch = pitch;
+
+    // Pitching is just an adjustment of the sample rate. Note that this changes the duration of the sound - higher pitches
+    // will make the sound faster; lower pitches make it slower. NOTE(review): confirm dsp.config.sampleRateOut keeps its initial value.
+    mal_uint32 newOutputSampleRate = (mal_uint32)((((float)audioBuffer->dsp.config.sampleRateOut / (float)audioBuffer->dsp.config.sampleRateIn) / pitch) * audioBuffer->dsp.config.sampleRateIn);
+    mal_dsp_set_output_sample_rate(&audioBuffer->dsp, newOutputSampleRate);
+}
+#endif
+
 //----------------------------------------------------------------------------------
 // Module Functions Definition - Sounds loading and playing (.WAV)
 //----------------------------------------------------------------------------------
@@ -351,6 +873,39 @@ Sound LoadSoundFromWave(Wave wave)
 
     if (wave.data != NULL)
     {
+#if USE_MINI_AL
+        // When using mini_al we need to do our own mixing. To simplify this we need to convert the format of each sound to be consistent with
+        // the format used to open the playback device. We can do this two ways:
+        //
+        //   1) Convert the whole sound in one go at load time (here).
+        //   2) Convert the audio data in chunks at mixing time.
+        //
+        // I have decided on the first option because it offloads work required for the format conversion to the loading stage. The
+        // downside to this is that it uses more memory if the original sound is u8 or s16.
+        mal_format formatIn  = ((wave.sampleSize == 8) ? mal_format_u8 : ((wave.sampleSize == 16) ? mal_format_s16 : mal_format_f32));
+        mal_uint32 frameCountIn = wave.sampleCount;  // Is wave->sampleCount actually the frame count? That terminology needs to change, if so.
+
+        mal_uint32 frameCount = mal_convert_frames(NULL, DEVICE_FORMAT, DEVICE_CHANNELS, DEVICE_SAMPLE_RATE, NULL, formatIn, wave.channels, wave.sampleRate, frameCountIn);
+        if (frameCount == 0) {
+            TraceLog(LOG_ERROR, "LoadSoundFromWave() : Failed to get frame count for format conversion");
+        }
+
+        AudioBuffer* audioBuffer = CreateAudioBuffer(DEVICE_FORMAT, DEVICE_CHANNELS, DEVICE_SAMPLE_RATE, frameCount, AUDIO_BUFFER_USAGE_STATIC);
+        if (audioBuffer == NULL)
+        {
+            TraceLog(LOG_ERROR, "LoadSoundFromWave() : Failed to create audio buffer");
+            return sound;   // Nothing to fill: the conversion below would dereference the NULL buffer.
+        }
+
+        // Second pass: convert the wave data straight into the newly created buffer.
+        frameCount = mal_convert_frames(audioBuffer->buffer, audioBuffer->dsp.config.formatIn, audioBuffer->dsp.config.channelsIn, audioBuffer->dsp.config.sampleRateIn, wave.data, formatIn, wave.channels, wave.sampleRate, frameCountIn);
+        if (frameCount == 0)
+        {
+            TraceLog(LOG_ERROR, "LoadSoundFromWave() : Format conversion failed");
+        }
+
+        sound.audioBuffer = audioBuffer;
+#else
         ALenum format = 0;
 
         // The OpenAL format is worked out by looking at the number of channels and the sample size (bits per sample)
@@ -404,6 +959,7 @@ Sound LoadSoundFromWave(Wave wave)
         sound.source = source;
         sound.buffer = buffer;
         sound.format = format;
+#endif
     }
 
     return sound;
@@ -420,10 +976,14 @@ void UnloadWave(Wave wave)
 // Unload sound
 void UnloadSound(Sound sound)
 {
+#if USE_MINI_AL
+    DeleteAudioBuffer((AudioBuffer*)sound.audioBuffer);
+#else
     alSourceStop(sound.source);
 
     alDeleteSources(1, &sound.source);
     alDeleteBuffers(1, &sound.buffer);
+#endif
 
     TraceLog(LOG_INFO, "[SND ID %i][BUFR ID %i] Unloaded sound data from RAM", sound.source, sound.buffer);
 }
@@ -432,6 +992,19 @@ void UnloadSound(Sound sound)
 // NOTE: data must match sound.format
 void UpdateSound(Sound sound, const void *data, int samplesCount)
 {
+#if USE_MINI_AL
+    AudioBuffer* audioBuffer = (AudioBuffer*)sound.audioBuffer;
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "UpdateSound() : Invalid sound - no audio buffer");
+        return;
+    }
+
+    StopAudioBuffer(audioBuffer);
+    // NOTE(review): samplesCount is scaled by the channel count (so treated as frames) and is not checked against the buffer size - confirm callers stay in bounds.
+    // TODO: May want to lock/unlock this since this data buffer is read at mixing time.
+    memcpy(audioBuffer->buffer, data, samplesCount*audioBuffer->dsp.config.channelsIn*mal_get_sample_size_in_bytes(audioBuffer->dsp.config.formatIn));
+#else
     ALint sampleRate, sampleSize, channels;
     alGetBufferi(sound.buffer, AL_FREQUENCY, &sampleRate);
     alGetBufferi(sound.buffer, AL_BITS, &sampleSize);           // It could also be retrieved from sound.format
@@ -453,12 +1026,17 @@ void UpdateSound(Sound sound, const void *data, int samplesCount)
 
     // Attach sound buffer to source again
     alSourcei(sound.source, AL_BUFFER, sound.buffer);
+#endif
 }
 
 // Play a sound
 void PlaySound(Sound sound)
 {
+#if USE_MINI_AL
+    PlayAudioBuffer((AudioBuffer*)sound.audioBuffer);
+#else
     alSourcePlay(sound.source);        // Play the sound
+#endif
 
     //TraceLog(LOG_INFO, "Playing sound");
 
@@ -479,28 +1057,43 @@ void PlaySound(Sound sound)
 // Pause a sound
 void PauseSound(Sound sound)
 {
+#if USE_MINI_AL
+    PauseAudioBuffer((AudioBuffer*)sound.audioBuffer);
+#else
     alSourcePause(sound.source);
+#endif
 }
 
 // Resume a paused sound
 void ResumeSound(Sound sound)
 {
+#if USE_MINI_AL
+    ResumeAudioBuffer((AudioBuffer*)sound.audioBuffer);
+#else
     ALenum state;
 
     alGetSourcei(sound.source, AL_SOURCE_STATE, &state);
 
     if (state == AL_PAUSED) alSourcePlay(sound.source);
+#endif
 }
 
 // Stop reproducing a sound
 void StopSound(Sound sound)
 {
+#if USE_MINI_AL
+    StopAudioBuffer((AudioBuffer*)sound.audioBuffer);
+#else
     alSourceStop(sound.source);
+#endif
 }
 
 // Check if a sound is playing
 bool IsSoundPlaying(Sound sound)
 {
+#if USE_MINI_AL
+    return IsAudioBufferPlaying((AudioBuffer*)sound.audioBuffer);
+#else
     bool playing = false;
     ALint state;
 
@@ -508,23 +1101,59 @@ bool IsSoundPlaying(Sound sound)
     if (state == AL_PLAYING) playing = true;
 
     return playing;
+#endif
 }
 
 // Set volume for a sound
 void SetSoundVolume(Sound sound, float volume)
 {
+#if USE_MINI_AL
+    SetAudioBufferVolume((AudioBuffer*)sound.audioBuffer, volume);
+#else
     alSourcef(sound.source, AL_GAIN, volume);
+#endif
 }
 
 // Set pitch for a sound
 void SetSoundPitch(Sound sound, float pitch)
 {
+#if USE_MINI_AL
+    SetAudioBufferPitch((AudioBuffer*)sound.audioBuffer, pitch);
+#else
     alSourcef(sound.source, AL_PITCH, pitch);
+#endif
 }
 
 // Convert wave data to desired format
 void WaveFormat(Wave *wave, int sampleRate, int sampleSize, int channels)
 {
+    mal_format formatIn  = ((wave->sampleSize == 8) ? mal_format_u8 : ((wave->sampleSize == 16) ? mal_format_s16 : mal_format_f32));
+    mal_format formatOut = ((      sampleSize == 8) ? mal_format_u8 : ((      sampleSize == 16) ? mal_format_s16 : mal_format_f32));
+    mal_uint32 frameCountIn = wave->sampleCount;  // Is wave->sampleCount actually the frame count? That terminology needs to change, if so.
+
+    mal_uint32 frameCount = mal_convert_frames(NULL, formatOut, channels, sampleRate, NULL, formatIn, wave->channels, wave->sampleRate, frameCountIn);
+    if (frameCount == 0) {
+        TraceLog(LOG_ERROR, "WaveFormat() : Failed to get frame count for format conversion.");
+        return;
+    }
+
+    // Convert into a scratch buffer. A failed allocation is handled like a failed conversion so that wave->data is never replaced by NULL.
+    void* data = malloc(frameCount * channels * (sampleSize/8));
+    frameCount = (data != NULL) ? mal_convert_frames(data, formatOut, channels, sampleRate, wave->data, formatIn, wave->channels, wave->sampleRate, frameCountIn) : 0;
+    if (frameCount == 0) {
+        free(data);     // Release the scratch buffer (free(NULL) is a no-op); the wave itself is left untouched.
+        TraceLog(LOG_ERROR, "WaveFormat() : Format conversion failed.");
+        return;
+    }
+
+    wave->sampleCount = frameCount;
+    wave->sampleSize = sampleSize;
+    wave->sampleRate = sampleRate;
+    wave->channels = channels;
+    free(wave->data);
+    wave->data = data;
+
+#if 0
     // Format sample rate
     // NOTE: Only supported 22050 <--> 44100
     if (wave->sampleRate != sampleRate)
@@ -603,6 +1232,7 @@ void WaveFormat(Wave *wave, int sampleRate, int sampleSize, int channels)
         free(wave->data);
         wave->data = data;
     }
+#endif
 }
 
 // Copy a wave to a new wave
@@ -785,18 +1415,44 @@ void UnloadMusicStream(Music music)
 // Start music playing (open stream)
 void PlayMusicStream(Music music)
 {
+#if USE_MINI_AL
+    AudioBuffer* audioBuffer = (AudioBuffer*)music->stream.audioBuffer;
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "PlayMusicStream() : No audio buffer");
+        return;
+    }
+
+    // For music streams, we need to make sure we maintain the frame cursor position. This is a hack for this section of code in UpdateMusicStream():
+    //     // NOTE: In case window is minimized, music stream is stopped,
+    //     // just make sure to play again on window restore
+    //     if (IsMusicPlaying(music)) PlayMusicStream(music);
+    mal_uint32 frameCursorPos = audioBuffer->frameCursorPos;
+    {
+        PlayAudioStream(music->stream); // <-- This resets the cursor position.
+    }
+    audioBuffer->frameCursorPos = frameCursorPos;
+#else
     alSourcePlay(music->stream.source);
+#endif
 }
 
 // Pause music playing
 void PauseMusicStream(Music music)
 {
+#if USE_MINI_AL
+    PauseAudioStream(music->stream);
+#else
     alSourcePause(music->stream.source);
+#endif
 }
 
 // Resume music playing
 void ResumeMusicStream(Music music)
 {
+#if USE_MINI_AL
+    ResumeAudioStream(music->stream);
+#else
     ALenum state;
     alGetSourcei(music->stream.source, AL_SOURCE_STATE, &state);
 
@@ -805,12 +1461,16 @@ void ResumeMusicStream(Music music)
         TraceLog(LOG_INFO, "[AUD ID %i] Resume music stream playing", music->stream.source);
         alSourcePlay(music->stream.source);
     }
+#endif
 }
 
 // Stop music playing (close stream)
 // TODO: To clear a buffer, make sure they have been already processed!
 void StopMusicStream(Music music)
 {
+#if USE_MINI_AL
+    StopAudioStream(music->stream);
+#else
     alSourceStop(music->stream.source);
     
     /*
@@ -826,6 +1486,7 @@ void StopMusicStream(Music music)
 
     free(pcm);
     */
+#endif
     
     // Restart music context
     switch (music->ctxType)
@@ -850,6 +1511,86 @@ void StopMusicStream(Music music)
 // TODO: Make sure buffers are ready for update... check music state
 void UpdateMusicStream(Music music)
 {
+#if USE_MINI_AL
+    AudioBuffer* audioBuffer = (AudioBuffer*)music->stream.audioBuffer;
+    if (audioBuffer == NULL)    // Same guard as the other stream functions; avoids a NULL dereference below.
+    {
+        TraceLog(LOG_ERROR, "UpdateMusicStream() : No audio buffer");
+        return;
+    }
+
+    bool streamEnding = false;
+    unsigned int subBufferSizeInFrames = audioBuffer->bufferSizeInFrames / 2;
+
+    // NOTE: Using dynamic allocation because it could require more than 16KB
+    void *pcm = calloc(subBufferSizeInFrames*music->stream.sampleSize/8*music->stream.channels, 1);
+    if (pcm == NULL)
+    {
+        TraceLog(LOG_ERROR, "UpdateMusicStream() : Failed to allocate PCM buffer");
+        return;
+    }
+
+    int samplesCount = 0;    // Total size of data streamed in L+R samples for xm floats, individual L or R for ogg shorts
+
+    while (IsAudioBufferProcessed(music->stream))
+    {
+        if (music->samplesLeft >= subBufferSizeInFrames) samplesCount = subBufferSizeInFrames;
+        else samplesCount = music->samplesLeft;
+        // TODO: Really don't like ctxType thingy...
+        switch (music->ctxType)
+        {
+            case MUSIC_AUDIO_OGG:
+            {
+                // NOTE: Returns the number of samples to process (be careful! we ask for number of shorts!)
+                int numSamplesOgg = stb_vorbis_get_samples_short_interleaved(music->ctxOgg, music->stream.channels, (short *)pcm, samplesCount*music->stream.channels);
+            } break;
+        #if defined(SUPPORT_FILEFORMAT_FLAC)
+            case MUSIC_AUDIO_FLAC:
+            {
+                // NOTE: Returns the number of samples to process
+                unsigned int numSamplesFlac = (unsigned int)drflac_read_s16(music->ctxFlac, samplesCount*music->stream.channels, (short *)pcm);
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_XM)
+            case MUSIC_MODULE_XM: jar_xm_generate_samples_16bit(music->ctxXm, pcm, samplesCount); break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_MOD)
+            case MUSIC_MODULE_MOD: jar_mod_fillbuffer(&music->ctxMod, pcm, samplesCount, 0); break;
+        #endif
+            default: break;
+        }
+
+        UpdateAudioStream(music->stream, pcm, samplesCount);
+        music->samplesLeft -= samplesCount;
+
+        if (music->samplesLeft <= 0)
+        {
+            streamEnding = true;
+            break;
+        }
+    }
+
+    // Free allocated pcm data
+    free(pcm);
+
+    // Reset audio stream for looping
+    if (streamEnding)
+    {
+        StopMusicStream(music);        // Stop music (and reset)
+        // Decrease loopCount to stop when required
+        if (music->loopCount > 0)
+        {
+            music->loopCount--;        // Decrease loop count
+            PlayMusicStream(music);    // Play again
+        }
+        else if (music->loopCount == -1) PlayMusicStream(music);    // -1 means loop forever
+    }
+    else
+    {
+        // NOTE: In case window is minimized, music stream is stopped, just make sure to play again on window restore
+        if (IsMusicPlaying(music)) PlayMusicStream(music);
+    }
+#else
     ALenum state;
     ALint processed = 0;
 
@@ -922,6 +1663,13 @@ void UpdateMusicStream(Music music)
                 music->loopCount--;        // Decrease loop count
                 PlayMusicStream(music);    // Play again
             }
+            else
+            {
+                if (music->loopCount == -1)
+                {
+                    PlayMusicStream(music);
+                }
+            }
         }
         else
         {
@@ -930,11 +1678,15 @@ void UpdateMusicStream(Music music)
             if (state != AL_PLAYING) PlayMusicStream(music);
         }
     }
+#endif
 }
 
 // Check if any music is playing
 bool IsMusicPlaying(Music music)
 {
+#if USE_MINI_AL
+    return IsAudioStreamPlaying(music->stream);
+#else
     bool playing = false;
     ALint state;
 
@@ -943,23 +1695,32 @@ bool IsMusicPlaying(Music music)
     if (state == AL_PLAYING) playing = true;
 
     return playing;
+#endif
 }
 
 // Set volume for music
 void SetMusicVolume(Music music, float volume)
 {
+#if USE_MINI_AL
+    SetAudioStreamVolume(music->stream, volume);
+#else
     alSourcef(music->stream.source, AL_GAIN, volume);
+#endif
 }
 
 // Set pitch for music
 void SetMusicPitch(Music music, float pitch)
 {
+#if USE_MINI_AL
+    SetAudioStreamPitch(music->stream, pitch);
+#else
     alSourcef(music->stream.source, AL_PITCH, pitch);
+#endif
 }
 
 // Set music loop count (loop repeats)
 // NOTE: If set to -1, means infinite loop
-void SetMusicLoopCount(Music music, float count)
+void SetMusicLoopCount(Music music, int count)
 {
     music->loopCount = count;
 }
@@ -983,6 +1744,7 @@ float GetMusicTimePlayed(Music music)
     return secondsPlayed;
 }
 
+
 // Init audio stream (to stream audio pcm data)
 AudioStream InitAudioStream(unsigned int sampleRate, unsigned int sampleSize, unsigned int channels)
 {
@@ -999,6 +1761,27 @@ AudioStream InitAudioStream(unsigned int sampleRate, unsigned int sampleSize, un
         stream.channels = 1;  // Fallback to mono channel
     }
 
+
+#if USE_MINI_AL
+    mal_format formatIn = ((stream.sampleSize == 8) ? mal_format_u8 : ((stream.sampleSize == 16) ? mal_format_s16 : mal_format_f32));    // 8-bit -> u8, 16-bit -> s16, anything else -> f32
+
+    // The size of a streaming buffer must be at least double the size of a period.
+    unsigned int periodSize = device.bufferSizeInFrames / device.periods;
+    unsigned int subBufferSize = AUDIO_BUFFER_SIZE;    // Default sub-buffer size, raised below to one device period if that is larger
+    if (subBufferSize < periodSize) {
+        subBufferSize = periodSize;
+    }
+
+    AudioBuffer* audioBuffer = CreateAudioBuffer(formatIn, stream.channels, stream.sampleRate, subBufferSize*2, AUDIO_BUFFER_USAGE_STREAM);    // Room for two sub-buffers
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "InitAudioStream() : Failed to create audio buffer");
+        return stream;    // stream.audioBuffer is not assigned on this path
+    }
+
+    audioBuffer->looping = true;    // Always loop for streaming buffers.
+    stream.audioBuffer = audioBuffer;
+#else
     // Setup OpenAL format
     if (stream.channels == 1)
     {
@@ -1043,6 +1826,7 @@ AudioStream InitAudioStream(unsigned int sampleRate, unsigned int sampleSize, un
     free(pcm);
 
     alSourceQueueBuffers(stream.source, MAX_STREAM_BUFFERS, stream.buffers);
+#endif
 
     TraceLog(LOG_INFO, "[AUD ID %i] Audio stream loaded successfully (%i Hz, %i bit, %s)", stream.source, stream.sampleRate, stream.sampleSize, (stream.channels == 1) ? "Mono" : "Stereo");
 
@@ -1052,6 +1836,9 @@ AudioStream InitAudioStream(unsigned int sampleRate, unsigned int sampleSize, un
 // Close audio stream and free memory
 void CloseAudioStream(AudioStream stream)
 {
+#if USE_MINI_AL
+    DeleteAudioBuffer((AudioBuffer*)stream.audioBuffer);
+#else
     // Stop playing channel
     alSourceStop(stream.source);
 
@@ -1070,7 +1857,8 @@ void CloseAudioStream(AudioStream stream)
     // Delete source and buffers
     alDeleteSources(1, &stream.source);
     alDeleteBuffers(MAX_STREAM_BUFFERS, stream.buffers);
-
+#endif
+    
     TraceLog(LOG_INFO, "[AUD ID %i] Unloaded audio stream data", stream.source);
 }
 
@@ -1079,6 +1867,63 @@ void CloseAudioStream(AudioStream stream)
 // NOTE 2: To unqueue a buffer it needs to be processed: IsAudioBufferProcessed()
 void UpdateAudioStream(AudioStream stream, const void *data, int samplesCount)
 {
+#if USE_MINI_AL
+    AudioBuffer* audioBuffer = (AudioBuffer*)stream.audioBuffer;
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "UpdateAudioStream() : No audio buffer");
+        return;
+    }
+
+    if (audioBuffer->isSubBufferProcessed[0] || audioBuffer->isSubBufferProcessed[1])
+    {
+        mal_uint32 subBufferToUpdate;
+        if (audioBuffer->isSubBufferProcessed[0] && audioBuffer->isSubBufferProcessed[1])
+        {
+            // Both buffers are available for updating. Update the first one and make sure the cursor is moved back to the front.
+            subBufferToUpdate = 0;
+            audioBuffer->frameCursorPos = 0;
+        }
+        else
+        {
+            // Just update whichever sub-buffer is processed.
+            subBufferToUpdate = (audioBuffer->isSubBufferProcessed[0]) ? 0 : 1;
+        }
+
+        mal_uint32 subBufferSizeInFrames = audioBuffer->bufferSizeInFrames/2;
+        unsigned char *subBuffer = audioBuffer->buffer + ((subBufferSizeInFrames * stream.channels * (stream.sampleSize/8)) * subBufferToUpdate);
+
+        // Does this API expect a whole buffer to be updated in one go? Assuming so, but if not will need to change this logic.
+        if (subBufferSizeInFrames >= (mal_uint32)samplesCount)
+        {
+            mal_uint32 framesToWrite = subBufferSizeInFrames;
+            if (framesToWrite > (mal_uint32)samplesCount) {
+                framesToWrite = (mal_uint32)samplesCount;
+            }
+
+            mal_uint32 bytesToWrite = framesToWrite * stream.channels * (stream.sampleSize/8);
+            memcpy(subBuffer, data, bytesToWrite);
+
+            // Any leftover frames should be filled with zeros.
+            mal_uint32 leftoverFrameCount = subBufferSizeInFrames - framesToWrite;
+            if (leftoverFrameCount > 0) {
+                memset(subBuffer + bytesToWrite, 0, leftoverFrameCount * stream.channels * (stream.sampleSize/8));
+            }
+
+            audioBuffer->isSubBufferProcessed[subBufferToUpdate] = false;
+        }
+        else
+        {
+            TraceLog(LOG_ERROR, "UpdateAudioStream() : Attempting to write too many frames to buffer");
+            return;
+        }
+    }
+    else
+    {
+        TraceLog(LOG_ERROR, "Audio buffer not available for updating");
+        return;
+    }
+#else
     ALuint buffer = 0;
     alSourceUnqueueBuffers(stream.source, 1, &buffer);
 
@@ -1089,44 +1934,107 @@ void UpdateAudioStream(AudioStream stream, const void *data, int samplesCount)
         alSourceQueueBuffers(stream.source, 1, &buffer);
     }
     else TraceLog(LOG_WARNING, "[AUD ID %i] Audio buffer not available for unqueuing", stream.source);
+#endif
 }
 
 // Check if any audio stream buffers requires refill
 bool IsAudioBufferProcessed(AudioStream stream)
 {
+#if USE_MINI_AL
+    AudioBuffer* audioBuffer = (AudioBuffer*)stream.audioBuffer;
+    if (audioBuffer == NULL)
+    {
+        TraceLog(LOG_ERROR, "IsAudioBufferProcessed() : No audio buffer");
+        return false;
+    }
+
+    return audioBuffer->isSubBufferProcessed[0] || audioBuffer->isSubBufferProcessed[1];
+#else
     ALint processed = 0;
 
     // Determine if music stream is ready to be written
     alGetSourcei(stream.source, AL_BUFFERS_PROCESSED, &processed);
 
     return (processed > 0);
+#endif
 }
 
 // Play audio stream
 void PlayAudioStream(AudioStream stream)
 {
+#if USE_MINI_AL
+    PlayAudioBuffer((AudioBuffer*)stream.audioBuffer);
+#else
     alSourcePlay(stream.source);
+#endif
 }
 
 // Play audio stream
 void PauseAudioStream(AudioStream stream)
 {
+#if USE_MINI_AL
+    PauseAudioBuffer((AudioBuffer*)stream.audioBuffer);
+#else
     alSourcePause(stream.source);
+#endif
 }
 
 // Resume audio stream playing
 void ResumeAudioStream(AudioStream stream)
 {
+#if USE_MINI_AL
+    ResumeAudioBuffer((AudioBuffer*)stream.audioBuffer);
+#else
     ALenum state;
     alGetSourcei(stream.source, AL_SOURCE_STATE, &state);
 
     if (state == AL_PAUSED) alSourcePlay(stream.source);
+#endif
+}
+
+// Check if audio stream is playing.
+bool IsAudioStreamPlaying(AudioStream stream)
+{
+#if USE_MINI_AL
+    return IsAudioBufferPlaying((AudioBuffer*)stream.audioBuffer);
+#else
+    bool playing = false;
+    ALint state;
+
+    alGetSourcei(stream.source, AL_SOURCE_STATE, &state);
+
+    if (state == AL_PLAYING) playing = true;
+
+    return playing;
+#endif
 }
 
 // Stop audio stream
 void StopAudioStream(AudioStream stream)
 {
+#if USE_MINI_AL
+    StopAudioBuffer((AudioBuffer*)stream.audioBuffer);
+#else
     alSourceStop(stream.source);
+#endif
+}
+
+void SetAudioStreamVolume(AudioStream stream, float volume)
+{
+#if USE_MINI_AL
+    SetAudioBufferVolume((AudioBuffer*)stream.audioBuffer, volume);
+#else
+    alSourcef(stream.source, AL_GAIN, volume);
+#endif
+}
+
+void SetAudioStreamPitch(AudioStream stream, float pitch)
+{
+#if USE_MINI_AL
+    SetAudioBufferPitch((AudioBuffer*)stream.audioBuffer, pitch);
+#else
+    alSourcef(stream.source, AL_PITCH, pitch);
+#endif
 }
 
 //----------------------------------------------------------------------------------
diff --git a/raylib/audio.go b/raylib/audio.go
index 486f725..989eceb 100644
--- a/raylib/audio.go
+++ b/raylib/audio.go
@@ -42,11 +42,11 @@ func NewWaveFromPointer(ptr unsafe.Pointer) Wave {
 
 // Sound source type
 type Sound struct {
-	// OpenAL audio source id
+	// Audio source id
 	Source uint32
-	// OpenAL audio buffer id
+	// Audio buffer id
 	Buffer uint32
-	// OpenAL audio format specifier
+	// Audio format specifier
 	Format int32
 }
 
@@ -77,11 +77,11 @@ type AudioStream struct {
 	SampleSize uint32
 	// Number of channels (1-mono, 2-stereo)
 	Channels uint32
-	// OpenAL audio format specifier
+	// Audio format specifier
 	Format int32
-	// OpenAL audio source id
+	// Audio source id
 	Source uint32
-	// OpenAL audio buffers (double buffering)
+	// Audio buffers (double buffering)
 	Buffers [2]uint32
 }
 
@@ -334,9 +334,9 @@ func SetMusicPitch(music Music, pitch float32) {
 
 // SetMusicLoopCount - Set music loop count (loop repeats)
 // NOTE: If set to -1, means infinite loop
-func SetMusicLoopCount(music Music, count float32) {
+func SetMusicLoopCount(music Music, count int32) {
 	cmusic := *(*C.Music)(unsafe.Pointer(&music))
-	ccount := (C.float)(count)
+	ccount := (C.int)(count)
 	C.SetMusicLoopCount(cmusic, ccount)
 }
 
diff --git a/raylib/audio.h b/raylib/audio.h
index f135fda..48ef740 100644
--- a/raylib/audio.h
+++ b/raylib/audio.h
@@ -1,5 +1,3 @@
-// +build !noaudio
-
 /**********************************************************************************************
 *
 *   raylib.audio - Basic funtionality to work with audio
@@ -169,4 +167,4 @@ void StopAudioStream(AudioStream stream);                       // Stop audio st
 }
 #endif
 
-#endif // AUDIO_H
+#endif // AUDIO_H
\ No newline at end of file
diff --git a/raylib/cgo_android.go b/raylib/cgo_android.go
index 39c86d4..e56b130 100644
--- a/raylib/cgo_android.go
+++ b/raylib/cgo_android.go
@@ -4,8 +4,6 @@ package raylib
 
 /*
 #cgo android LDFLAGS: -llog -landroid -lEGL -lGLESv2 -lOpenSLES -lm -landroid_native_app_glue -u ANativeActivity_onCreate
-#cgo android CFLAGS: -DPLATFORM_ANDROID -DGRAPHICS_API_OPENGL_ES2
-
-#cgo android,!noaudio LDFLAGS: -lopenal
+#cgo android CFLAGS: -DPLATFORM_ANDROID -DGRAPHICS_API_OPENGL_ES2 -Iexternal
 */
 import "C"
diff --git a/raylib/cgo_darwin.go b/raylib/cgo_darwin.go
index 22893cd..8b7bf07 100644
--- a/raylib/cgo_darwin.go
+++ b/raylib/cgo_darwin.go
@@ -16,14 +16,12 @@ package raylib
 #include "external/glfw/src/cocoa_time.c"
 #include "external/glfw/src/cocoa_window.m"
 #include "external/glfw/src/posix_thread.c"
-#include "external/glfw/src/posix_time.c"
 #include "external/glfw/src/nsgl_context.m"
+#include "external/glfw/src/egl_context.c"
 #include "external/glfw/src/osmesa_context.c"
 
 #cgo darwin LDFLAGS: -framework OpenGL -framework Cocoa -framework IOKit -framework CoreVideo -framework CoreFoundation
-#cgo darwin CFLAGS: -x objective-c -Iexternal/glfw/include -D_GLFW_COCOA -D_GLFW_USE_CHDIR -D_GLFW_USE_MENUBAR -D_GLFW_USE_RETINA -Wno-deprecated-declarations -DPLATFORM_DESKTOP
-
-#cgo darwin,!noaudio LDFLAGS: -framework OpenAL
+#cgo darwin CFLAGS: -x objective-c -Iexternal -Iexternal/glfw/include -D_GLFW_COCOA -D_GLFW_USE_CHDIR -D_GLFW_USE_MENUBAR -D_GLFW_USE_RETINA -Wno-deprecated-declarations -DPLATFORM_DESKTOP -DMAL_NO_COREAUDIO
 
 #cgo darwin,opengl11 CFLAGS: -DGRAPHICS_API_OPENGL_11
 #cgo darwin,opengl21 CFLAGS: -DGRAPHICS_API_OPENGL_21
diff --git a/raylib/cgo_linux.go b/raylib/cgo_linux.go
index 62ef966..5297216 100644
--- a/raylib/cgo_linux.go
+++ b/raylib/cgo_linux.go
@@ -16,7 +16,6 @@ package raylib
 #include "external/glfw/src/wl_window.c"
 #include "external/glfw/src/wayland-pointer-constraints-unstable-v1-client-protocol.c"
 #include "external/glfw/src/wayland-relative-pointer-unstable-v1-client-protocol.c"
-#include "external/glfw/src/wayland-idle-inhibit-unstable-v1-client-protocol.c"
 #endif
 #ifdef _GLFW_X11
 #include "external/glfw/src/x11_init.c"
@@ -32,7 +31,7 @@ package raylib
 #include "external/glfw/src/egl_context.c"
 #include "external/glfw/src/osmesa_context.c"
 
-#cgo linux CFLAGS: -Iexternal/glfw/include -DPLATFORM_DESKTOP
+#cgo linux CFLAGS: -Iexternal -Iexternal/glfw/include -DPLATFORM_DESKTOP
 
 #cgo linux,!wayland LDFLAGS: -lGL -lm -pthread -ldl -lrt -lX11
 #cgo linux,wayland LDFLAGS: -lGL -lm -pthread -ldl -lrt -lwayland-client -lwayland-cursor -lwayland-egl -lxkbcommon
@@ -40,11 +39,6 @@ package raylib
 #cgo linux,!wayland CFLAGS: -D_GLFW_X11
 #cgo linux,wayland CFLAGS: -D_GLFW_WAYLAND
 
-#cgo linux,!noaudio LDFLAGS: -lopenal
-
-#cgo linux,!static CFLAGS: -DSHARED_OPENAL
-#cgo linux,static CFLAGS: -DAL_LIBTYPE_STATIC
-
 #cgo linux,opengl11 CFLAGS: -DGRAPHICS_API_OPENGL_11
 #cgo linux,opengl21 CFLAGS: -DGRAPHICS_API_OPENGL_21
 #cgo linux,!opengl11,!opengl21 CFLAGS: -DGRAPHICS_API_OPENGL_33
diff --git a/raylib/cgo_linux_arm.go b/raylib/cgo_linux_arm.go
index cbdb4d0..3326212 100644
--- a/raylib/cgo_linux_arm.go
+++ b/raylib/cgo_linux_arm.go
@@ -3,9 +3,7 @@
 package raylib
 
 /*
-#cgo linux,arm LDFLAGS: -lbrcmGLESv2 -lbrcmEGL -lpthread -lrt -lm -lbcm_host -lvcos -lvchiq_arm
-#cgo linux,arm CFLAGS: -DPLATFORM_RPI -DGRAPHICS_API_OPENGL_ES2 -I/opt/vc/include -I/opt/vc/include/interface/vcos -I/opt/vc/include/interface/vmcs_host/linux -I/opt/vc/include/interface/vcos/pthreads
-
-#cgo linux,arm,!noaudio LDFLAGS: -lopenal
+#cgo linux,arm LDFLAGS: -lbrcmGLESv2 -lbrcmEGL -lpthread -lrt -lm -lbcm_host -lvcos -lvchiq_arm -ldl
+#cgo linux,arm CFLAGS: -DPLATFORM_RPI -DGRAPHICS_API_OPENGL_ES2 -Iexternal -I/opt/vc/include -I/opt/vc/include/interface/vcos -I/opt/vc/include/interface/vmcs_host/linux -I/opt/vc/include/interface/vcos/pthreads
 */
 import "C"
diff --git a/raylib/cgo_windows.go b/raylib/cgo_windows.go
index 2d215f7..6615cc2 100644
--- a/raylib/cgo_windows.go
+++ b/raylib/cgo_windows.go
@@ -21,12 +21,7 @@ package raylib
 #include "external/glfw/src/osmesa_context.c"
 
 #cgo windows LDFLAGS: -lopengl32 -lgdi32 -lwinmm -lole32
-#cgo windows CFLAGS: -D_GLFW_WIN32 -Iexternal/glfw/include -Iexternal/glfw/deps/mingw -DPLATFORM_DESKTOP
-
-#cgo windows,!noaudio LDFLAGS: -lopenal
-
-#cgo windows,!static CFLAGS: -DSHARED_OPENAL
-#cgo windows,static CFLAGS: -DAL_LIBTYPE_STATIC
+#cgo windows CFLAGS: -D_GLFW_WIN32 -Iexternal -Iexternal/glfw/include -Iexternal/glfw/deps/mingw -DPLATFORM_DESKTOP
 
 #cgo windows,opengl11 CFLAGS: -DGRAPHICS_API_OPENGL_11
 #cgo windows,opengl21 CFLAGS: -DGRAPHICS_API_OPENGL_21
diff --git a/raylib/core.c b/raylib/core.c
index 01febd6..36b9de6 100644
--- a/raylib/core.c
+++ b/raylib/core.c
@@ -2,19 +2,20 @@
 *
 *   raylib.core - Basic functions to manage windows, OpenGL context and input on multiple platforms
 *
-*   PLATFORMS SUPPORTED: 
-*       - Windows (Win32, Win64)
-*       - Linux (tested on Ubuntu)
-*       - FreeBSD
-*       - OSX/macOS
-*       - Android (ARM, ARM64) 
-*       - Raspberry Pi (Raspbian)
-*       - HTML5 (Chrome, Firefox)
+*   PLATFORMS SUPPORTED:
+*       PLATFORM_DESKTOP:  Windows (Win32, Win64)
+*       PLATFORM_DESKTOP:  Linux (32 and 64 bit)
+*       PLATFORM_DESKTOP:  OSX/macOS
+*       PLATFORM_DESKTOP:  FreeBSD
+*       PLATFORM_ANDROID:  Android (ARM, ARM64)
+*       PLATFORM_RPI:      Raspberry Pi (Raspbian)
+*       PLATFORM_WEB:      HTML5 (Chrome, Firefox)
+*       PLATFORM_UWP:      Universal Windows Platform
 *
 *   CONFIGURATION:
 *
 *   #define PLATFORM_DESKTOP
-*       Windowing and input system configured for desktop platforms: Windows, Linux, OSX, FreeBSD (managed by GLFW3 library)
+*       Windowing and input system configured for desktop platforms: Windows, Linux, OSX, FreeBSD
 *       NOTE: Oculus Rift CV1 requires PLATFORM_DESKTOP for mirror rendering - View [rlgl] module to enable it
 *
 *   #define PLATFORM_ANDROID
@@ -22,8 +23,8 @@
 *       NOTE: OpenGL ES 2.0 is required and graphic device is managed by EGL
 *
 *   #define PLATFORM_RPI
-*       Windowing and input system configured for Raspberry Pi (tested on Raspbian), graphic device is managed by EGL 
-*       and inputs are processed is raw mode, reading from /dev/input/
+*       Windowing and input system configured for Raspberry Pi in native mode (no X.org required, tested on Raspbian),
+*       graphic device is managed by EGL and inputs are processed in raw mode, reading from /dev/input/
 *
 *   #define PLATFORM_WEB
 *       Windowing and input system configured for HTML5 (run on browser), code converted from C to asm.js
@@ -49,15 +50,15 @@
 *       Allow automatic gif recording of current screen pressing CTRL+F12, defined in KeyCallback()
 *
 *   DEPENDENCIES:
-*       GLFW3    - Manage graphic device, OpenGL context and inputs on PLATFORM_DESKTOP (Windows, Linux, OSX)
-*       raymath  - 3D math functionality (Vector3, Matrix, Quaternion)
+*       rglfw    - Manage graphic device, OpenGL context and inputs on PLATFORM_DESKTOP (Windows, Linux, OSX, FreeBSD)
+*       raymath  - 3D math functionality (Vector2, Vector3, Matrix, Quaternion)
 *       camera   - Multiple 3D camera modes (free, orbital, 1st person, 3rd person)
 *       gestures - Gestures system for touch-ready devices (or simulated from mouse inputs)
 *
 *
 *   LICENSE: zlib/libpng
 *
-*   Copyright (c) 2014-2017 Ramon Santamaria (@raysan5)
+*   Copyright (c) 2014-2018 Ramon Santamaria (@raysan5)
 *
 *   This software is provided "as-is", without any express or implied warranty. In no event
 *   will the authors be held liable for any damages arising from the use of this software.
@@ -88,6 +89,11 @@
 
 #include "raylib.h"
 
+#if (defined(__linux__) || defined(PLATFORM_WEB)) && _POSIX_C_SOURCE < 199309L
+    #undef _POSIX_C_SOURCE          // An undefined macro evaluates to 0 in the test above, so the "not defined" case lands here too.
+    #define _POSIX_C_SOURCE 199309L // Required for CLOCK_MONOTONIC if compiled with c99 without gnu ext.
+#endif
+
 #include "rlgl.h"           // raylib OpenGL abstraction layer to OpenGL 1.1, 3.3+ or ES2
 #include "utils.h"          // Required for: fopen() Android mapping
 
@@ -110,10 +116,6 @@
     #include "external/rgif.h"   // Support GIF recording
 #endif
 
-#if defined(__linux__) || defined(PLATFORM_WEB)
-    /*#define _POSIX_C_SOURCE 199309L // Required for CLOCK_MONOTONIC if compiled with c99 without gnu ext.*/
-#endif
-
 #include <stdio.h>          // Standard input / output lib
 #include <stdlib.h>         // Required for: malloc(), free(), rand(), atexit()
 #include <stdint.h>         // Required for: typedef unsigned long long int uint64_t, used by hi-res timer
@@ -151,7 +153,6 @@
     //#define GLFW_DLL          // Using GLFW DLL on Windows -> No, we use static version!
     
     #if !defined(SUPPORT_BUSY_WAIT_LOOP) && defined(_WIN32)
-    // NOTE: Those functions require linking with winmm library
     __stdcall unsigned int timeBeginPeriod(unsigned int uPeriod);
     __stdcall unsigned int timeEndPeriod(unsigned int uPeriod);
     #endif
@@ -413,7 +414,7 @@ static void *GamepadThread(void *arg);                  // Mouse reading thread
 // NOTE: data parameter could be used to pass any kind of required data to the initialization
 void InitWindow(int width, int height, void *data)
 {
-    TraceLog(LOG_INFO, "Initializing raylib (v1.8.0)");
+    TraceLog(LOG_INFO, "Initializing raylib (v1.9-dev)");
 
     // Input data is window title char data
     windowTitle = (char *)data;
@@ -477,7 +478,7 @@ void InitWindow(int width, int height, void *data)
 // NOTE: data parameter could be used to pass any kind of required data to the initialization
 void InitWindow(int width, int height, void *data)
 {
-    TraceLog(LOG_INFO, "Initializing raylib (v1.8.0)");
+    TraceLog(LOG_INFO, "Initializing raylib (v1.9-dev)");
 
     screenWidth = width;
     screenHeight = height;
diff --git a/raylib/external/dr_flac.h b/raylib/external/dr_flac.h
index 5388330..60f57ec 100644
--- a/raylib/external/dr_flac.h
+++ b/raylib/external/dr_flac.h
@@ -1,13 +1,13 @@
 // FLAC audio decoder. Public domain. See "unlicense" statement at the end of this file.
-// dr_flac - v0.4c - 2016-12-26
+// dr_flac - v0.8d - 2017-09-22
 //
 // David Reid - mackron@gmail.com
 
 // USAGE
 //
 // dr_flac is a single-file library. To use it, do something like the following in one .c file.
-//   #define DR_FLAC_IMPLEMENTATION
-//   #include "dr_flac.h"
+//     #define DR_FLAC_IMPLEMENTATION
+//     #include "dr_flac.h"
 //
 // You can then #include this file in other parts of the program as you would with any other header file. To decode audio data,
 // do something like the following:
@@ -17,8 +17,8 @@
 //         // Failed to open FLAC file
 //     }
 //
-//     int32_t* pSamples = malloc(pFlac->totalSampleCount * sizeof(int32_t));
-//     uint64_t numberOfInterleavedSamplesActuallyRead = drflac_read_s32(pFlac, pFlac->totalSampleCount, pSamples);
+//     drflac_int32* pSamples = malloc(pFlac->totalSampleCount * sizeof(drflac_int32));
+//     drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_s32(pFlac, pFlac->totalSampleCount, pSamples);
 //
 // The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of
 // channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are
@@ -43,8 +43,8 @@
 //
 //     unsigned int channels;
 //     unsigned int sampleRate;
-//     uint64_t totalSampleCount;
-//     int32_t* pSampleData = drflac_open_and_decode_file("MySong.flac", &channels, &sampleRate, &totalSampleCount);
+//     drflac_uint64 totalSampleCount;
+//     drflac_int32* pSampleData = drflac_open_and_decode_file_s32("MySong.flac", &channels, &sampleRate, &totalSampleCount);
 //     if (pSampleData == NULL) {
 //         // Failed to open and decode FLAC file.
 //     }
@@ -54,13 +54,25 @@
 //     drflac_free(pSampleData);
 //
 //
+// You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs
+// respectively, but note that these should be considered lossy.
+//
+//
 // If you need access to metadata (album art, etc.), use drflac_open_with_metadata(), drflac_open_file_with_metdata() or
 // drflac_open_memory_with_metadata(). The rationale for keeping these APIs separate is that they're slightly slower than the
 // normal versions and also just a little bit harder to use.
 //
 // dr_flac reports metadata to the application through the use of a callback, and every metadata block is reported before
-// drflac_open_with_metdata() returns. See https://github.com/mackron/dr_libs_tests/blob/master/dr_flac/dr_flac_test_2.c for
-// an example on how to read metadata.
+// drflac_open_with_metadata() returns.
+//
+//
+// The main opening APIs (drflac_open(), etc.) will fail if the header is not present. This presents a problem in certain
+// scenarios such as broadcast style streams like internet radio where the header may not be present because the user has
+// started playback mid-stream. To handle this, use the relaxed APIs: drflac_open_relaxed() and drflac_open_with_metadata_relaxed().
+//
+// It is not recommended to use these APIs for file based streams because a missing header would usually indicate a
+// corrupted or perverse file. In addition, these APIs can take a long time to initialize because they may need to spend
+// a lot of time finding the first frame.
 //
 //
 //
@@ -84,54 +96,53 @@
 //   returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of
 //   onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
 //
+// #define DR_FLAC_NO_CRC
+//   Disables CRC checks. This will offer a performance boost when CRC is unnecessary.
+//
+// #define DR_FLAC_NO_SIMD
+//   Disables SIMD optimizations (SSE on x86/x64 architectures). Use this if you are having compatibility issues with your
+//   compiler.
+//
 //
 //
 // QUICK NOTES
-// - Based on my tests, the performance of the 32-bit build is at about parity with the reference implementation. The 64-bit build
-//   is slightly faster.
-// - dr_flac does not currently do any CRC checks.
-// - dr_flac should work fine with valid native FLAC files, but for broadcast streams it won't work if the header and STREAMINFO
-//   block is unavailable.
 // - Audio data is output as signed 32-bit PCM, regardless of the bits per sample the FLAC stream is encoded as.
 // - This has not been tested on big-endian architectures.
 // - Rice codes in unencoded binary form (see https://xiph.org/flac/format.html#rice_partition) has not been tested. If anybody
 //   knows where I can find some test files for this, let me know.
-// - Perverse and erroneous files have not been tested. Again, if you know where I can get some test files let me know.
 // - dr_flac is not thread-safe, but it's APIs can be called from any thread so long as you do your own synchronization.
+// - When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open()
+//   returning inconsistent samples.
 
 #ifndef dr_flac_h
 #define dr_flac_h
 
-#include <stdint.h>
 #include <stddef.h>
 
-#ifndef DR_SIZED_TYPES_DEFINED
-#define DR_SIZED_TYPES_DEFINED
 #if defined(_MSC_VER) && _MSC_VER < 1600
-typedef   signed char    dr_int8;
-typedef unsigned char    dr_uint8;
-typedef   signed short   dr_int16;
-typedef unsigned short   dr_uint16;
-typedef   signed int     dr_int32;
-typedef unsigned int     dr_uint32;
-typedef   signed __int64 dr_int64;
-typedef unsigned __int64 dr_uint64;
+typedef   signed char    drflac_int8;
+typedef unsigned char    drflac_uint8;
+typedef   signed short   drflac_int16;
+typedef unsigned short   drflac_uint16;
+typedef   signed int     drflac_int32;
+typedef unsigned int     drflac_uint32;
+typedef   signed __int64 drflac_int64;
+typedef unsigned __int64 drflac_uint64;
 #else
 #include <stdint.h>
-typedef int8_t           dr_int8;
-typedef uint8_t          dr_uint8;
-typedef int16_t          dr_int16;
-typedef uint16_t         dr_uint16;
-typedef int32_t          dr_int32;
-typedef uint32_t         dr_uint32;
-typedef int64_t          dr_int64;
-typedef uint64_t         dr_uint64;
-#endif
-typedef dr_int8          dr_bool8;
-typedef dr_int32         dr_bool32;
-#define DR_TRUE          1
-#define DR_FALSE         0
+typedef int8_t           drflac_int8;
+typedef uint8_t          drflac_uint8;
+typedef int16_t          drflac_int16;
+typedef uint16_t         drflac_uint16;
+typedef int32_t          drflac_int32;
+typedef uint32_t         drflac_uint32;
+typedef int64_t          drflac_int64;
+typedef uint64_t         drflac_uint64;
 #endif
+typedef drflac_uint8     drflac_bool8;
+typedef drflac_uint32    drflac_bool32;
+#define DRFLAC_TRUE      1
+#define DRFLAC_FALSE     0
 
 // As data is read from the client it is placed into an internal buffer for fast access. This controls the
 // size of that buffer. Larger values means more speed, but also more memory. In my testing there is diminishing
@@ -156,9 +167,9 @@ extern "C" {
 #endif
 
 #ifdef DRFLAC_64BIT
-typedef uint64_t drflac_cache_t;
+typedef drflac_uint64 drflac_cache_t;
 #else
-typedef uint32_t drflac_cache_t;
+typedef drflac_uint32 drflac_cache_t;
 #endif
 
 // The various metadata block types.
@@ -197,7 +208,8 @@ typedef uint32_t drflac_cache_t;
 typedef enum
 {
     drflac_container_native,
-    drflac_container_ogg
+    drflac_container_ogg,
+    drflac_container_unknown
 } drflac_container;
 
 typedef enum
@@ -210,29 +222,29 @@ typedef enum
 #pragma pack(2)
 typedef struct
 {
-    uint64_t firstSample;
-    uint64_t frameOffset;   // The offset from the first byte of the header of the first frame.
-    uint16_t sampleCount;
+    drflac_uint64 firstSample;
+    drflac_uint64 frameOffset;   // The offset from the first byte of the header of the first frame.
+    drflac_uint16 sampleCount;
 } drflac_seekpoint;
 #pragma pack()
 
 typedef struct
 {
-    uint16_t minBlockSize;
-    uint16_t maxBlockSize;
-    uint32_t minFrameSize;
-    uint32_t maxFrameSize;
-    uint32_t sampleRate;
-    uint8_t  channels;
-    uint8_t  bitsPerSample;
-    uint64_t totalSampleCount;
-    uint8_t  md5[16];
+    drflac_uint16 minBlockSize;
+    drflac_uint16 maxBlockSize;
+    drflac_uint32 minFrameSize;
+    drflac_uint32 maxFrameSize;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalSampleCount;
+    drflac_uint8  md5[16];
 } drflac_streaminfo;
 
 typedef struct
 {
     // The metadata type. Use this to know how to interpret the data below.
-    uint32_t type;
+    drflac_uint32 type;
 
     // A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
     // not modify the contents of this buffer. Use the structures below for more meaningful and structured
@@ -240,7 +252,7 @@ typedef struct
     const void* pRawData;
 
     // The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL.
-    uint32_t rawDataSize;
+    drflac_uint32 rawDataSize;
 
     union
     {
@@ -253,50 +265,49 @@ typedef struct
 
         struct
         {
-            uint32_t id;
+            drflac_uint32 id;
             const void* pData;
-            uint32_t dataSize;
+            drflac_uint32 dataSize;
         } application;
 
         struct
         {
-            uint32_t seekpointCount;
+            drflac_uint32 seekpointCount;
             const drflac_seekpoint* pSeekpoints;
         } seektable;
 
         struct
         {
-            uint32_t vendorLength;
+            drflac_uint32 vendorLength;
             const char* vendor;
-            uint32_t commentCount;
+            drflac_uint32 commentCount;
             const char* comments;
         } vorbis_comment;
 
         struct
         {
             char catalog[128];
-            uint64_t leadInSampleCount;
-            dr_bool32 isCD;
-            uint8_t trackCount;
-            const uint8_t* pTrackData;
+            drflac_uint64 leadInSampleCount;
+            drflac_bool32 isCD;
+            drflac_uint8 trackCount;
+            const drflac_uint8* pTrackData;
         } cuesheet;
 
         struct
         {
-            uint32_t type;
-            uint32_t mimeLength;
+            drflac_uint32 type;
+            drflac_uint32 mimeLength;
             const char* mime;
-            uint32_t descriptionLength;
+            drflac_uint32 descriptionLength;
             const char* description;
-            uint32_t width;
-            uint32_t height;
-            uint32_t colorDepth;
-            uint32_t indexColorCount;
-            uint32_t pictureDataSize;
-            const uint8_t* pPictureData;
+            drflac_uint32 width;
+            drflac_uint32 height;
+            drflac_uint32 colorDepth;
+            drflac_uint32 indexColorCount;
+            drflac_uint32 pictureDataSize;
+            const drflac_uint8* pPictureData;
         } picture;
     } data;
-
 } drflac_metadata;
 
 
@@ -307,6 +318,9 @@ typedef struct
 // bytesToRead [in]  The number of bytes to read.
 //
 // Returns the number of bytes actually read.
+//
+// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+// either the entire bytesToRead is filled or you have reached the end of the stream.
 typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
 
 // Callback for when data needs to be seeked.
@@ -319,7 +333,7 @@ typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t by
 //
 // The offset will never be negative. Whether or not it is relative to the beginning or current position is determined
 // by the "origin" parameter which will be either drflac_seek_origin_start or drflac_seek_origin_current.
-typedef dr_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
+typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
 
 // Callback for when a metadata block is read.
 //
@@ -333,7 +347,7 @@ typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
 // Structure for internal use. Only used for decoders opened with drflac_open_memory.
 typedef struct
 {
-    const uint8_t* data;
+    const drflac_uint8* data;
     size_t dataSize;
     size_t currentReadPos;
 } drflac__memory_stream;
@@ -360,64 +374,67 @@ typedef struct
     drflac_cache_t unalignedCache;
 
     // The index of the next valid cache line in the "L2" cache.
-    size_t nextL2Line;
+    drflac_uint32 nextL2Line;
 
     // The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining.
-    size_t consumedBits;
+    drflac_uint32 consumedBits;
 
     // The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
     // Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
     drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
     drflac_cache_t cache;
 
+    // CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
+    // is reset to 0 at the beginning of each frame.
+    drflac_uint16 crc16;
+    drflac_cache_t crc16Cache;          // A cache for optimizing CRC calculations. This is filled when the L1 cache is reloaded.
+    drflac_uint32 crc16CacheIgnoredBytes;   // The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache.
 } drflac_bs;
 
 typedef struct
 {
     // The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC.
-    uint8_t subframeType;
+    drflac_uint8 subframeType;
 
     // The number of wasted bits per sample as specified by the sub-frame header.
-    uint8_t wastedBitsPerSample;
+    drflac_uint8 wastedBitsPerSample;
 
     // The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC.
-    uint8_t lpcOrder;
+    drflac_uint8 lpcOrder;
 
     // The number of bits per sample for this subframe. This is not always equal to the current frame's bit per sample because
     // an extra bit is required for side channels when interchannel decorrelation is being used.
-    uint32_t bitsPerSample;
-
-    // A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData, or
-    // NULL if the heap is not being used. Note that it's a signed 32-bit integer for each value.
-    int32_t* pDecodedSamples;
+    drflac_uint32 bitsPerSample;
 
+    // A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. Note that
+    // it's a signed 32-bit integer for each value.
+    drflac_int32* pDecodedSamples;
 } drflac_subframe;
 
 typedef struct
 {
     // If the stream uses variable block sizes, this will be set to the index of the first sample. If fixed block sizes are used, this will
     // always be set to 0.
-    uint64_t sampleNumber;
+    drflac_uint64 sampleNumber;
 
     // If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0.
-    uint32_t frameNumber;
+    drflac_uint32 frameNumber;
 
     // The sample rate of this frame.
-    uint32_t sampleRate;
+    drflac_uint32 sampleRate;
 
     // The number of samples in each sub-frame within this frame.
-    uint16_t blockSize;
+    drflac_uint16 blockSize;
 
     // The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
     // will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
-    uint8_t channelAssignment;
+    drflac_uint8 channelAssignment;
 
     // The number of bits per sample within this frame.
-    uint8_t bitsPerSample;
-
-    // The frame's CRC. This is set, but unused at the moment.
-    uint8_t crc8;
+    drflac_uint8 bitsPerSample;
 
+    // The frame's CRC.
+    drflac_uint8 crc8;
 } drflac_frame_header;
 
 typedef struct
@@ -427,11 +444,10 @@ typedef struct
 
     // The number of samples left to be read in this frame. This is initially set to the block size multiplied by the channel count. As samples
     // are read, this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
-    uint32_t samplesRemaining;
+    drflac_uint32 samplesRemaining;
 
     // The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels.
     drflac_subframe subframes[8];
-
 } drflac_frame;
 
 typedef struct
@@ -444,22 +460,22 @@ typedef struct
 
 
     // The sample rate. Will be set to something like 44100.
-    uint32_t sampleRate;
+    drflac_uint32 sampleRate;
 
     // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
     // value specified in the STREAMINFO block.
-    uint8_t channels;
+    drflac_uint8 channels;
 
     // The bits per sample. Will be set to somthing like 16, 24, etc.
-    uint8_t bitsPerSample;
+    drflac_uint8 bitsPerSample;
 
     // The maximum block size, in samples. This number represents the number of samples in each channel (not combined).
-    uint16_t maxBlockSize;
+    drflac_uint16 maxBlockSize;
 
     // The total number of samples making up the stream. This includes every channel. For example, if the stream has 2 channels,
     // with each channel having a total of 4096, this value will be set to 2*4096 = 8192. Can be 0 in which case it's still a
     // valid stream, but just means the total sample count is unknown. Likely the case with streams like internet radio.
-    uint64_t totalSampleCount;
+    drflac_uint64 totalSampleCount;
 
 
     // The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream.
@@ -467,34 +483,34 @@ typedef struct
 
 
     // The position of the seektable in the file.
-    uint64_t seektablePos;
+    drflac_uint64 seektablePos;
 
     // The size of the seektable.
-    uint32_t seektableSize;
+    drflac_uint32 seektableSize;
 
 
     // Information about the frame the decoder is currently sitting on.
     drflac_frame currentFrame;
 
     // The position of the first frame in the stream. This is only ever used for seeking.
-    uint64_t firstFramePos;
+    drflac_uint64 firstFramePos;
 
 
     // A hack to avoid a malloc() when opening a decoder with drflac_open_memory().
     drflac__memory_stream memoryStream;
 
 
-
     // A pointer to the decoded sample data. This is an offset of pExtraData.
-    int32_t* pDecodedSamples;
+    drflac_int32* pDecodedSamples;
 
+    // Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData.
+    void* _oggbs;
 
     // The bit streamer. The raw FLAC data is fed through this object.
     drflac_bs bs;
 
-    // Variable length extra data. We attach this to the end of the object so we avoid unnecessary mallocs.
-    uint8_t pExtraData[1];
-
+    // Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs.
+    drflac_uint8 pExtraData[1];
 } drflac;
 
 
@@ -515,11 +531,22 @@ typedef struct
 // This is the lowest level function for opening a FLAC stream. You can also use drflac_open_file() and drflac_open_memory()
 // to open the stream from a file or from a block of memory respectively.
 //
-// The STREAMINFO block must be present for this to succeed.
+// The STREAMINFO block must be present for this to succeed. Use drflac_open_relaxed() to open a FLAC stream where
+// the header may not be present.
 //
 // See also: drflac_open_file(), drflac_open_memory(), drflac_open_with_metadata(), drflac_close()
 drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData);
 
+// The same as drflac_open(), except attempts to open the stream even when a header block is not present.
+//
+// Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do
+// not set this to drflac_container_unknown - that is for internal use only.
+//
+// Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never
+// found it will continue forever. To abort, force your onRead callback to return 0, which dr_flac will use as an
+// indicator that the end of the stream was found.
+drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData);
+
 // Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
 //
 // onRead    [in]           The function to call when data needs to be read from the client.
@@ -531,15 +558,29 @@ drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUse
 //
 // Close the decoder with drflac_close().
 //
-// This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will do a malloc()
-// and free() for every metadata block except for STREAMINFO and PADDING blocks.
+// This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will do a DRFLAC_MALLOC()
+// and DRFLAC_FREE() for every metadata block except for STREAMINFO and PADDING blocks.
 //
-// The caller is notified of the metadata via the onMeta callback. All metadata blocks with be handled before the function
+// The caller is notified of the metadata via the onMeta callback. All metadata blocks will be handled before the function
 // returns.
 //
+// The STREAMINFO block must be present for this to succeed. Use drflac_open_with_metadata_relaxed() to open a FLAC
+// stream where the header may not be present.
+//
+// Note that this will behave inconsistently with drflac_open() if the stream is an Ogg encapsulated stream and a metadata
+// block is corrupted. This is due to the way the Ogg stream recovers from corrupted pages. When drflac_open_with_metadata()
+// is being used, the open routine will try to read the contents of the metadata block, whereas drflac_open() will simply
+// seek past it (for the sake of efficiency). This inconsistency can result in different samples being returned depending on
+// whether or not the stream is being opened with metadata.
+//
 // See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close()
 drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData);
 
+// The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
+//
+// See also: drflac_open_with_metadata(), drflac_open_relaxed()
+drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData);
+
 // Closes the given FLAC decoder.
 //
 // pFlac [in] The decoder to close.
@@ -558,25 +599,50 @@ void drflac_close(drflac* pFlac);
 //
 // pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples
 // seeked.
-uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* pBufferOut);
+drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* pBufferOut);
 
-// Same as drflac_read_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit. Note
-// that this is lossey.
-uint64_t drflac_read_s16(drflac* pFlac, uint64_t samplesToRead, int16_t* pBufferOut);
+// Same as drflac_read_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit.
+//
+// pFlac         [in]            The decoder.
+// samplesToRead [in]            The number of samples to read.
+// pBufferOut    [out, optional] A pointer to the buffer that will receive the decoded samples.
+//
+// Returns the number of samples actually read.
+//
+// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples
+// seeked.
+//
+// Note that this is lossy for streams where the bits per sample is larger than 16.
+drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut);
+
+// Same as drflac_read_s32(), except outputs samples as 32-bit floating-point PCM.
+//
+// pFlac         [in]            The decoder.
+// samplesToRead [in]            The number of samples to read.
+// pBufferOut    [out, optional] A pointer to the buffer that will receive the decoded samples.
+//
+// Returns the number of samples actually read.
+//
+// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples
+// seeked.
+//
+// Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly
+// represent every possible number.
+drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut);
 
 // Seeks to the sample at the given index.
 //
 // pFlac       [in] The decoder.
 // sampleIndex [in] The index of the sample to seek to. See notes below.
 //
-// Returns DR_TRUE if successful; DR_FALSE otherwise.
+// Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise.
 //
 // The sample index is based on interleaving. In a stereo stream, for example, the sample at index 0 is the first sample
 // in the left channel; the sample at index 1 is the first sample on the right channel, and so on.
 //
 // When seeking, you will likely want to ensure it's rounded to a multiple of the channel count. You can do this with
 // something like drflac_seek_to_sample(pFlac, (mySampleIndex + (mySampleIndex % pFlac->channels)))
-dr_bool32 drflac_seek_to_sample(drflac* pFlac, uint64_t sampleIndex);
+drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex);
 
 
 
@@ -618,43 +684,58 @@ drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drfl
 //// High Level APIs ////
 
 // Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
-// pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
+// pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
 //
 // Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
 // read samples into a dynamically sized buffer on the heap until no samples are left.
 //
 // Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
-int32_t* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount);
-int16_t* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount);
+drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+
+// Same as drflac_open_and_decode_s32(), except returns signed 16-bit integer samples.
+drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+
+// Same as drflac_open_and_decode_s32(), except returns 32-bit floating-point samples.
+float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
 
 #ifndef DR_FLAC_NO_STDIO
 // Same as drflac_open_and_decode_s32() except opens the decoder from a file.
-int32_t* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount);
-int16_t* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount);
+drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+
+// Same as drflac_open_and_decode_file_s32(), except returns signed 16-bit integer samples.
+drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+
+// Same as drflac_open_and_decode_file_s32(), except returns 32-bit floating-point samples.
+float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
 #endif
 
 // Same as drflac_open_and_decode_s32() except opens the decoder from a block of memory.
-int32_t* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount);
-int16_t* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount);
+drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
 
-// Frees data returned by drflac_open_and_decode_*().
-void drflac_free(void* pSampleDataReturnedByOpenAndDecode);
+// Same as drflac_open_and_decode_memory_s32(), except returns signed 16-bit integer samples.
+drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+
+// Same as drflac_open_and_decode_memory_s32(), except returns 32-bit floating-point samples.
+float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+
+// Frees memory that was allocated internally by dr_flac.
+void drflac_free(void* p);
 
 
 // Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block.
 typedef struct
 {
-    uint32_t countRemaining;
+    drflac_uint32 countRemaining;
     const char* pRunningData;
 } drflac_vorbis_comment_iterator;
 
 // Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
 // metadata block.
-void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, uint32_t commentCount, const char* pComments);
+void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const char* pComments);
 
 // Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
 // returned string is NOT null terminated.
-const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, uint32_t* pCommentLengthOut);
+const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
 
 
 
@@ -672,24 +753,119 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, ui
 #ifdef DR_FLAC_IMPLEMENTATION
 #include <stdlib.h>
 #include <string.h>
-#include <assert.h>
 
-#ifdef _MSC_VER
-#include <intrin.h>     // For _byteswap_ulong and _byteswap_uint64
+// CPU architecture.
+#if defined(__x86_64__) || defined(_M_X64)
+#define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+#define DRFLAC_X86
 #endif
 
+// Compile-time CPU feature support.
+#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #ifdef _MSC_VER
+        #if _MSC_VER >= 1400
+            #include <intrin.h>
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                __cpuid(info, fid);
+            }
+        #else
+        #define DRFLAC_NO_CPUID
+        #endif
+    #else
+        #if defined(__GNUC__) || defined(__clang__)
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                __asm__ __volatile__ (  // __asm__ (unlike plain asm) is accepted under -std=c99/-ansi; volatile stops the compiler from dropping or merging the statement.
+                    "movl %[fid], %%eax\n\t"
+                    "cpuid\n\t"
+                    "movl %%eax, %[info0]\n\t"
+                    "movl %%ebx, %[info1]\n\t"
+                    "movl %%ecx, %[info2]\n\t"
+                    "movl %%edx, %[info3]\n\t"
+                    : [info0] "=rm"(info[0]),
+                      [info1] "=rm"(info[1]),
+                      [info2] "=rm"(info[2]),
+                      [info3] "=rm"(info[3])
+                    : [fid] "rm"(fid)
+                    : "eax", "ebx", "ecx", "edx"
+                );
+            }
+        #else
+        #define DRFLAC_NO_CPUID
+        #endif
+    #endif
+#else
+#define DRFLAC_NO_CPUID
+#endif
+
+
 #ifdef __linux__
-#ifndef _BSD_SOURCE
 #define _BSD_SOURCE
-#endif
 #include <endian.h>
 #endif
 
+#if defined(_MSC_VER) && _MSC_VER >= 1500
+#define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+#define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif defined(__clang__)
+    #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+#define DRFLAC_HAS_BYTESWAP_INTRINSIC
+#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+#define DRFLAC_HAS_BYTESWAP_INTRINSIC
+#elif defined(__clang__)
+    #if __has_builtin(__builtin_bswap16) && __has_builtin(__builtin_bswap32) && __has_builtin(__builtin_bswap64)
+    #define DRFLAC_HAS_BYTESWAP_INTRINSIC
+    #endif
+#endif
+
+
+// Standard library stuff.
+#ifndef DRFLAC_ASSERT
+#include <assert.h>
+#define DRFLAC_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRFLAC_MALLOC
+#define DRFLAC_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRFLAC_REALLOC
+#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRFLAC_FREE
+#define DRFLAC_FREE(p)                      free((p))
+#endif
+#ifndef DRFLAC_COPY_MEMORY
+#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRFLAC_ZERO_MEMORY
+#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+
+#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  // 64 for AVX-512 in the future.
+
 #ifdef _MSC_VER
 #define DRFLAC_INLINE __forceinline
 #else
+#ifdef __GNUC__
+#define DRFLAC_INLINE inline __attribute__((always_inline))
+#else
 #define DRFLAC_INLINE inline
 #endif
+#endif
+
+typedef drflac_int32 drflac_result;
+#define DRFLAC_SUCCESS                                  0
+#define DRFLAC_ERROR                                    -1  // A generic error.
+#define DRFLAC_INVALID_ARGS                             -2
+#define DRFLAC_END_OF_STREAM                            -128
+#define DRFLAC_CRC_MISMATCH                             -129
 
 #define DRFLAC_SUBFRAME_CONSTANT                        0
 #define DRFLAC_SUBFRAME_VERBATIM                        1
@@ -706,31 +882,68 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, ui
 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
 
 
-//// Endian Management ////
-static DRFLAC_INLINE dr_bool32 drflac__is_little_endian()
+#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
+#define drflac_assert                                   DRFLAC_ASSERT
+#define drflac_copy_memory                              DRFLAC_COPY_MEMORY
+#define drflac_zero_memory                              DRFLAC_ZERO_MEMORY
+
+
+// CPU caps.
+static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
+#ifndef DRFLAC_NO_CPUID
+static drflac_bool32 drflac__gIsSSE42Supported = DRFLAC_FALSE;
+static void drflac__init_cpu_caps()
 {
+    int info[4] = {0};  // Receives EAX, EBX, ECX, EDX (in that order) from drflac__cpuid().
+
+    // LZCNT: CPUID leaf 0x80000001, ECX bit 5.
+    drflac__cpuid(info, 0x80000001);
+    drflac__gIsLZCNTSupported = (info[2] & (1 <<  5)) != 0;
+
+    // SSE4.2: CPUID leaf 1, ECX bit 20 (bit 19 reports SSE4.1, not SSE4.2).
+    drflac__cpuid(info, 1);
+    drflac__gIsSSE42Supported = (info[2] & (1 << 20)) != 0;
+}
+#endif
+
+
+//// Endian Management ////
+static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian()
+{
+#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
+    return DRFLAC_TRUE;
+#else
     int n = 1;
     return (*(char*)&n) == 1;
+#endif
 }
 
-static DRFLAC_INLINE uint16_t drflac__swap_endian_uint16(uint16_t n)
+static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
 {
-#ifdef _MSC_VER
-    return _byteswap_ushort(n);
-#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-    return __builtin_bswap16(n);
+#ifdef DRFLAC_HAS_BYTESWAP_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
 #else
     return ((n & 0xFF00) >> 8) |
            ((n & 0x00FF) << 8);
 #endif
 }
 
-static DRFLAC_INLINE uint32_t drflac__swap_endian_uint32(uint32_t n)
+static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
 {
-#ifdef _MSC_VER
-    return _byteswap_ulong(n);
-#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-    return __builtin_bswap32(n);
+#ifdef DRFLAC_HAS_BYTESWAP_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap32(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
 #else
     return ((n & 0xFF000000) >> 24) |
            ((n & 0x00FF0000) >>  8) |
@@ -739,25 +952,30 @@ static DRFLAC_INLINE uint32_t drflac__swap_endian_uint32(uint32_t n)
 #endif
 }
 
-static DRFLAC_INLINE uint64_t drflac__swap_endian_uint64(uint64_t n)
+static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
 {
-#ifdef _MSC_VER
-    return _byteswap_uint64(n);
-#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-    return __builtin_bswap64(n);
+#ifdef DRFLAC_HAS_BYTESWAP_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
 #else
-    return ((n & 0xFF00000000000000ULL) >> 56) |
-           ((n & 0x00FF000000000000ULL) >> 40) |
-           ((n & 0x0000FF0000000000ULL) >> 24) |
-           ((n & 0x000000FF00000000ULL) >>  8) |
-           ((n & 0x00000000FF000000ULL) <<  8) |
-           ((n & 0x0000000000FF0000ULL) << 24) |
-           ((n & 0x000000000000FF00ULL) << 40) |
-           ((n & 0x00000000000000FFULL) << 56);
+    return ((n & (drflac_uint64)0xFF00000000000000) >> 56) |
+           ((n & (drflac_uint64)0x00FF000000000000) >> 40) |
+           ((n & (drflac_uint64)0x0000FF0000000000) >> 24) |
+           ((n & (drflac_uint64)0x000000FF00000000) >>  8) |
+           ((n & (drflac_uint64)0x00000000FF000000) <<  8) |
+           ((n & (drflac_uint64)0x0000000000FF0000) << 24) |
+           ((n & (drflac_uint64)0x000000000000FF00) << 40) |
+           ((n & (drflac_uint64)0x00000000000000FF) << 56);
 #endif
 }
 
-static DRFLAC_INLINE uint16_t drflac__be2host_16(uint16_t n)
+
+static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
 {
 #ifdef __linux__
     return be16toh(n);
@@ -770,7 +988,7 @@ static DRFLAC_INLINE uint16_t drflac__be2host_16(uint16_t n)
 #endif
 }
 
-static DRFLAC_INLINE uint32_t drflac__be2host_32(uint32_t n)
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
 {
 #ifdef __linux__
     return be32toh(n);
@@ -783,7 +1001,7 @@ static DRFLAC_INLINE uint32_t drflac__be2host_32(uint32_t n)
 #endif
 }
 
-static DRFLAC_INLINE uint64_t drflac__be2host_64(uint64_t n)
+static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
 {
 #ifdef __linux__
     return be64toh(n);
@@ -797,7 +1015,7 @@ static DRFLAC_INLINE uint64_t drflac__be2host_64(uint64_t n)
 }
 
 
-static DRFLAC_INLINE uint32_t drflac__le2host_32(uint32_t n)
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
 {
 #ifdef __linux__
     return le32toh(n);
@@ -811,13 +1029,242 @@ static DRFLAC_INLINE uint32_t drflac__le2host_32(uint32_t n)
 }
 
 
+static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
+{
+    drflac_uint32 result = 0;           // Packs the low 7 bits of each byte of n into one contiguous 28-bit value.
+    result |= (n & 0x7F000000) >> 3;    // Bits 24-30 -> bits 21-27.
+    result |= (n & 0x007F0000) >> 2;    // Bits 16-22 -> bits 14-20.
+    result |= (n & 0x00007F00) >> 1;    // Bits  8-14 -> bits  7-13.
+    result |= (n & 0x0000007F) >> 0;    // Bits  0-6 stay where they are.
+
+    return result;
+}
+
+
+
+// The CRC code below is based on this document: http://zlib.net/crc_v3.txt
+static drflac_uint8 drflac__crc8_table[] = {
+    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
+    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
+    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
+    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
+    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
+    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
+    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
+    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
+    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
+    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
+    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
+    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
+    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
+    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
+    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
+    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
+};
+
+static drflac_uint16 drflac__crc16_table[] = {
+    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
+    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
+    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
+    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
+    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
+    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
+    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
+    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
+    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
+    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
+    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
+    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
+    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
+    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
+    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
+    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
+    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
+    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
+    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
+    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
+    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
+    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
+    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
+    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
+    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
+    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
+    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
+    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
+    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
+    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
+    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
+    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
+};
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
+{
+    return drflac__crc8_table[crc ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
+{
+    drflac_assert(count <= 32);
+
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    // REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);")
+    drflac_uint8 p = 0x07;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint8 bit = (data & (1 << i)) >> i;
+        if (crc & 0x80) {
+            crc = ((crc << 1) | bit) ^ p;
+        } else {
+            crc = ((crc << 1) | bit);
+        }
+    }
+    return crc;
+#else
+    drflac_uint32 wholeBytes = count >> 3;
+    drflac_uint32 leftoverBits = count - (wholeBytes*8);
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+    drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
+{
+    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
+{
+    switch (byteCount)  // No breaks on purpose: entering at case N folds the N low-order bytes of data into crc, most significant of them first.
+    {
+#ifdef DRFLAC_64BIT
+    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+    }
+
+    return crc;     // Unchanged when byteCount matches no case label (e.g. 0).
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
+{
+    drflac_assert(count <= 32);     // data is only 32 bits wide, so at most 32 bits can be folded in.
+
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    // REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);")
+    drflac_uint16 p = 0x8005;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint16 bit = (data & (1ULL << i)) >> i;
+        if (crc & 0x8000) {
+            crc = ((crc << 1) | bit) ^ p;
+        } else {
+            crc = ((crc << 1) | bit);
+        }
+    }
+
+    return crc;
+#else
+    drflac_uint32 wholeBytes = count >> 3;                  // Complete bytes among the low `count` bits of data.
+    drflac_uint32 leftoverBits = count - (wholeBytes*8);    // Trailing bits (0..7) that do not fill a whole byte.
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+    drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {   // Every case falls through on purpose: whole bytes are folded in from most to least significant, then the leftover bits.
+        default:
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
+{
+    drflac_assert(count <= 64);
+
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+    drflac_uint32 wholeBytes = count >> 3;
+    drflac_uint32 leftoverBits = count - (wholeBytes*8);
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+    drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0xFF00000000000000 << leftoverBits)) >> (56 + leftoverBits)));
+        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00FF000000000000 << leftoverBits)) >> (48 + leftoverBits)));
+        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x0000FF0000000000 << leftoverBits)) >> (40 + leftoverBits)));
+        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x000000FF00000000 << leftoverBits)) >> (32 + leftoverBits)));
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00000000FF000000 << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x0000000000FF0000 << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x000000000000FF00 << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00000000000000FF << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
+{
+#ifdef DRFLAC_64BIT
+    return drflac_crc16__64bit(crc, data, count);
+#else
+    return drflac_crc16__32bit(crc, data, count);
+#endif
+}
+
+
 #ifdef DRFLAC_64BIT
 #define drflac__be2host__cache_line drflac__be2host_64
 #else
 #define drflac__be2host__cache_line drflac__be2host_32
 #endif
 
-
 // BIT READING ATTEMPT #2
 //
 // This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
@@ -829,9 +1276,9 @@ static DRFLAC_INLINE uint32_t drflac__le2host_32(uint32_t n)
 #define DRFLAC_CACHE_L1_SIZE_BITS(bs)                   (sizeof((bs)->cache)*8)
 #define DRFLAC_CACHE_L1_BITS_REMAINING(bs)              (DRFLAC_CACHE_L1_SIZE_BITS(bs) - ((bs)->consumedBits))
 #ifdef DRFLAC_64BIT
-#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)       (~(((uint64_t)-1LL) >> (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)       (~(((drflac_uint64)-1LL) >> (_bitCount)))
 #else
-#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)       (~(((uint32_t)-1) >> (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)       (~(((drflac_uint32)-1) >> (_bitCount)))
 #endif
 #define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
 #define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)           (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
@@ -840,18 +1287,54 @@ static DRFLAC_INLINE uint32_t drflac__le2host_32(uint32_t n)
 #define DRFLAC_CACHE_L2_LINE_COUNT(bs)                  (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
 #define DRFLAC_CACHE_L2_LINES_REMAINING(bs)             (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
 
-static DRFLAC_INLINE dr_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
+
+#ifndef DR_FLAC_NO_CRC
+static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
+{
+    bs->crc16 = 0;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+}
+
+static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
+{
+    bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
+    bs->crc16CacheIgnoredBytes = 0;
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
+{
+    // We should never be flushing in a situation where we are not aligned on a byte boundary.
+    drflac_assert((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
+
+    // The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
+    // by the number of bits that have been consumed.
+    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
+        drflac__update_crc16(bs);
+    } else {
+        // We only accumulate the consumed bits.
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
+
+        // The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
+        // so we can handle that later.
+        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+    }
+
+    return bs->crc16;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
 {
     // Fast path. Try loading straight from L2.
     if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
         bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DR_TRUE;
+        return DRFLAC_TRUE;
     }
 
     // If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
     // any left.
     if (bs->unalignedByteCount > 0) {
-        return DR_FALSE;   // If we have any unaligned bytes it means there's not more aligned bytes left in the client.
+        return DRFLAC_FALSE;   // If we have any unaligned bytes it means there's no more aligned bytes left in the client.
     }
 
     size_t bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
@@ -859,7 +1342,7 @@ static DRFLAC_INLINE dr_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
     bs->nextL2Line = 0;
     if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
         bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DR_TRUE;
+        return DRFLAC_TRUE;
     }
 
 
@@ -872,35 +1355,39 @@ static DRFLAC_INLINE dr_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
     // We need to keep track of any unaligned bytes for later use.
     bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
     if (bs->unalignedByteCount > 0) {
-        bs->unalignedCache  = bs->cacheL2[alignedL1LineCount];
+        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
     }
 
-    if (alignedL1LineCount > 0)
-    {
+    if (alignedL1LineCount > 0) {
         size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
         for (size_t i = alignedL1LineCount; i > 0; --i) {
             bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
         }
 
-        bs->nextL2Line = offset;
+        bs->nextL2Line = (drflac_uint32)offset;
         bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DR_TRUE;
-    }
-    else
-    {
+        return DRFLAC_TRUE;
+    } else {
         // If we get into this branch it means we weren't able to load any L1-aligned data.
         bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 }
 
-static dr_bool32 drflac__reload_cache(drflac_bs* bs)
+static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
 {
+#ifndef DR_FLAC_NO_CRC
+    drflac__update_crc16(bs);
+#endif
+
     // Fast path. Try just moving the next value in the L2 cache to the L1 cache.
     if (drflac__reload_l1_cache_from_l2(bs)) {
         bs->cache = drflac__be2host__cache_line(bs->cache);
         bs->consumedBits = 0;
-        return DR_TRUE;
+#ifndef DR_FLAC_NO_CRC
+        bs->crc16Cache = bs->cache;
+#endif
+        return DRFLAC_TRUE;
     }
 
     // Slow path.
@@ -910,15 +1397,21 @@ static dr_bool32 drflac__reload_cache(drflac_bs* bs)
     // data from the unaligned cache.
     size_t bytesRead = bs->unalignedByteCount;
     if (bytesRead == 0) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    assert(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
-    bs->consumedBits = (DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
+    drflac_assert(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
 
     bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
     bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs->consumedBits);    // <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property.
-    return DR_TRUE;
+    bs->unalignedByteCount = 0;     // <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes.
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = bs->cache >> bs->consumedBits;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+#endif
+    return DRFLAC_TRUE;
 }
 
 static void drflac__reset_cache(drflac_bs* bs)
@@ -928,69 +1421,24 @@ static void drflac__reset_cache(drflac_bs* bs)
     bs->cache = 0;
     bs->unalignedByteCount = 0;                         // <-- This clears the trailing unaligned bytes.
     bs->unalignedCache = 0;
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = 0;
+    bs->crc16CacheIgnoredBytes = 0;
+#endif
 }
 
-static dr_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
 {
-    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        bs->consumedBits += bitsToSeek;
-        bs->cache <<= bitsToSeek;
-        return DR_TRUE;
-    } else {
-        // It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here.
-        bitsToSeek -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        bs->cache = 0;
-
-        size_t wholeBytesRemaining = bitsToSeek/8;
-        if (wholeBytesRemaining > 0)
-        {
-            // The next bytes to seek will be located in the L2 cache. The problem is that the L2 cache is not byte aligned,
-            // but rather DRFLAC_CACHE_L1_SIZE_BYTES aligned (usually 4 or 8). If, for example, the number of bytes to seek is
-            // 3, we'll need to handle it in a special way.
-            size_t wholeCacheLinesRemaining = wholeBytesRemaining / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
-            if (wholeCacheLinesRemaining < DRFLAC_CACHE_L2_LINES_REMAINING(bs))
-            {
-                wholeBytesRemaining -= wholeCacheLinesRemaining * DRFLAC_CACHE_L1_SIZE_BYTES(bs);
-                bitsToSeek -= wholeCacheLinesRemaining * DRFLAC_CACHE_L1_SIZE_BITS(bs);
-                bs->nextL2Line += wholeCacheLinesRemaining;
-            }
-            else
-            {
-                wholeBytesRemaining -= DRFLAC_CACHE_L2_LINES_REMAINING(bs) * DRFLAC_CACHE_L1_SIZE_BYTES(bs);
-                bitsToSeek -= DRFLAC_CACHE_L2_LINES_REMAINING(bs) * DRFLAC_CACHE_L1_SIZE_BITS(bs);
-                bs->nextL2Line += DRFLAC_CACHE_L2_LINES_REMAINING(bs);
-
-                if (wholeBytesRemaining > 0) {
-                    bs->onSeek(bs->pUserData, (int)wholeBytesRemaining, drflac_seek_origin_current);
-                    bitsToSeek -= wholeBytesRemaining*8;
-                }
-            }
-        }
-
-
-        if (bitsToSeek > 0) {
-            if (!drflac__reload_cache(bs)) {
-                return DR_FALSE;
-            }
-
-            return drflac__seek_bits(bs, bitsToSeek);
-        }
-
-        return DR_TRUE;
-    }
-}
-
-static dr_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, uint32_t* pResultOut)
-{
-    assert(bs != NULL);
-    assert(pResultOut != NULL);
-    assert(bitCount > 0);
-    assert(bitCount <= 32);
+    drflac_assert(bs != NULL);
+    drflac_assert(pResultOut != NULL);
+    drflac_assert(bitCount > 0);
+    drflac_assert(bitCount <= 32);
 
     if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
         if (!drflac__reload_cache(bs)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
     }
 
@@ -1000,165 +1448,259 @@ static dr_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, uint3
             bs->consumedBits += bitCount;
             bs->cache <<= bitCount;
         } else {
-            *pResultOut = (uint32_t)bs->cache;
+            *pResultOut = (drflac_uint32)bs->cache;
             bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
             bs->cache = 0;
         }
-        return DR_TRUE;
+        return DRFLAC_TRUE;
     } else {
         // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them.
-        size_t bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        size_t bitCountLo = bitCount - bitCountHi;
-        uint32_t resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
+        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        drflac_uint32 bitCountLo = bitCount - bitCountHi;
+        drflac_uint32 resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
 
         if (!drflac__reload_cache(bs)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
         *pResultOut = (resultHi << bitCountLo) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
         bs->consumedBits += bitCountLo;
         bs->cache <<= bitCountLo;
-        return DR_TRUE;
+        return DRFLAC_TRUE;
     }
 }
 
-static dr_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, int32_t* pResult)
+static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
 {
-    assert(bs != NULL);
-    assert(pResult != NULL);
-    assert(bitCount > 0);
-    assert(bitCount <= 32);
+    drflac_assert(bs != NULL);
+    drflac_assert(pResult != NULL);
+    drflac_assert(bitCount > 0);
+    drflac_assert(bitCount <= 32);
 
-    uint32_t result;
+    drflac_uint32 result;
     if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    uint32_t signbit = ((result >> (bitCount-1)) & 0x01);
+    drflac_uint32 signbit = ((result >> (bitCount-1)) & 0x01);
     result |= (~signbit + 1) << bitCount;
 
-    *pResult = (int32_t)result;
-    return DR_TRUE;
+    *pResult = (drflac_int32)result;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, uint64_t* pResultOut)
+static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
 {
-    assert(bitCount <= 64);
-    assert(bitCount >  32);
+    drflac_assert(bitCount <= 64);
+    drflac_assert(bitCount >  32);
 
-    uint32_t resultHi;
+    drflac_uint32 resultHi;
     if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    uint32_t resultLo;
+    drflac_uint32 resultLo;
     if (!drflac__read_uint32(bs, 32, &resultLo)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    *pResultOut = (((uint64_t)resultHi) << 32) | ((uint64_t)resultLo);
-    return DR_TRUE;
+    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
+    return DRFLAC_TRUE;
 }
 
 // Function below is unused, but leaving it here in case I need to quickly add it again.
 #if 0
-static dr_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, int64_t* pResultOut)
+static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
 {
-    assert(bitCount <= 64);
+    drflac_assert(bitCount <= 64);
 
-    uint64_t result;
+    drflac_uint64 result;
     if (!drflac__read_uint64(bs, bitCount, &result)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    uint64_t signbit = ((result >> (bitCount-1)) & 0x01);
+    drflac_uint64 signbit = ((result >> (bitCount-1)) & 0x01);
     result |= (~signbit + 1) << bitCount;
 
-    *pResultOut = (int64_t)result;
-    return DR_TRUE;
+    *pResultOut = (drflac_int64)result;
+    return DRFLAC_TRUE;
 }
 #endif
 
-static dr_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, uint16_t* pResult)
+static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
 {
-    assert(bs != NULL);
-    assert(pResult != NULL);
-    assert(bitCount > 0);
-    assert(bitCount <= 16);
+    drflac_assert(bs != NULL);
+    drflac_assert(pResult != NULL);
+    drflac_assert(bitCount > 0);
+    drflac_assert(bitCount <= 16);
 
-    uint32_t result;
+    drflac_uint32 result;
     if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    *pResult = (uint16_t)result;
-    return DR_TRUE;
+    *pResult = (drflac_uint16)result;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, int16_t* pResult)
+#if 0
+static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
 {
-    assert(bs != NULL);
-    assert(pResult != NULL);
-    assert(bitCount > 0);
-    assert(bitCount <= 16);
+    drflac_assert(bs != NULL);
+    drflac_assert(pResult != NULL);
+    drflac_assert(bitCount > 0);
+    drflac_assert(bitCount <= 16);
 
-    int32_t result;
+    drflac_int32 result;
     if (!drflac__read_int32(bs, bitCount, &result)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    *pResult = (int16_t)result;
-    return DR_TRUE;
+    *pResult = (drflac_int16)result;
+    return DRFLAC_TRUE;
 }
+#endif
 
-static dr_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, uint8_t* pResult)
+static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
 {
-    assert(bs != NULL);
-    assert(pResult != NULL);
-    assert(bitCount > 0);
-    assert(bitCount <= 8);
+    drflac_assert(bs != NULL);
+    drflac_assert(pResult != NULL);
+    drflac_assert(bitCount > 0);
+    drflac_assert(bitCount <= 8);
 
-    uint32_t result;
+    drflac_uint32 result;
     if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    *pResult = (uint8_t)result;
-    return DR_TRUE;
+    *pResult = (drflac_uint8)result;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, int8_t* pResult)
+static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
 {
-    assert(bs != NULL);
-    assert(pResult != NULL);
-    assert(bitCount > 0);
-    assert(bitCount <= 8);
+    drflac_assert(bs != NULL);
+    drflac_assert(pResult != NULL);
+    drflac_assert(bitCount > 0);
+    drflac_assert(bitCount <= 8);
 
-    int32_t result;
+    drflac_int32 result;
     if (!drflac__read_int32(bs, bitCount, &result)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    *pResult = (int8_t)result;
-    return DR_TRUE;
+    *pResult = (drflac_int8)result;
+    return DRFLAC_TRUE;
 }
 
 
-static inline dr_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
 {
-    unsigned int zeroCounter = 0;
-    while (bs->cache == 0) {
-        zeroCounter += (unsigned int)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        if (!drflac__reload_cache(bs)) {
-            return DR_FALSE;
+    if (bitsToSeek < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {  // Strictly less: draining the whole line is done below, so the cache is never shifted by its full width (undefined behaviour in C).
+        bs->consumedBits += (drflac_uint32)bitsToSeek;
+        bs->cache <<= bitsToSeek;
+        return DRFLAC_TRUE;
+    } else {
+        // It drains or straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here.
+        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->cache         = 0;
+
+        // Simple case. Seek in groups of the same number as bits that fit within a cache line.
+#ifdef DRFLAC_64BIT
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint64 bin;
+            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#else
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint32 bin;
+            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#endif
+
+        // Whole leftover bytes.
+        while (bitsToSeek >= 8) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, 8, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= 8;
+        }
+
+        // Leftover bits.
+        if (bitsToSeek > 0) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek = 0; // <-- Necessary for the assert below.
+        }
+
+        drflac_assert(bitsToSeek == 0);
+        return DRFLAC_TRUE;
+    }
+}
+
+
+// This function moves the bit streamer to the first bit after the sync code (bit 15 of the frame header). It will also update the CRC-16.
+static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
+{
+    drflac_assert(bs != NULL);
+
+    // The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
+    // thing to do is align to the next byte.
+    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+        return DRFLAC_FALSE;
+    }
+
+    for (;;) {  // Exits only by returning: DRFLAC_TRUE on a match, DRFLAC_FALSE when a read or seek fails.
+#ifndef DR_FLAC_NO_CRC
+        drflac__reset_crc16(bs);    // Restart the CRC-16 before reading each candidate byte.
+#endif
+
+        drflac_uint8 hi;
+        if (!drflac__read_uint8(bs, 8, &hi)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (hi == 0xFF) {   // First 8 bits of the 14-bit sync code.
+            drflac_uint8 lo;
+            if (!drflac__read_uint8(bs, 6, &lo)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (lo == 0x3E) {   // Remaining 6 bits of the sync code (binary 111110).
+                return DRFLAC_TRUE;
+            } else {
+                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {   // Not a sync code: skip to the next byte boundary.
+                    return DRFLAC_FALSE;
+                }
+            }
         }
     }
 
-    // At this point the cache should not be zero, in which case we know the first set bit should be somewhere in here. There is
-    // no need for us to perform any cache reloading logic here which should make things much faster.
-    assert(bs->cache != 0);
+    // Should never get here.
+    //return DRFLAC_FALSE;
+}
 
-    unsigned int bitOffsetTable[] = {
+
+#if !defined(DR_FLAC_NO_SIMD) && defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+#define DRFLAC_IMPLEMENT_CLZ_LZCNT
+#endif
+#if  defined(_MSC_VER) && _MSC_VER >= 1400
+#define DRFLAC_IMPLEMENT_CLZ_MSVC
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
+{
+    static drflac_uint32 clz_table_4[] = {
         0,
         4,
         3, 3,
@@ -1166,91 +1708,169 @@ static inline dr_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned i
         1, 1, 1, 1, 1, 1, 1, 1
     };
 
-    unsigned int setBitOffsetPlus1 = bitOffsetTable[DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, 4)];
-    if (setBitOffsetPlus1 == 0) {
-        if (bs->cache == 1) {
-            setBitOffsetPlus1 = DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        } else {
-            setBitOffsetPlus1 = 5;
-            for (;;)
-            {
-                if ((bs->cache & DRFLAC_CACHE_L1_SELECT(bs, setBitOffsetPlus1))) {
-                    break;
-                }
+    drflac_uint32 n = clz_table_4[x >> (sizeof(x)*8 - 4)];
+    if (n == 0) {
+#ifdef DRFLAC_64BIT
+        if ((x & 0xFFFFFFFF00000000ULL) == 0) { n  = 32; x <<= 32; }
+        if ((x & 0xFFFF000000000000ULL) == 0) { n += 16; x <<= 16; }
+        if ((x & 0xFF00000000000000ULL) == 0) { n += 8;  x <<= 8;  }
+        if ((x & 0xF000000000000000ULL) == 0) { n += 4;  x <<= 4;  }
+#else
+        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
+        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
+        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
+#endif
+        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
+    }
 
-                setBitOffsetPlus1 += 1;
-            }
+    return n - 1;
+}
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported()
+{
+    // If the compiler itself does not support the intrinsic then we'll need to return false.
+#ifdef DRFLAC_HAS_LZCNT_INTRINSIC
+    return drflac__gIsLZCNTSupported;
+#else
+    return DRFLAC_FALSE;
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
+{
+#ifdef _MSC_VER
+    #ifdef DRFLAC_64BIT
+        return (drflac_uint32)__lzcnt64(x);
+    #else
+        return (drflac_uint32)__lzcnt(x);
+    #endif
+#else
+    #if defined(__GNUC__) || defined(__clang__)
+        #ifdef DRFLAC_64BIT
+            return (drflac_uint32)__builtin_clzll((unsigned long long)x);
+        #else
+            return (drflac_uint32)__builtin_clzl((unsigned long)x);
+        #endif
+    #else
+        // Unsupported compiler.
+        #error "This compiler does not support the lzcnt intrinsic."
+    #endif
+#endif
+}
+#endif
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
+{
+    drflac_uint32 n;    // Receives the bit index of the most significant set bit (x is expected to be non-zero).
+#ifdef DRFLAC_64BIT
+    _BitScanReverse64((unsigned long*)&n, x);
+#else
+    _BitScanReverse((unsigned long*)&n, x);
+#endif
+    return sizeof(x)*8 - n - 1;     // Convert the bit index into a count of leading zero bits.
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
+{
+    // Counts the leading zero bits of x. Assumes at least one bit is set; checking for 0 needs to be done at a higher level, outside this function.
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+    if (drflac__is_lzcnt_supported()) {     // Use the lzcnt path only when the support check passes.
+        return drflac__clz_lzcnt(x);
+    } else
+#endif
+    {
+    #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+        return drflac__clz_msvc(x);         // MSVC bit-scan intrinsic.
+    #else
+        return drflac__clz_software(x);     // Portable table-based fallback.
+    #endif
+    }
+}
+
+
+static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+{
+    drflac_uint32 zeroCounter = 0;  // Zero bits skipped in cache lines that contained no set bit.
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
         }
     }
 
+    drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache);
+    // Do not also add the clz count to zeroCounter: the sum written to *pOffsetOut below already includes (setBitOffsetPlus1 - 1).
+    setBitOffsetPlus1 += 1;
+
     bs->consumedBits += setBitOffsetPlus1;
     bs->cache <<= setBitOffsetPlus1;
 
     *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 
 
-static dr_bool32 drflac__seek_to_byte(drflac_bs* bs, uint64_t offsetFromStart)
+static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
 {
-    assert(bs != NULL);
-    assert(offsetFromStart > 0);
+    drflac_assert(bs != NULL);
+    drflac_assert(offsetFromStart > 0);
 
     // Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
     // is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
     // To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
-    if (offsetFromStart > 0x7FFFFFFF)
-    {
-        uint64_t bytesRemaining = offsetFromStart;
+    if (offsetFromStart > 0x7FFFFFFF) {
+        drflac_uint64 bytesRemaining = offsetFromStart;
         if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
         bytesRemaining -= 0x7FFFFFFF;
 
-
         while (bytesRemaining > 0x7FFFFFFF) {
             if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             bytesRemaining -= 0x7FFFFFFF;
         }
 
-
         if (bytesRemaining > 0) {
             if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         }
-    }
-    else
-    {
+    } else {
         if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
     }
 
-
     // The cache should be reset to force a reload of fresh data from the client.
     drflac__reset_cache(bs);
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 
-static dr_bool32 drflac__read_utf8_coded_number(drflac_bs* bs, uint64_t* pNumberOut)
+static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
 {
-    assert(bs != NULL);
-    assert(pNumberOut != NULL);
+    drflac_assert(bs != NULL);
+    drflac_assert(pNumberOut != NULL);
+
+    drflac_uint8 crc = *pCRCOut;
 
     unsigned char utf8[7] = {0};
     if (!drflac__read_uint8(bs, 8, utf8)) {
         *pNumberOut = 0;
-        return DR_FALSE;
+        return DRFLAC_END_OF_STREAM;
     }
+    crc = drflac_crc8(crc, utf8[0], 8);
 
     if ((utf8[0] & 0x80) == 0) {
         *pNumberOut = utf8[0];
-        return DR_TRUE;
+        *pCRCOut = crc;
+        return DRFLAC_SUCCESS;
     }
 
     int byteCount = 1;
@@ -1268,57 +1888,43 @@ static dr_bool32 drflac__read_utf8_coded_number(drflac_bs* bs, uint64_t* pNumber
         byteCount = 7;
     } else {
         *pNumberOut = 0;
-        return DR_FALSE;     // Bad UTF-8 encoding.
+        return DRFLAC_CRC_MISMATCH;     // Bad UTF-8 encoding.
     }
 
     // Read extra bytes.
-    assert(byteCount > 1);
+    drflac_assert(byteCount > 1);
 
-    uint64_t result = (uint64_t)(utf8[0] & (0xFF >> (byteCount + 1)));
+    drflac_uint64 result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
     for (int i = 1; i < byteCount; ++i) {
         if (!drflac__read_uint8(bs, 8, utf8 + i)) {
             *pNumberOut = 0;
-            return DR_FALSE;
+            return DRFLAC_END_OF_STREAM;
         }
+        crc = drflac_crc8(crc, utf8[i], 8);
 
         result = (result << 6) | (utf8[i] & 0x3F);
     }
 
     *pNumberOut = result;
-    return DR_TRUE;
+    *pCRCOut = crc;
+    return DRFLAC_SUCCESS;
 }
 
 
 
-static DRFLAC_INLINE dr_bool32 drflac__read_and_seek_rice(drflac_bs* bs, uint8_t m)
-{
-    unsigned int unused;
-    if (!drflac__seek_past_next_set_bit(bs, &unused)) {
-        return DR_FALSE;
-    }
-
-    if (m > 0) {
-        if (!drflac__seek_bits(bs, m)) {
-            return DR_FALSE;
-        }
-    }
-
-    return DR_TRUE;
-}
-
 
 // The next two functions are responsible for calculating the prediction.
 //
 // When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
 // safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
-static DRFLAC_INLINE int32_t drflac__calculate_prediction_32(uint32_t order, int32_t shift, const int16_t* coefficients, int32_t* pDecodedSamples)
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
-    assert(order <= 32);
+    drflac_assert(order <= 32);
 
     // 32-bit version.
 
     // VC++ optimizes this to a single jmp. I've not yet verified this for other compilers.
-    int32_t prediction = 0;
+    drflac_int32 prediction = 0;
 
     switch (order)
     {
@@ -1356,137 +1962,137 @@ static DRFLAC_INLINE int32_t drflac__calculate_prediction_32(uint32_t order, int
     case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
     }
 
-    return (int32_t)(prediction >> shift);
+    return (drflac_int32)(prediction >> shift);
 }
 
-static DRFLAC_INLINE int32_t drflac__calculate_prediction_64(uint32_t order, int32_t shift, const int16_t* coefficients, int32_t* pDecodedSamples)
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
-    assert(order <= 32);
+    drflac_assert(order <= 32);
 
     // 64-bit version.
 
     // This method is faster on the 32-bit build when compiling with VC++. See note below.
 #ifndef DRFLAC_64BIT
-    int64_t prediction;
+    drflac_int64 prediction;
     if (order == 8)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2] * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3] * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4] * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5] * (int64_t)pDecodedSamples[-6];
-        prediction += coefficients[6] * (int64_t)pDecodedSamples[-7];
-        prediction += coefficients[7] * (int64_t)pDecodedSamples[-8];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
     }
     else if (order == 7)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2] * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3] * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4] * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5] * (int64_t)pDecodedSamples[-6];
-        prediction += coefficients[6] * (int64_t)pDecodedSamples[-7];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
     }
     else if (order == 3)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2] * (int64_t)pDecodedSamples[-3];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
     }
     else if (order == 6)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2] * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3] * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4] * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5] * (int64_t)pDecodedSamples[-6];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
     }
     else if (order == 5)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2] * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3] * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4] * (int64_t)pDecodedSamples[-5];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
     }
     else if (order == 4)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2] * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3] * (int64_t)pDecodedSamples[-4];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
     }
     else if (order == 12)
     {
-        prediction  = coefficients[0]  * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (int64_t)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (int64_t)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (int64_t)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (int64_t)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (int64_t)pDecodedSamples[-10];
-        prediction += coefficients[10] * (int64_t)pDecodedSamples[-11];
-        prediction += coefficients[11] * (int64_t)pDecodedSamples[-12];
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
     }
     else if (order == 2)
     {
-        prediction  = coefficients[0] * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1] * (int64_t)pDecodedSamples[-2];
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
     }
     else if (order == 1)
     {
-        prediction = coefficients[0] * (int64_t)pDecodedSamples[-1];
+        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
     }
     else if (order == 10)
     {
-        prediction  = coefficients[0]  * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (int64_t)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (int64_t)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (int64_t)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (int64_t)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (int64_t)pDecodedSamples[-10];
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
     }
     else if (order == 9)
     {
-        prediction  = coefficients[0]  * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (int64_t)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (int64_t)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (int64_t)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (int64_t)pDecodedSamples[-9];
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
     }
     else if (order == 11)
     {
-        prediction  = coefficients[0]  * (int64_t)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (int64_t)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (int64_t)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (int64_t)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (int64_t)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (int64_t)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (int64_t)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (int64_t)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (int64_t)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (int64_t)pDecodedSamples[-10];
-        prediction += coefficients[10] * (int64_t)pDecodedSamples[-11];
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
     }
     else
     {
         prediction = 0;
         for (int j = 0; j < (int)order; ++j) {
-            prediction += coefficients[j] * (int64_t)pDecodedSamples[-j-1];
+            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
         }
     }
 #endif
@@ -1494,190 +2100,268 @@ static DRFLAC_INLINE int32_t drflac__calculate_prediction_64(uint32_t order, int
     // VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
     // reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
 #ifdef DRFLAC_64BIT
-    int64_t prediction = 0;
+    drflac_int64 prediction = 0;
 
     switch (order)
     {
-    case 32: prediction += coefficients[31] * (int64_t)pDecodedSamples[-32];
-    case 31: prediction += coefficients[30] * (int64_t)pDecodedSamples[-31];
-    case 30: prediction += coefficients[29] * (int64_t)pDecodedSamples[-30];
-    case 29: prediction += coefficients[28] * (int64_t)pDecodedSamples[-29];
-    case 28: prediction += coefficients[27] * (int64_t)pDecodedSamples[-28];
-    case 27: prediction += coefficients[26] * (int64_t)pDecodedSamples[-27];
-    case 26: prediction += coefficients[25] * (int64_t)pDecodedSamples[-26];
-    case 25: prediction += coefficients[24] * (int64_t)pDecodedSamples[-25];
-    case 24: prediction += coefficients[23] * (int64_t)pDecodedSamples[-24];
-    case 23: prediction += coefficients[22] * (int64_t)pDecodedSamples[-23];
-    case 22: prediction += coefficients[21] * (int64_t)pDecodedSamples[-22];
-    case 21: prediction += coefficients[20] * (int64_t)pDecodedSamples[-21];
-    case 20: prediction += coefficients[19] * (int64_t)pDecodedSamples[-20];
-    case 19: prediction += coefficients[18] * (int64_t)pDecodedSamples[-19];
-    case 18: prediction += coefficients[17] * (int64_t)pDecodedSamples[-18];
-    case 17: prediction += coefficients[16] * (int64_t)pDecodedSamples[-17];
-    case 16: prediction += coefficients[15] * (int64_t)pDecodedSamples[-16];
-    case 15: prediction += coefficients[14] * (int64_t)pDecodedSamples[-15];
-    case 14: prediction += coefficients[13] * (int64_t)pDecodedSamples[-14];
-    case 13: prediction += coefficients[12] * (int64_t)pDecodedSamples[-13];
-    case 12: prediction += coefficients[11] * (int64_t)pDecodedSamples[-12];
-    case 11: prediction += coefficients[10] * (int64_t)pDecodedSamples[-11];
-    case 10: prediction += coefficients[ 9] * (int64_t)pDecodedSamples[-10];
-    case  9: prediction += coefficients[ 8] * (int64_t)pDecodedSamples[- 9];
-    case  8: prediction += coefficients[ 7] * (int64_t)pDecodedSamples[- 8];
-    case  7: prediction += coefficients[ 6] * (int64_t)pDecodedSamples[- 7];
-    case  6: prediction += coefficients[ 5] * (int64_t)pDecodedSamples[- 6];
-    case  5: prediction += coefficients[ 4] * (int64_t)pDecodedSamples[- 5];
-    case  4: prediction += coefficients[ 3] * (int64_t)pDecodedSamples[- 4];
-    case  3: prediction += coefficients[ 2] * (int64_t)pDecodedSamples[- 3];
-    case  2: prediction += coefficients[ 1] * (int64_t)pDecodedSamples[- 2];
-    case  1: prediction += coefficients[ 0] * (int64_t)pDecodedSamples[- 1];
+    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
     }
 #endif
 
-    return (int32_t)(prediction >> shift);
+    return (drflac_int32)(prediction >> shift);
 }
 
-
-// Reads and decodes a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes.
-//
-// This is the most frequently called function in the library. It does both the Rice decoding and the prediction in a single loop
-// iteration. The prediction is done at the end, and there's an annoying branch I'd like to avoid so the main function is defined
-// as a #define - sue me!
-#define DRFLAC__DECODE_SAMPLES_WITH_RESIDULE__RICE__PROC(funcName, predictionFunc)                                                                                  \
-static dr_bool32 funcName (drflac_bs* bs, uint32_t count, uint8_t riceParam, uint32_t order, int32_t shift, const int16_t* coefficients, int32_t* pSamplesOut)           \
-{                                                                                                                                                                   \
-    assert(bs != NULL);                                                                                                                                             \
-    assert(count > 0);                                                                                                                                              \
-    assert(pSamplesOut != NULL);                                                                                                                                    \
-                                                                                                                                                                    \
-    static unsigned int bitOffsetTable[] = {                                                                                                                        \
-        0,                                                                                                                                                          \
-        4,                                                                                                                                                          \
-        3, 3,                                                                                                                                                       \
-        2, 2, 2, 2,                                                                                                                                                 \
-        1, 1, 1, 1, 1, 1, 1, 1                                                                                                                                      \
-    };                                                                                                                                                              \
-                                                                                                                                                                    \
-    drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);                                                                                       \
-    drflac_cache_t resultHiShift = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParam;                                                                                       \
-                                                                                                                                                                    \
-    for (int i = 0; i < (int)count; ++i)                                                                                                                            \
-    {                                                                                                                                                               \
-        unsigned int zeroCounter = 0;                                                                                                                               \
-        while (bs->cache == 0) {                                                                                                                                    \
-            zeroCounter += (unsigned int)DRFLAC_CACHE_L1_BITS_REMAINING(bs);                                                                                        \
-            if (!drflac__reload_cache(bs)) {                                                                                                                        \
-                return DR_FALSE;                                                                                                                                       \
-            }                                                                                                                                                       \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        /* At this point the cache should not be zero, in which case we know the first set bit should be somewhere in here. There is                                \
-           no need for us to perform any cache reloading logic here which should make things much faster. */                                                        \
-        assert(bs->cache != 0);                                                                                                                                     \
-        unsigned int decodedRice;                                                                                                                                   \
-                                                                                                                                                                    \
-        unsigned int setBitOffsetPlus1 = bitOffsetTable[DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, 4)];                                                                   \
-        if (setBitOffsetPlus1 > 0) {                                                                                                                                \
-            decodedRice = (zeroCounter + (setBitOffsetPlus1-1)) << riceParam;                                                                                       \
-        } else {                                                                                                                                                    \
-            if (bs->cache == 1) {                                                                                                                                   \
-                setBitOffsetPlus1 = DRFLAC_CACHE_L1_SIZE_BITS(bs);                                                                                                  \
-                decodedRice = (zeroCounter + (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)) << riceParam;                                                                       \
-            } else {                                                                                                                                                \
-                setBitOffsetPlus1 = 5;                                                                                                                              \
-                for (;;)                                                                                                                                            \
-                {                                                                                                                                                   \
-                    if ((bs->cache & DRFLAC_CACHE_L1_SELECT(bs, setBitOffsetPlus1))) {                                                                              \
-                        decodedRice = (zeroCounter + (setBitOffsetPlus1-1)) << riceParam;                                                                           \
-                        break;                                                                                                                                      \
-                    }                                                                                                                                               \
-                                                                                                                                                                    \
-                    setBitOffsetPlus1 += 1;                                                                                                                         \
-                }                                                                                                                                                   \
-            }                                                                                                                                                       \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-                                                                                                                                                                    \
-        unsigned int bitsLo = 0;                                                                                                                                    \
-        unsigned int riceLength = setBitOffsetPlus1 + riceParam;                                                                                                    \
-        if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs))                                                                                                        \
-        {                                                                                                                                                           \
-            bitsLo = (unsigned int)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> (DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceLength));                            \
-                                                                                                                                                                    \
-            bs->consumedBits += riceLength;                                                                                                                         \
-            bs->cache <<= riceLength;                                                                                                                               \
-        }                                                                                                                                                           \
-        else                                                                                                                                                        \
-        {                                                                                                                                                           \
-            bs->consumedBits += riceLength;                                                                                                                         \
-            bs->cache <<= setBitOffsetPlus1;                                                                                                                        \
-                                                                                                                                                                    \
-            /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */                \
-            size_t bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);                                                                                   \
-            drflac_cache_t resultHi = bs->cache & riceParamMask;    /* <-- This mask is OK because all bits after the first bits are always zero. */                \
-                                                                                                                                                                    \
-                                                                                                                                                                    \
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {                                                                                                  \
-                bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);                                                                             \
-            } else {                                                                                                                                                \
-                /* Slow path. We need to fetch more data from the client. */                                                                                        \
-                if (!drflac__reload_cache(bs)) {                                                                                                                    \
-                    return DR_FALSE;                                                                                                                                   \
-                }                                                                                                                                                   \
-            }                                                                                                                                                       \
-                                                                                                                                                                    \
-            bitsLo = (unsigned int)((resultHi >> resultHiShift) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo));                                                \
-            bs->consumedBits = bitCountLo;                                                                                                                          \
-            bs->cache <<= bitCountLo;                                                                                                                               \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        decodedRice |= bitsLo;                                                                                                                                      \
-        decodedRice = (decodedRice >> 1) ^ (~(decodedRice & 0x01) + 1);   /* <-- Ah, much faster! :) */                                                             \
-        /*                                                                                                                                                          \
-        if ((decodedRice & 0x01)) {                                                                                                                                 \
-            decodedRice = ~(decodedRice >> 1);                                                                                                                      \
-        } else {                                                                                                                                                    \
-            decodedRice = (decodedRice >> 1);                                                                                                                       \
-        }                                                                                                                                                           \
-        */                                                                                                                                                          \
-                                                                                                                                                                    \
-        /* In order to properly calculate the prediction when the bits per sample is >16 we need to do it using 64-bit arithmetic. We can assume this               \
-           is probably going to be slower on 32-bit systems so we'll do a more optimized 32-bit version when the bits per sample is low enough.*/                   \
-        pSamplesOut[i] = ((int)decodedRice + predictionFunc(order, shift, coefficients, pSamplesOut + i));                                                          \
-    }                                                                                                                                                               \
-                                                                                                                                                                    \
-    return DR_TRUE;                                                                                                                                                    \
-}                                                                                                                                                                   \
-
-DRFLAC__DECODE_SAMPLES_WITH_RESIDULE__RICE__PROC(drflac__decode_samples_with_residual__rice_64, drflac__calculate_prediction_64)
-DRFLAC__DECODE_SAMPLES_WITH_RESIDULE__RICE__PROC(drflac__decode_samples_with_residual__rice_32, drflac__calculate_prediction_32)
-
-
-// Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes.
-static dr_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, uint32_t count, uint8_t riceParam)
+#if 0
+// Reference implementation for reading and decoding samples with residual. It reads the bit stream one bit at a time, is
+// intentionally left unoptimized for the sake of readability, and is compiled out (#if 0); use it only as a reference.
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
-    assert(bs != NULL);
-    assert(count > 0);
+    drflac_assert(bs != NULL);
+    drflac_assert(count > 0);
+    drflac_assert(pSamplesOut != NULL);
 
-    for (uint32_t i = 0; i < count; ++i) {
-        if (!drflac__read_and_seek_rice(bs, riceParam)) {
-            return DR_FALSE;
+    for (drflac_uint32 i = 0; i < count; ++i) {
+        drflac_uint32 zeroCounter = 0;   // Number of 0 bits read before the terminating 1 bit.
+        for (;;) {
+            drflac_uint8 bit;
+            if (!drflac__read_uint8(bs, 1, &bit)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (bit == 0) {
+                zeroCounter += 1;
+            } else {
+                break;   // The first 1 bit ends the run of zeros.
+            }
+        }
+
+        drflac_uint32 decodedRice;       // Starts out as the riceParam bits that follow the 1 bit (0 when riceParam is 0).
+        if (riceParam > 0) {
+            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            decodedRice = 0;
+        }
+
+        decodedRice |= (zeroCounter << riceParam);   // Place the zero count above the riceParam low bits.
+        if ((decodedRice & 0x01)) {
+            decodedRice = ~(decodedRice >> 1);       // Low bit set: halve, then complement.
+        } else {
+            decodedRice =  (decodedRice >> 1);       // Low bit clear: just halve.
+        }
+
+        // Add the prediction to the decoded residual; the 64-bit predictor is used when bitsPerSample > 16.
+        if (bitsPerSample > 16) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
         }
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+// Unoptimized reference for splitting one Rice code into its two raw parts. It walks the bit stream one bit at a time:
+// *pZeroCounterOut receives the number of 0 bits seen before the first 1 bit, and *pRiceParamPartOut receives the
+// riceParam bits that follow it (0 when riceParam is 0). Returns DRFLAC_FALSE if any read fails.
+static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32 leadingZeros = 0;
+    drflac_uint32 lowBits = 0;
+    drflac_uint8 currentBit;
+
+    // Consume single bits until the terminating 1 bit shows up, counting the zeros on the way.
+    do {
+        if (!drflac__read_uint8(bs, 1, &currentBit)) {
+            return DRFLAC_FALSE;
+        }
+        if (currentBit == 0) {
+            leadingZeros += 1;
+        }
+    } while (currentBit == 0);
+
+    // A zero-width tail is never read from the stream; lowBits simply stays 0.
+    if (riceParam > 0 && !drflac__read_uint32(bs, riceParam, &lowBits)) {
+        return DRFLAC_FALSE;
+    }
+
+    // Outputs are only written once the whole code has been read successfully.
+    *pZeroCounterOut = leadingZeros;
+    *pRiceParamPartOut = lowBits;
+    return DRFLAC_TRUE;
+}
+#endif
+// Reads one Rice code from the bit cache. On success *pZeroCounterOut holds the number of 0 bits before the first 1 bit and *pRiceParamPartOut the riceParam bits after it; DRFLAC_FALSE means a cache reload failed and the outputs were not written.
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
+    drflac_cache_t resultHiShift = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParam;
+
+    // While the cache holds no set bit, credit its remaining bits to the zero count and reload it.
+    drflac_uint32 zeroCounter = 0;
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache);   // NOTE(review): presumably a count-leading-zeros helper; confirm against its definition.
+    zeroCounter += setBitOffsetPlus1;
+    setBitOffsetPlus1 += 1;
+
+    // riceLength is the number of bits up to and including the first set bit, plus the riceParam bits that follow it.
+    drflac_uint32 riceParamPart;
+    drflac_uint32 riceLength = setBitOffsetPlus1 + riceParam;
+    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> (DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceLength));
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= riceLength;
+    } else {
+        bs->consumedBits += riceLength;
+        if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {   // Presumably skips the shift when it would equal the full cache width, which is undefined in C.
+            bs->cache <<= setBitOffsetPlus1;
+        }
+
+        // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them.
+        drflac_uint32 bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        drflac_cache_t resultHi = bs->cache & riceParamMask;    // <-- This mask is OK because all bits after the first bits are always zero.
+
+        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+        #ifndef DR_FLAC_NO_CRC
+            drflac__update_crc16(bs);
+        #endif
+            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+            bs->consumedBits = 0;
+        #ifndef DR_FLAC_NO_CRC
+            bs->crc16Cache = bs->cache;
+        #endif
+        } else {
+            // Slow path. We need to fetch more data from the client.
+            if (!drflac__reload_cache(bs)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        riceParamPart = (drflac_uint32)((resultHi >> resultHiShift) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo));   // Part saved from the old cache line OR-ed with bitCountLo bits of the new one.
+
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+    }
+
+    *pZeroCounterOut = zeroCounter;
+    *pRiceParamPartOut = riceParamPart;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, uint32_t bitsPerSample, uint32_t count, uint8_t unencodedBitsPerSample, uint32_t order, int32_t shift, const int16_t* coefficients, int32_t* pSamplesOut)
-{
-    assert(bs != NULL);
-    assert(count > 0);
-    assert(unencodedBitsPerSample > 0 && unencodedBitsPerSample <= 32);
-    assert(pSamplesOut != NULL);
 
-    for (unsigned int i = 0; i < count; ++i)
-    {
+// Decodes <count> Rice-coded residuals from the bit stream and writes the reconstructed samples to pSamplesOut.
+// Each sample is the decoded residual plus a prediction computed from the samples stored before it; the 64-bit
+// predictor is used when bitsPerSample > 16 and the 32-bit one otherwise. Returns DRFLAC_FALSE on a read failure.
+static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    // Indexed by the low bit of the raw Rice value: XOR-ing with all ones complements the halved value.
+    static drflac_uint32 signFlip[2] = {0x00000000, 0xFFFFFFFF};
+
+    drflac_uint32 unaryPart;
+    drflac_uint32 lowPart;
+
+    drflac_assert(bs != NULL);
+    drflac_assert(count > 0);
+    drflac_assert(pSamplesOut != NULL);
+
+    for (drflac_uint32 sampleIndex = 0; sampleIndex < count; sampleIndex += 1) {
+        // Rice extraction: fetch the zero-run length and the riceParam low bits of the next code.
+        if (!drflac__read_rice_parts(bs, riceParam, &unaryPart, &lowPart)) {
+            return DRFLAC_FALSE;
+        }
+
+        // Rice reconstruction: join both parts, then halve and complement when the low bit was set.
+        lowPart |= (unaryPart << riceParam);
+        lowPart  = (lowPart >> 1) ^ signFlip[lowPart & 0x01];
+
+        // Sample reconstruction: residual plus prediction from the previously decoded samples.
+        if (bitsPerSample <= 16) {
+            pSamplesOut[sampleIndex] = lowPart + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + sampleIndex);
+        } else {
+            pSamplesOut[sampleIndex] = lowPart + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + sampleIndex);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+#if 1   // Normal build: forward to the cache-based decoder.
+    return drflac__decode_samples_with_residual__rice__simple(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+#else   // Debugging aid: bit-at-a-time reference decoder (its own #if 0 guard must be enabled as well).
+    return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+#endif
+}
+
+// Skips <count> Rice codes without reconstructing samples: each code is parsed and its parts discarded. The decoder should be sitting on the first bit of the Rice codes.
+static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
+{
+    drflac_uint32 discardedZeroCount, discardedLowBits;
+
+    drflac_assert(bs != NULL);
+    drflac_assert(count > 0);
+
+    for (drflac_uint32 remaining = count; remaining > 0; --remaining) {
+        if (!drflac__read_rice_parts(bs, riceParam, &discardedZeroCount, &discardedLowBits)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_assert(bs != NULL);
+    drflac_assert(count > 0);
+    drflac_assert(unencodedBitsPerSample > 0 && unencodedBitsPerSample <= 32);
+    drflac_assert(pSamplesOut != NULL);
+
+    for (unsigned int i = 0; i < count; ++i) {
         if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
         if (bitsPerSample > 16) {
@@ -1687,53 +2371,52 @@ static dr_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs,
         }
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 
 // Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
 // when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
 // <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
-static dr_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, uint32_t bitsPerSample, uint32_t blockSize, uint32_t order, int32_t shift, const int16_t* coefficients, int32_t* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
-    assert(bs != NULL);
-    assert(blockSize != 0);
-    assert(pDecodedSamples != NULL);       // <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode?
+    drflac_assert(bs != NULL);
+    drflac_assert(blockSize != 0);
+    drflac_assert(pDecodedSamples != NULL);       // <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode?
 
-    uint8_t residualMethod;
+    drflac_uint8 residualMethod;
     if (!drflac__read_uint8(bs, 2, &residualMethod)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DR_FALSE;    // Unknown or unsupported residual coding method.
+        return DRFLAC_FALSE;    // Unknown or unsupported residual coding method.
     }
 
     // Ignore the first <order> values.
     pDecodedSamples += order;
 
 
-    uint8_t partitionOrder;
+    drflac_uint8 partitionOrder;
     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
 
-    uint32_t samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    uint32_t partitionsRemaining = (1 << partitionOrder);
-    for (;;)
-    {
-        uint8_t riceParam = 0;
+    drflac_uint32 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    drflac_uint32 partitionsRemaining = (1 << partitionOrder);
+    for (;;) {
+        drflac_uint8 riceParam = 0;
         if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
             if (!drflac__read_uint8(bs, 4, &riceParam)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             if (riceParam == 16) {
                 riceParam = 0xFF;
             }
         } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
             if (!drflac__read_uint8(bs, 5, &riceParam)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             if (riceParam == 32) {
                 riceParam = 0xFF;
@@ -1741,23 +2424,17 @@ static dr_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, uint32_t bi
         }
 
         if (riceParam != 0xFF) {
-            if (bitsPerSample > 16) {
-                if (!drflac__decode_samples_with_residual__rice_64(bs, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
-                    return DR_FALSE;
-                }
-            } else {
-                if (!drflac__decode_samples_with_residual__rice_32(bs, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
-                    return DR_FALSE;
-                }
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
             }
         } else {
             unsigned char unencodedBitsPerSample = 0;
             if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         }
 
@@ -1772,46 +2449,46 @@ static dr_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, uint32_t bi
         samplesInPartition = blockSize / (1 << partitionOrder);
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 // Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
 // when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
 // <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
-static dr_bool32 drflac__read_and_seek_residual(drflac_bs* bs, uint32_t blockSize, uint32_t order)
+static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
 {
-    assert(bs != NULL);
-    assert(blockSize != 0);
+    drflac_assert(bs != NULL);
+    drflac_assert(blockSize != 0);
 
-    uint8_t residualMethod;
+    drflac_uint8 residualMethod;
     if (!drflac__read_uint8(bs, 2, &residualMethod)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DR_FALSE;    // Unknown or unsupported residual coding method.
+        return DRFLAC_FALSE;    // Unknown or unsupported residual coding method.
     }
 
-    uint8_t partitionOrder;
+    drflac_uint8 partitionOrder;
     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    uint32_t samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    uint32_t partitionsRemaining = (1 << partitionOrder);
+    drflac_uint32 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    drflac_uint32 partitionsRemaining = (1 << partitionOrder);
     for (;;)
     {
-        uint8_t riceParam = 0;
+        drflac_uint8 riceParam = 0;
         if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
             if (!drflac__read_uint8(bs, 4, &riceParam)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             if (riceParam == 16) {
                 riceParam = 0xFF;
             }
         } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
             if (!drflac__read_uint8(bs, 5, &riceParam)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             if (riceParam == 32) {
                 riceParam = 0xFF;
@@ -1820,16 +2497,16 @@ static dr_bool32 drflac__read_and_seek_residual(drflac_bs* bs, uint32_t blockSiz
 
         if (riceParam != 0xFF) {
             if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         } else {
             unsigned char unencodedBitsPerSample = 0;
             if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         }
 
@@ -1842,44 +2519,44 @@ static dr_bool32 drflac__read_and_seek_residual(drflac_bs* bs, uint32_t blockSiz
         samplesInPartition = blockSize / (1 << partitionOrder);
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 
-static dr_bool32 drflac__decode_samples__constant(drflac_bs* bs, uint32_t blockSize, uint32_t bitsPerSample, int32_t* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_int32* pDecodedSamples)
 {
     // Only a single sample needs to be decoded here.
-    int32_t sample;
+    drflac_int32 sample;    // Filled by drflac__read_int32() using <bitsPerSample>, then copied into every output slot.
     if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;    // The read failed; pDecodedSamples has not been written to.
     }
 
     // We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
     // we'll want to look at a more efficient way.
-    for (uint32_t i = 0; i < blockSize; ++i) {
+    for (drflac_uint32 i = 0; i < blockSize; ++i) {    // Writes pDecodedSamples[0] .. pDecodedSamples[blockSize-1].
         pDecodedSamples[i] = sample;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;    // All <blockSize> entries now hold the same value.
 }
 
-static dr_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, uint32_t blockSize, uint32_t bitsPerSample, int32_t* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_int32* pDecodedSamples)
 {
-    for (uint32_t i = 0; i < blockSize; ++i) {
-        int32_t sample;
+    for (drflac_uint32 i = 0; i < blockSize; ++i) {    // One drflac__read_int32() call per output sample.
+        drflac_int32 sample;    // Read into a local first so pDecodedSamples[i] is only written after a successful read.
         if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;    // Entries before index i have already been written; the rest are untouched.
         }
 
         pDecodedSamples[i] = sample;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;    // All <blockSize> samples were read.
 }
 
-static dr_bool32 drflac__decode_samples__fixed(drflac_bs* bs, uint32_t blockSize, uint32_t bitsPerSample, uint8_t lpcOrder, int32_t* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
 {
-    short lpcCoefficientsTable[5][4] = {
+    drflac_int32 lpcCoefficientsTable[5][4] = {
         {0,  0, 0,  0},
         {1,  0, 0,  0},
         {2, -1, 0,  0},
@@ -1888,10 +2565,10 @@ static dr_bool32 drflac__decode_samples__fixed(drflac_bs* bs, uint32_t blockSize
     };
 
     // Warm up samples and coefficients.
-    for (uint32_t i = 0; i < lpcOrder; ++i) {
-        int32_t sample;
+    for (drflac_uint32 i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
         if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
         pDecodedSamples[i] = sample;
@@ -1899,195 +2576,224 @@ static dr_bool32 drflac__decode_samples__fixed(drflac_bs* bs, uint32_t blockSize
 
 
     if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__decode_samples__lpc(drflac_bs* bs, uint32_t blockSize, uint32_t bitsPerSample, uint8_t lpcOrder, int32_t* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
 {
+    drflac_uint8 i;    // Shared index for the warm-up loop and the coefficient loop below.
+
     // Warm up samples.
-    for (uint8_t i = 0; i < lpcOrder; ++i) {
-        int32_t sample;
+    for (i = 0; i < lpcOrder; ++i) {    // The first <lpcOrder> samples are read directly from the stream.
+        drflac_int32 sample;
         if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
         pDecodedSamples[i] = sample;
     }
 
-    uint8_t lpcPrecision;
+    drflac_uint8 lpcPrecision;    // 4-bit field; after the +1 below it is the bit width used to read each coefficient.
     if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
     if (lpcPrecision == 15) {
-        return DR_FALSE;    // Invalid.
+        return DRFLAC_FALSE;    // Invalid.
     }
     lpcPrecision += 1;
 
 
-    int8_t lpcShift;
+    drflac_int8 lpcShift;    // Signed 5-bit field, forwarded as the <shift> argument of drflac__decode_samples_with_residual().
     if (!drflac__read_int8(bs, 5, &lpcShift)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
-
+    if (lpcShift < 0) return DRFLAC_FALSE;    // Negative shifts are not supported: reject the subframe rather than pass a negative shift count to the predictor.
 
-    int16_t coefficients[32];
-    for (uint8_t i = 0; i < lpcOrder; ++i) {
-        if (!drflac__read_int16(bs, lpcPrecision, coefficients + i)) {
-            return DR_FALSE;
+    drflac_int32 coefficients[32];    // NOTE(review): assumes lpcOrder <= 32; the caller is not visible here - confirm.
+    for (i = 0; i < lpcOrder; ++i) {
+        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
+            return DRFLAC_FALSE;
         }
     }
 
     if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;    // Warm-up samples, precision, shift, coefficients and residual were all read successfully.
 }
 
 
-static dr_bool32 drflac__read_next_frame_header(drflac_bs* bs, uint8_t streaminfoBitsPerSample, drflac_frame_header* header)
+static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
 {
-    assert(bs != NULL);
-    assert(header != NULL);
+    drflac_assert(bs != NULL);
+    drflac_assert(header != NULL);
 
-    // At the moment the sync code is as a form of basic validation. The CRC is stored, but is unused at the moment. This
-    // should probably be handled better in the future.
+    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
+    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   // -1 = reserved.
 
-    const uint32_t sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
-    const uint8_t bitsPerSampleTable[8] = {0, 8, 12, (uint8_t)-1, 16, 20, 24, (uint8_t)-1};   // -1 = reserved.
-
-    uint16_t syncCode = 0;
-    if (!drflac__read_uint16(bs, 14, &syncCode)) {
-        return DR_FALSE;
-    }
-
-    if (syncCode != 0x3FFE) {
-        // TODO: Try and recover by attempting to seek to and read the next frame?
-        return DR_FALSE;
-    }
-
-    uint8_t reserved;
-    if (!drflac__read_uint8(bs, 1, &reserved)) {
-        return DR_FALSE;
-    }
-
-    uint8_t blockingStrategy = 0;
-    if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
-        return DR_FALSE;
-    }
-
-
-
-    uint8_t blockSize = 0;
-    if (!drflac__read_uint8(bs, 4, &blockSize)) {
-        return DR_FALSE;
-    }
-
-    uint8_t sampleRate = 0;
-    if (!drflac__read_uint8(bs, 4, &sampleRate)) {
-        return DR_FALSE;
-    }
-
-    uint8_t channelAssignment = 0;
-    if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
-        return DR_FALSE;
-    }
-
-    uint8_t bitsPerSample = 0;
-    if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
-        return DR_FALSE;
-    }
-
-    if (!drflac__read_uint8(bs, 1, &reserved)) {
-        return DR_FALSE;
-    }
-
-
-    dr_bool32 isVariableBlockSize = blockingStrategy == 1;
-    if (isVariableBlockSize) {
-        uint64_t sampleNumber;
-        if (!drflac__read_utf8_coded_number(bs, &sampleNumber)) {
-            return DR_FALSE;
+    // Keep looping until we find a valid sync code. Returns DRFLAC_TRUE once a header has been parsed into <header> (and, unless DR_FLAC_NO_CRC is defined, its CRC-8 matches); returns DRFLAC_FALSE when a read fails or the stream ends.
+    for (;;) {
+        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
+            return DRFLAC_FALSE;
         }
-        header->frameNumber  = 0;
-        header->sampleNumber = sampleNumber;
-    } else {
-        uint64_t frameNumber = 0;
-        if (!drflac__read_utf8_coded_number(bs, &frameNumber)) {
-            return DR_FALSE;
+
+        drflac_uint8 crc8 = 0xCE; // 0xCE = drflac_crc8(0, 0x3FFE, 14);
+
+        drflac_uint8 reserved = 0;
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
         }
-        header->frameNumber  = (uint32_t)frameNumber;   // <-- Safe cast.
-        header->sampleNumber = 0;
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+
+        drflac_uint8 blockingStrategy = 0;
+        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
+
+
+        drflac_uint8 blockSize = 0;
+        if (!drflac__read_uint8(bs, 4, &blockSize)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, blockSize, 4);
+        if (blockSize == 0) continue;    // Code 0 is reserved and would reach the final else branch below as 1 << (0 - 8). Assume an invalid block.
+
+
+        drflac_uint8 sampleRate = 0;
+        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, sampleRate, 4);
+
+
+        drflac_uint8 channelAssignment = 0;
+        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, channelAssignment, 4);
+        if (channelAssignment > 10) continue;    // Only 0..10 are valid; larger values would index past the 11-entry channel count lookup. Assume an invalid block.
+
+
+        drflac_uint8 bitsPerSample = 0;
+        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
+        if (bitsPerSample == 3 || bitsPerSample == 7) continue;    // Reserved codes: the (drflac_uint8)-1 entries of bitsPerSampleTable. Assume an invalid block.
+
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+
+        drflac_bool32 isVariableBlockSize = blockingStrategy == 1;
+        if (isVariableBlockSize) {
+            drflac_uint64 sampleNumber;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &sampleNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_END_OF_STREAM) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;    // Any other failure: loop back and look for the next sync code.
+                }
+            }
+            header->frameNumber  = 0;
+            header->sampleNumber = sampleNumber;
+        } else {
+            drflac_uint64 frameNumber = 0;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &frameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_END_OF_STREAM) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;    // Any other failure: loop back and look for the next sync code.
+                }
+            }
+            header->frameNumber  = (drflac_uint32)frameNumber;   // <-- Safe cast.
+            header->sampleNumber = 0;
+        }
+
+
+        if (blockSize == 1) {
+            header->blockSize = 192;
+        } else if (blockSize >= 2 && blockSize <= 5) {
+            header->blockSize = 576 * (1 << (blockSize - 2));
+        } else if (blockSize == 6) {
+            if (!drflac__read_uint16(bs, 8, &header->blockSize)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSize, 8);
+            header->blockSize += 1;
+        } else if (blockSize == 7) {
+            if (!drflac__read_uint16(bs, 16, &header->blockSize)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSize, 16);
+            header->blockSize += 1;
+        } else {
+            header->blockSize = 256 * (1 << (blockSize - 8));
+        }
+
+
+        if (sampleRate <= 11) {
+            header->sampleRate = sampleRateTable[sampleRate];
+        } else if (sampleRate == 12) {
+            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
+            header->sampleRate *= 1000;
+        } else if (sampleRate == 13) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+        } else if (sampleRate == 14) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+            header->sampleRate *= 10;
+        } else {
+            continue;  // Invalid. Assume an invalid block.
+        }
+
+
+        header->channelAssignment = channelAssignment;
+
+        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
+        if (header->bitsPerSample == 0) {
+            header->bitsPerSample = streaminfoBitsPerSample;
+        }
+
+        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
+            return DRFLAC_FALSE;
+        }
+
+    #ifndef DR_FLAC_NO_CRC
+        if (header->crc8 != crc8) {
+            continue;    // CRC mismatch. Loop back to the top and find the next sync code.
+        }
+    #endif
+        return DRFLAC_TRUE;
     }
-
-
-    if (blockSize == 1) {
-        header->blockSize = 192;
-    } else if (blockSize >= 2 && blockSize <= 5) {
-        header->blockSize = 576 * (1 << (blockSize - 2));
-    } else if (blockSize == 6) {
-        if (!drflac__read_uint16(bs, 8, &header->blockSize)) {
-            return DR_FALSE;
-        }
-        header->blockSize += 1;
-    } else if (blockSize == 7) {
-        if (!drflac__read_uint16(bs, 16, &header->blockSize)) {
-            return DR_FALSE;
-        }
-        header->blockSize += 1;
-    } else {
-        header->blockSize = 256 * (1 << (blockSize - 8));
-    }
-
-
-    if (sampleRate <= 11) {
-        header->sampleRate = sampleRateTable[sampleRate];
-    } else if (sampleRate == 12) {
-        if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
-            return DR_FALSE;
-        }
-        header->sampleRate *= 1000;
-    } else if (sampleRate == 13) {
-        if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
-            return DR_FALSE;
-        }
-    } else if (sampleRate == 14) {
-        if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
-            return DR_FALSE;
-        }
-        header->sampleRate *= 10;
-    } else {
-        return DR_FALSE;  // Invalid.
-    }
-
-
-    header->channelAssignment = channelAssignment;
-
-    header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
-    if (header->bitsPerSample == 0) {
-        header->bitsPerSample = streaminfoBitsPerSample;
-    }
-
-    if (drflac__read_uint8(bs, 8, &header->crc8) != 1) {
-        return DR_FALSE;
-    }
-
-    return DR_TRUE;
 }
 
-static dr_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
+static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
 {
-    uint8_t header;
+    drflac_uint8 header;
     if (!drflac__read_uint8(bs, 8, &header)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // First bit should always be 0.
     if ((header & 0x80) != 0) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     int type = (header & 0x7E) >> 1;
@@ -2112,7 +2818,7 @@ static dr_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pS
     }
 
     if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // Wasted bits per sample.
@@ -2120,22 +2826,22 @@ static dr_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pS
     if ((header & 0x01) == 1) {
         unsigned int wastedBitsPerSample;
         if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
         pSubframe->wastedBitsPerSample = (unsigned char)wastedBitsPerSample + 1;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, int32_t* pDecodedSamplesOut)
+static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
 {
-    assert(bs != NULL);
-    assert(frame != NULL);
+    drflac_assert(bs != NULL);
+    drflac_assert(frame != NULL);
 
     drflac_subframe* pSubframe = frame->subframes + subframeIndex;
     if (!drflac__read_subframe_header(bs, pSubframe)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // Side channels require an extra bit per sample. Took a while to figure that one out...
@@ -2172,20 +2878,20 @@ static dr_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int
             drflac__decode_samples__lpc(bs, frame->header.blockSize, pSubframe->bitsPerSample, pSubframe->lpcOrder, pSubframe->pDecodedSamples);
         } break;
 
-        default: return DR_FALSE;
+        default: return DRFLAC_FALSE;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-static dr_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
+static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
 {
-    assert(bs != NULL);
-    assert(frame != NULL);
+    drflac_assert(bs != NULL);
+    drflac_assert(frame != NULL);
 
     drflac_subframe* pSubframe = frame->subframes + subframeIndex;
     if (!drflac__read_subframe_header(bs, pSubframe)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // Side channels require an extra bit per sample. Took a while to figure that one out...
@@ -2199,14 +2905,13 @@ static dr_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int s
     // Need to handle wasted bits per sample.
     pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample;
     pSubframe->pDecodedSamples = NULL;
-    //pSubframe->pDecodedSamples = pFlac->pDecodedSamples + (pFlac->currentFrame.header.blockSize * subframeIndex);
 
     switch (pSubframe->subframeType)
     {
         case DRFLAC_SUBFRAME_CONSTANT:
         {
             if (!drflac__seek_bits(bs, pSubframe->bitsPerSample)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         } break;
 
@@ -2214,7 +2919,7 @@ static dr_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int s
         {
             unsigned int bitsToSeek = frame->header.blockSize * pSubframe->bitsPerSample;
             if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         } break;
 
@@ -2222,11 +2927,11 @@ static dr_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int s
         {
             unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample;
             if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             if (!drflac__read_and_seek_residual(bs, frame->header.blockSize, pSubframe->lpcOrder)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         } break;
 
@@ -2234,217 +2939,253 @@ static dr_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int s
         {
             unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample;
             if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             unsigned char lpcPrecision;
             if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             if (lpcPrecision == 15) {
-                return DR_FALSE;    // Invalid.
+                return DRFLAC_FALSE;    // Invalid.
             }
             lpcPrecision += 1;
 
 
             bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    // +5 for shift.
             if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             if (!drflac__read_and_seek_residual(bs, frame->header.blockSize, pSubframe->lpcOrder)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         } break;
 
-        default: return DR_FALSE;
+        default: return DRFLAC_FALSE;
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 
-static DRFLAC_INLINE uint8_t drflac__get_channel_count_from_channel_assignment(int8_t channelAssignment)
+static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
 {
-    assert(channelAssignment <= 10);
+    drflac_assert(channelAssignment <= 10);    // Upper bound only; the lookup below has exactly 11 entries (indices 0..10).
 
-    uint8_t lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
+    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};    // Assignments 0..7 map to 1..8 channels; assignments 8, 9 and 10 all map to 2 channels.
     return lookup[channelAssignment];
 }
 
-static dr_bool32 drflac__decode_frame(drflac* pFlac)
+static drflac_result drflac__decode_frame(drflac* pFlac)
 {
     // This function should be called while the stream is sitting on the first byte after the frame header.
-    memset(pFlac->currentFrame.subframes, 0, sizeof(pFlac->currentFrame.subframes));
+    drflac_zero_memory(pFlac->currentFrame.subframes, sizeof(pFlac->currentFrame.subframes));
 
     int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
-    for (int i = 0; i < channelCount; ++i)
-    {
+    for (int i = 0; i < channelCount; ++i) {    // One subframe per channel; channel i decodes into pDecodedSamples at offset blockSize * i.
         if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFrame, i, pFlac->pDecodedSamples + (pFlac->currentFrame.header.blockSize * i))) {
-            return DR_FALSE;
+            return DRFLAC_ERROR;    // A subframe failed to decode.
         }
     }
 
-    // At the end of the frame sits the padding and CRC. We don't use these so we can just seek past.
-    if (!drflac__seek_bits(&pFlac->bs, (DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7) + 16)) {
-        return DR_FALSE;
+    drflac_uint8 paddingSizeInBits = DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7;    // Low 3 bits of the remaining-bit count, i.e. 0..7 padding bits (presumably up to the next byte boundary).
+    if (paddingSizeInBits > 0) {
+        drflac_uint8 padding = 0;    // Read only to advance past the padding; the value is never inspected.
+        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
+            return DRFLAC_END_OF_STREAM;
+        }
     }
 
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16 = drflac__flush_crc16(&pFlac->bs);    // Taken before the stored CRC-16 is read from the stream below.
+#endif
+    drflac_uint16 desiredCRC16;    // The 16-bit CRC value stored in the stream after the padding.
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_END_OF_STREAM;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    // CRC mismatch.
+    }
+#endif
 
     pFlac->currentFrame.samplesRemaining = pFlac->currentFrame.header.blockSize * channelCount;
 
-    return DR_TRUE;
+    return DRFLAC_SUCCESS;    // Every subframe decoded and, unless DR_FLAC_NO_CRC is defined, the CRC-16 matched.
 }
 
-static dr_bool32 drflac__seek_frame(drflac* pFlac)
+static drflac_result drflac__seek_frame(drflac* pFlac)
 {
     int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
-    for (int i = 0; i < channelCount; ++i)
-    {
+    for (int i = 0; i < channelCount; ++i) {    // Skip one subframe per channel without decoding samples.
         if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFrame, i)) {
-            return DR_FALSE;
+            return DRFLAC_ERROR;
         }
     }
 
-    // Padding and CRC.
-    return drflac__seek_bits(&pFlac->bs, (DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7) + 16);
-}
-
-static dr_bool32 drflac__read_and_decode_next_frame(drflac* pFlac)
-{
-    assert(pFlac != NULL);
-
-    if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
-        return DR_FALSE;
+    // Padding. Skips the low-3-bit remainder of the L1 cache's remaining-bit count (0..7 bits).
+    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
+        return DRFLAC_ERROR;
     }
 
-    return drflac__decode_frame(pFlac);
+    // CRC. The computed value is flushed first, then the stored 16-bit value is read and the two are compared.
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    drflac_uint16 desiredCRC16;
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_END_OF_STREAM;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    // CRC mismatch.
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_bool32 drflac__read_and_decode_next_frame(drflac* pFlac)
+{   // Reads frame headers and decodes frames until one decodes successfully; a frame whose CRC mismatches is skipped, any other failure returns DRFLAC_FALSE.
+    drflac_assert(pFlac != NULL);
+
+    for (;;) {
+        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        drflac_result result = drflac__decode_frame(pFlac);
+        if (result != DRFLAC_SUCCESS) {
+            if (result == DRFLAC_CRC_MISMATCH) {
+                continue;   // CRC mismatch. Skip to the next frame.
+            } else {
+                return DRFLAC_FALSE;    // Every other non-success result is reported to the caller as failure.
+            }
+        }
+
+        return DRFLAC_TRUE;
+    }
 }
 
 
-static void drflac__get_current_frame_sample_range(drflac* pFlac, uint64_t* pFirstSampleInFrameOut, uint64_t* pLastSampleInFrameOut)
+static void drflac__get_current_frame_sample_range(drflac* pFlac, drflac_uint64* pFirstSampleInFrameOut, drflac_uint64* pLastSampleInFrameOut)
 {
-    assert(pFlac != NULL);
+    drflac_assert(pFlac != NULL);
 
     unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
 
-    uint64_t firstSampleInFrame = pFlac->currentFrame.header.sampleNumber;
+    drflac_uint64 firstSampleInFrame = pFlac->currentFrame.header.sampleNumber;
     if (firstSampleInFrame == 0) {
         firstSampleInFrame = pFlac->currentFrame.header.frameNumber * pFlac->maxBlockSize*channelCount;
     }
 
-    uint64_t lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.header.blockSize*channelCount);
+    drflac_uint64 lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.header.blockSize*channelCount);
     if (lastSampleInFrame > 0) {
         lastSampleInFrame -= 1; // Needs to be zero based.
     }
 
-
-    if (pFirstSampleInFrameOut) {
-        *pFirstSampleInFrameOut = firstSampleInFrame;
-    }
-    if (pLastSampleInFrameOut) {
-        *pLastSampleInFrameOut = lastSampleInFrame;
-    }
+    if (pFirstSampleInFrameOut) *pFirstSampleInFrameOut = firstSampleInFrame;
+    if (pLastSampleInFrameOut) *pLastSampleInFrameOut = lastSampleInFrame;
 }
 
-static dr_bool32 drflac__seek_to_first_frame(drflac* pFlac)
+static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
 {
-    assert(pFlac != NULL);
+    drflac_assert(pFlac != NULL);
 
-    dr_bool32 result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos);
+    drflac_bool32 result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos);
 
-    memset(&pFlac->currentFrame, 0, sizeof(pFlac->currentFrame));
+    drflac_zero_memory(&pFlac->currentFrame, sizeof(pFlac->currentFrame));
     return result;
 }
 
-static DRFLAC_INLINE dr_bool32 drflac__seek_to_next_frame(drflac* pFlac)
+static DRFLAC_INLINE drflac_result drflac__seek_to_next_frame(drflac* pFlac)
 {
     // This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section.
-    assert(pFlac != NULL);
+    drflac_assert(pFlac != NULL);
     return drflac__seek_frame(pFlac);
 }
 
-static dr_bool32 drflac__seek_to_frame_containing_sample(drflac* pFlac, uint64_t sampleIndex)
+static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_uint64 sampleIndex)
 {
-    assert(pFlac != NULL);
-
+    // We need to find the frame that contains the sample. To do this, we iterate over each frame and inspect its header. If based on the
+    // header we can determine that the frame contains the sample, we do a full decode of that frame.
     if (!drflac__seek_to_first_frame(pFlac)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    uint64_t firstSampleInFrame = 0;
-    uint64_t lastSampleInFrame = 0;
-    for (;;)
-    {
-        // We need to read the frame's header in order to determine the range of samples it contains.
+    drflac_uint64 runningSampleCount = 0;
+    for (;;) {
         if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
+        drflac_uint64 firstSampleInFrame = 0;
+        drflac_uint64 lastSampleInFrame = 0;
         drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame);
-        if (sampleIndex >= firstSampleInFrame && sampleIndex <= lastSampleInFrame) {
-            break;  // The sample is in this frame.
-        }
 
-        if (!drflac__seek_to_next_frame(pFlac)) {
-            return DR_FALSE;
+        drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
+        if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) {
+            // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            // it never existed and keep iterating.
+            drflac_result result = drflac__decode_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                // The frame is valid. We just need to skip over some samples to ensure it's sample-exact.
+                drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
+                if (samplesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+                return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0;  // <-- If this fails, something bad has happened (it should never fail).
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   // CRC mismatch. Pretend this frame never existed.
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            // frame never existed and leave the running sample count untouched.
+            drflac_result result = drflac__seek_to_next_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningSampleCount += sampleCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   // CRC mismatch. Pretend this frame never existed.
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
         }
     }
-
-    // If we get here we should be right at the start of the frame containing the sample.
-    return DR_TRUE;
-}
-
-static dr_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, uint64_t sampleIndex)
-{
-    if (!drflac__seek_to_frame_containing_sample(pFlac, sampleIndex)) {
-        return DR_FALSE;
-    }
-
-    // At this point we should be sitting on the first byte of the frame containing the sample. We need to decode every sample up to (but
-    // not including) the sample we're seeking to.
-    uint64_t firstSampleInFrame = 0;
-    drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, NULL);
-
-    assert(firstSampleInFrame <= sampleIndex);
-    size_t samplesToDecode = (size_t)(sampleIndex - firstSampleInFrame);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
-    if (samplesToDecode == 0) {
-        return DR_TRUE;
-    }
-
-    // At this point we are just sitting on the byte after the frame header. We need to decode the frame before reading anything from it.
-    if (!drflac__decode_frame(pFlac)) {
-        return DR_FALSE;
-    }
-
-    return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0;
 }
 
 
-static dr_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, uint64_t sampleIndex)
+static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_uint64 sampleIndex)
 {
-    assert(pFlac != NULL);
+    drflac_assert(pFlac != NULL);
 
     if (pFlac->seektablePos == 0) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     if (!drflac__seek_to_byte(&pFlac->bs, pFlac->seektablePos)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // The number of seek points is derived from the size of the SEEKTABLE block.
-    uint32_t seekpointCount = pFlac->seektableSize / 18;   // 18 = the size of each seek point.
+    drflac_uint32 seekpointCount = pFlac->seektableSize / 18;   // 18 = the size of each seek point.
     if (seekpointCount == 0) {
-        return DR_FALSE;   // Would this ever happen?
+        return DRFLAC_FALSE;   // Would this ever happen?
     }
 
 
     drflac_seekpoint closestSeekpoint = {0, 0, 0};
 
-    uint32_t seekpointsRemaining = seekpointCount;
-    while (seekpointsRemaining > 0)
-    {
+    drflac_uint32 seekpointsRemaining = seekpointCount;
+    while (seekpointsRemaining > 0) {
         drflac_seekpoint seekpoint;
         if (!drflac__read_uint64(&pFlac->bs, 64, &seekpoint.firstSample)) {
             break;
@@ -2456,7 +3197,9 @@ static dr_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, uint64_t samp
             break;
         }
 
-        if (seekpoint.firstSample * pFlac->channels > sampleIndex) {
+        // Note that the seekpoint sample is based on a single channel. The input sample (sampleIndex) is based on interleaving, thus
+        // we need to multiply the seekpoint's sample by the channel count.
+        if (seekpoint.firstSample*pFlac->channels > sampleIndex) {
             break;
         }
 
@@ -2467,53 +3210,68 @@ static dr_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, uint64_t samp
     // At this point we should have found the seekpoint closest to our sample. We need to seek to it using basically the same
     // technique as we use with the brute force method.
     if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos + closestSeekpoint.frameOffset)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-
-    uint64_t firstSampleInFrame = 0;
-    uint64_t lastSampleInFrame = 0;
-    for (;;)
-    {
-        // We need to read the frame's header in order to determine the range of samples it contains.
+    drflac_uint64 runningSampleCount = closestSeekpoint.firstSample*pFlac->channels;
+    for (;;) {
         if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
+        drflac_uint64 firstSampleInFrame = 0;
+        drflac_uint64 lastSampleInFrame = 0;
         drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame);
-        if (sampleIndex >= firstSampleInFrame && sampleIndex <= lastSampleInFrame) {
-            break;  // The sample is in this frame.
-        }
 
-        if (!drflac__seek_to_next_frame(pFlac)) {
-            return DR_FALSE;
+        drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
+        if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) {
+            // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            // it never existed and keep iterating.
+            drflac_result result = drflac__decode_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                // The frame is valid. We just need to skip over some samples to ensure it's sample-exact.
+                drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
+                if (samplesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+                return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0;  // <-- If this fails, something bad has happened (it should never fail).
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   // CRC mismatch. Pretend this frame never existed.
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            // frame never existed and leave the running sample count untouched.
+            drflac_result result = drflac__seek_to_next_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningSampleCount += sampleCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   // CRC mismatch. Pretend this frame never existed.
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
         }
     }
-
-    assert(firstSampleInFrame <= sampleIndex);
-
-    // At this point we are just sitting on the byte after the frame header. We need to decode the frame before reading anything from it.
-    if (!drflac__decode_frame(pFlac)) {
-        return DR_FALSE;
-    }
-
-    size_t samplesToDecode = (size_t)(sampleIndex - firstSampleInFrame);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
-    return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode;
 }
 
 
 #ifndef DR_FLAC_NO_OGG
 typedef struct
 {
-    uint8_t capturePattern[4];  // Should be "OggS"
-    uint8_t structureVersion;   // Always 0.
-    uint8_t headerType;
-    uint64_t granulePosition;
-    uint32_t serialNumber;
-    uint32_t sequenceNumber;
-    uint32_t checksum;
-    uint8_t segmentCount;
-    uint8_t segmentTable[255];
+    drflac_uint8 capturePattern[4];  // Should be "OggS"
+    drflac_uint8 structureVersion;   // Always 0.
+    drflac_uint8 headerType;
+    drflac_uint64 granulePosition;
+    drflac_uint32 serialNumber;
+    drflac_uint32 sequenceNumber;
+    drflac_uint32 checksum;
+    drflac_uint8 segmentCount;
+    drflac_uint8 segmentTable[255];
 } drflac_ogg_page_header;
 #endif
 
@@ -2522,25 +3280,28 @@ typedef struct
     drflac_read_proc onRead;
     drflac_seek_proc onSeek;
     drflac_meta_proc onMeta;
+    drflac_container container;
     void* pUserData;
     void* pUserDataMD;
-    drflac_container container;
-    uint32_t sampleRate;
-    uint8_t  channels;
-    uint8_t  bitsPerSample;
-    uint64_t totalSampleCount;
-    uint16_t maxBlockSize;
-    uint64_t runningFilePos;
-    dr_bool32 hasMetadataBlocks;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalSampleCount;
+    drflac_uint16 maxBlockSize;
+    drflac_uint64 runningFilePos;
+    drflac_bool32 hasStreamInfoBlock;
+    drflac_bool32 hasMetadataBlocks;
+    drflac_bs bs;                           // <-- A bit streamer is required for loading data during initialization.
+    drflac_frame_header firstFrameHeader;   // <-- The header of the first frame that was read during relaxed initialization. Only set if there is no STREAMINFO block.
 
 #ifndef DR_FLAC_NO_OGG
-    uint32_t oggSerial;
-    uint64_t oggFirstBytePos;
+    drflac_uint32 oggSerial;
+    drflac_uint64 oggFirstBytePos;
     drflac_ogg_page_header oggBosHeader;
 #endif
 } drflac_init_info;
 
-static DRFLAC_INLINE void drflac__decode_block_header(uint32_t blockHeader, uint8_t* isLastBlock, uint8_t* blockType, uint32_t* blockSize)
+static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
 {
     blockHeader = drflac__be2host_32(blockHeader);
     *isLastBlock = (blockHeader & (0x01 << 31)) >> 31;
@@ -2548,41 +3309,41 @@ static DRFLAC_INLINE void drflac__decode_block_header(uint32_t blockHeader, uint
     *blockSize   = (blockHeader & 0xFFFFFF);
 }
 
-static DRFLAC_INLINE dr_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, uint8_t* isLastBlock, uint8_t* blockType, uint32_t* blockSize)
+static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
 {
-    uint32_t blockHeader;
+    drflac_uint32 blockHeader;
     if (onRead(pUserData, &blockHeader, 4) != 4) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-dr_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
+drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
 {
     // min/max block size.
-    uint32_t blockSizes;
+    drflac_uint32 blockSizes;
     if (onRead(pUserData, &blockSizes, 4) != 4) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // min/max frame size.
-    uint64_t frameSizes = 0;
+    drflac_uint64 frameSizes = 0;
     if (onRead(pUserData, &frameSizes, 6) != 6) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // Sample rate, channels, bits per sample and total sample count.
-    uint64_t importantProps;
+    drflac_uint64 importantProps;
     if (onRead(pUserData, &importantProps, 8) != 8) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     // MD5
-    uint8_t md5[16];
+    drflac_uint8 md5[16];
     if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     blockSizes     = drflac__be2host_32(blockSizes);
@@ -2591,34 +3352,33 @@ dr_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drfl
 
     pStreamInfo->minBlockSize     = (blockSizes & 0xFFFF0000) >> 16;
     pStreamInfo->maxBlockSize     = blockSizes & 0x0000FFFF;
-    pStreamInfo->minFrameSize     = (uint32_t)((frameSizes & 0xFFFFFF0000000000ULL) >> 40ULL);
-    pStreamInfo->maxFrameSize     = (uint32_t)((frameSizes & 0x000000FFFFFF0000ULL) >> 16ULL);
-    pStreamInfo->sampleRate       = (uint32_t)((importantProps & 0xFFFFF00000000000ULL) >> 44ULL);
-    pStreamInfo->channels         = (uint8_t )((importantProps & 0x00000E0000000000ULL) >> 41ULL) + 1;
-    pStreamInfo->bitsPerSample    = (uint8_t )((importantProps & 0x000001F000000000ULL) >> 36ULL) + 1;
-    pStreamInfo->totalSampleCount = (importantProps & 0x0000000FFFFFFFFFULL) * pStreamInfo->channels;
-    memcpy(pStreamInfo->md5, md5, sizeof(md5));
+    pStreamInfo->minFrameSize     = (drflac_uint32)((frameSizes     & (drflac_uint64)0xFFFFFF0000000000) >> 40);
+    pStreamInfo->maxFrameSize     = (drflac_uint32)((frameSizes     & (drflac_uint64)0x000000FFFFFF0000) >> 16);
+    pStreamInfo->sampleRate       = (drflac_uint32)((importantProps & (drflac_uint64)0xFFFFF00000000000) >> 44);
+    pStreamInfo->channels         = (drflac_uint8 )((importantProps & (drflac_uint64)0x00000E0000000000) >> 41) + 1;
+    pStreamInfo->bitsPerSample    = (drflac_uint8 )((importantProps & (drflac_uint64)0x000001F000000000) >> 36) + 1;
+    pStreamInfo->totalSampleCount = (importantProps & (drflac_uint64)0x0000000FFFFFFFFF) * pStreamInfo->channels;
+    drflac_copy_memory(pStreamInfo->md5, md5, sizeof(md5));
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
+drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
 {
-    assert(pFlac != NULL);
+    drflac_assert(pFlac != NULL);
 
     // We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
     // we'll be sitting on byte 42.
-    uint64_t runningFilePos = 42;
-    uint64_t seektablePos  = 0;
-    uint32_t seektableSize = 0;
+    drflac_uint64 runningFilePos = 42;
+    drflac_uint64 seektablePos   = 0;
+    drflac_uint32 seektableSize  = 0;
 
-    for (;;)
-    {
-        uint8_t isLastBlock = 0;
-        uint8_t blockType;
-        uint32_t blockSize;
+    for (;;) {
+        drflac_uint8 isLastBlock = 0;
+        drflac_uint8 blockType;
+        drflac_uint32 blockSize;
         if (!drflac__read_and_decode_block_header(pFlac->bs.onRead, pFlac->bs.pUserData, &isLastBlock, &blockType, &blockSize)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
         runningFilePos += 4;
 
@@ -2633,24 +3393,24 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
             case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
             {
                 if (pFlac->onMeta) {
-                    void* pRawData = malloc(blockSize);
+                    void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) {
-                        free(pRawData);
-                        return DR_FALSE;
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
                     }
 
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
-                    metadata.data.application.id       = drflac__be2host_32(*(uint32_t*)pRawData);
-                    metadata.data.application.pData    = (const void*)((uint8_t*)pRawData + sizeof(uint32_t));
-                    metadata.data.application.dataSize = blockSize - sizeof(uint32_t);
+                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
+                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
+                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
                     pFlac->onMeta(pFlac->pUserDataMD, &metadata);
 
-                    free(pRawData);
+                    DRFLAC_FREE(pRawData);
                 }
             } break;
 
@@ -2660,14 +3420,14 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
                 seektableSize = blockSize;
 
                 if (pFlac->onMeta) {
-                    void* pRawData = malloc(blockSize);
+                    void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) {
-                        free(pRawData);
-                        return DR_FALSE;
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
                     }
 
                     metadata.pRawData = pRawData;
@@ -2676,7 +3436,7 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
                     metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
 
                     // Endian swap.
-                    for (uint32_t iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
+                    for (drflac_uint32 iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
                         drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
                         pSeekpoint->firstSample = drflac__be2host_64(pSeekpoint->firstSample);
                         pSeekpoint->frameOffset = drflac__be2host_64(pSeekpoint->frameOffset);
@@ -2685,96 +3445,96 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
 
                     pFlac->onMeta(pFlac->pUserDataMD, &metadata);
 
-                    free(pRawData);
+                    DRFLAC_FREE(pRawData);
                 }
             } break;
 
             case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
             {
                 if (pFlac->onMeta) {
-                    void* pRawData = malloc(blockSize);
+                    void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) {
-                        free(pRawData);
-                        return DR_FALSE;
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
                     }
 
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
 
                     const char* pRunningData = (const char*)pRawData;
-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.vorbis_comment.vendor       = pRunningData;                                 pRunningData += metadata.data.vorbis_comment.vendorLength;
-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.vorbis_comment.vendor       = pRunningData;                                      pRunningData += metadata.data.vorbis_comment.vendorLength;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
                     metadata.data.vorbis_comment.comments     = pRunningData;
                     pFlac->onMeta(pFlac->pUserDataMD, &metadata);
 
-                    free(pRawData);
+                    DRFLAC_FREE(pRawData);
                 }
             } break;
 
             case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
             {
                 if (pFlac->onMeta) {
-                    void* pRawData = malloc(blockSize);
+                    void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) {
-                        free(pRawData);
-                        return DR_FALSE;
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
                     }
 
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
 
                     const char* pRunningData = (const char*)pRawData;
-                    memcpy(metadata.data.cuesheet.catalog, pRunningData, 128);                               pRunningData += 128;
-                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(uint64_t*)pRunningData); pRunningData += 4;
-                    metadata.data.cuesheet.isCD              = ((pRunningData[0] & 0x80) >> 7) != 0;         pRunningData += 259;
-                    metadata.data.cuesheet.trackCount        = pRunningData[0];                              pRunningData += 1;
-                    metadata.data.cuesheet.pTrackData        = (const uint8_t*)pRunningData;
+                    drflac_copy_memory(metadata.data.cuesheet.catalog, pRunningData, 128);                        pRunningData += 128;
+                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(drflac_uint64*)pRunningData); pRunningData += 4;
+                    metadata.data.cuesheet.isCD              = ((pRunningData[0] & 0x80) >> 7) != 0;              pRunningData += 259;
+                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                   pRunningData += 1;
+                    metadata.data.cuesheet.pTrackData        = (const drflac_uint8*)pRunningData;
                     pFlac->onMeta(pFlac->pUserDataMD, &metadata);
 
-                    free(pRawData);
+                    DRFLAC_FREE(pRawData);
                 }
             } break;
 
             case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
             {
                 if (pFlac->onMeta) {
-                    void* pRawData = malloc(blockSize);
+                    void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) {
-                        free(pRawData);
-                        return DR_FALSE;
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
                     }
 
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
 
                     const char* pRunningData = (const char*)pRawData;
-                    metadata.data.picture.type              = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mimeLength        = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mime              = pRunningData;                                 pRunningData += metadata.data.picture.mimeLength;
-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.type              = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength        = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mime              = pRunningData;                                      pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
                     metadata.data.picture.description       = pRunningData;
-                    metadata.data.picture.width             = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.height            = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.colorDepth        = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.indexColorCount   = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pictureDataSize   = drflac__be2host_32(*(uint32_t*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pPictureData      = (const uint8_t*)pRunningData;
+                    metadata.data.picture.width             = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.height            = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth        = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount   = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize   = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pPictureData      = (const drflac_uint8*)pRunningData;
                     pFlac->onMeta(pFlac->pUserDataMD, &metadata);
 
-                    free(pRawData);
+                    DRFLAC_FREE(pRawData);
                 }
             } break;
 
@@ -2783,12 +3543,12 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
                 if (pFlac->onMeta) {
                     metadata.data.padding.unused = 0;
 
-                    // Padding doesn't have anything meaningful in it, so just skip over it.
+                    // Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback.
                     if (!pFlac->bs.onSeek(pFlac->bs.pUserData, blockSize, drflac_seek_origin_current)) {
-                        return DR_FALSE;
+                        isLastBlock = DRFLAC_TRUE;  // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop.
+                    } else {
+                        pFlac->onMeta(pFlac->pUserDataMD, &metadata);
                     }
-
-                    pFlac->onMeta(pFlac->pUserDataMD, &metadata);
                 }
             } break;
 
@@ -2797,7 +3557,7 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
                 // Invalid chunk. Just skip over this one.
                 if (pFlac->onMeta) {
                     if (!pFlac->bs.onSeek(pFlac->bs.pUserData, blockSize, drflac_seek_origin_current)) {
-                        return DR_FALSE;
+                        isLastBlock = DRFLAC_TRUE;  // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop.
                     }
                 }
             }
@@ -2807,29 +3567,29 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
                 // It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
                 // can at the very least report the chunk to the application and let it look at the raw data.
                 if (pFlac->onMeta) {
-                    void* pRawData = malloc(blockSize);
+                    void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) {
-                        free(pRawData);
-                        return DR_FALSE;
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
                     }
 
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
                     pFlac->onMeta(pFlac->pUserDataMD, &metadata);
 
-                    free(pRawData);
+                    DRFLAC_FREE(pRawData);
                 }
             } break;
         }
 
         // If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above.
-        if (pFlac->onMeta == NULL) {
+        if (pFlac->onMeta == NULL && blockSize > 0) {
             if (!pFlac->bs.onSeek(pFlac->bs.pUserData, blockSize, drflac_seek_origin_current)) {
-                return DR_FALSE;
+                isLastBlock = DRFLAC_TRUE;
             }
         }
 
@@ -2843,10 +3603,10 @@ dr_bool32 drflac__read_and_decode_metadata(drflac* pFlac)
     pFlac->seektableSize = seektableSize;
     pFlac->firstFramePos = runningFilePos;
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-dr_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD)
+drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
 {
     (void)onSeek;
 
@@ -2855,56 +3615,193 @@ dr_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc
     pInit->container = drflac_container_native;
 
     // The first metadata block should be the STREAMINFO block.
-    uint8_t isLastBlock;
-    uint8_t blockType;
-    uint32_t blockSize;
+    drflac_uint8 isLastBlock;
+    drflac_uint8 blockType;
+    drflac_uint32 blockSize;
     if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
     if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-        return DR_FALSE;    // Invalid block type. First block must be the STREAMINFO block.
+        if (!relaxed) {
+            // We're opening in strict mode and the first block is not the STREAMINFO block. Error.
+            return DRFLAC_FALSE;
+        } else {
+            // Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
+            // for that frame.
+            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
+            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
+
+            if (!drflac__read_next_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
+                return DRFLAC_FALSE;    // Couldn't find a frame.
+            }
+
+            if (pInit->firstFrameHeader.bitsPerSample == 0) {
+                return DRFLAC_FALSE;    // Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist.
+            }
+
+            pInit->sampleRate    = pInit->firstFrameHeader.sampleRate;
+            pInit->channels      = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
+            pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample;
+            pInit->maxBlockSize  = 65535;   // <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo
+            return DRFLAC_TRUE;
+        }
+    } else {
+        drflac_streaminfo streaminfo;
+        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+            return DRFLAC_FALSE;
+        }
+
+        pInit->hasStreamInfoBlock = DRFLAC_TRUE;
+        pInit->sampleRate         = streaminfo.sampleRate;
+        pInit->channels           = streaminfo.channels;
+        pInit->bitsPerSample      = streaminfo.bitsPerSample;
+        pInit->totalSampleCount   = streaminfo.totalSampleCount;
+        pInit->maxBlockSize       = streaminfo.maxBlockSize;    // Don't care about the min block size - only the max (used for determining the size of the memory allocation).
+        pInit->hasMetadataBlocks = !isLastBlock;
+
+        if (onMeta) {
+            drflac_metadata metadata;
+            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+            metadata.pRawData = NULL;
+            metadata.rawDataSize = 0;
+            metadata.data.streaminfo = streaminfo;
+            onMeta(pUserDataMD, &metadata);
+        }
+
+        return DRFLAC_TRUE;
     }
-
-
-    drflac_streaminfo streaminfo;
-    if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
-        return DR_FALSE;
-    }
-
-    pInit->sampleRate       = streaminfo.sampleRate;
-    pInit->channels         = streaminfo.channels;
-    pInit->bitsPerSample    = streaminfo.bitsPerSample;
-    pInit->totalSampleCount = streaminfo.totalSampleCount;
-    pInit->maxBlockSize     = streaminfo.maxBlockSize;    // Don't care about the min block size - only the max (used for determining the size of the memory allocation).
-
-    if (onMeta) {
-        drflac_metadata metadata;
-        metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
-        metadata.pRawData = NULL;
-        metadata.rawDataSize = 0;
-        metadata.data.streaminfo = streaminfo;
-        onMeta(pUserDataMD, &metadata);
-    }
-
-    pInit->hasMetadataBlocks = !isLastBlock;
-    return DR_TRUE;
 }
 
 #ifndef DR_FLAC_NO_OGG
-static DRFLAC_INLINE dr_bool32 drflac_ogg__is_capture_pattern(uint8_t pattern[4])
+#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
+#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  // CRC-32 of "OggS".
+
+typedef enum
+{
+    drflac_ogg_recover_on_crc_mismatch,
+    drflac_ogg_fail_on_crc_mismatch
+} drflac_ogg_crc_mismatch_recovery;
+
+
+static drflac_uint32 drflac__crc32_table[] = {
+    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
+    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
+    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
+    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
+    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
+    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
+    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
+    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
+    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
+    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
+    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
+    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
+    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
+    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
+    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
+    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
+    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
+    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
+    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
+    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
+    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
+    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
+    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
+    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
+    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
+    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
+    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
+    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
+    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
+    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
+    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
+    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
+    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
+    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
+    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
+    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
+    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
+    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
+    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
+    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
+    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
+    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
+    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
+    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
+    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
+    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
+    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
+    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
+    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
+    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
+    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
+    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
+    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
+    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
+    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
+    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
+    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
+    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
+    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
+    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
+    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
+    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
+    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
+    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
+};
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
+{
+#ifndef DR_FLAC_NO_CRC
+    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
+#else
+    (void)data;
+    return crc32;
+#endif
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
+{
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
+    return crc32;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
+{
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
+    return crc32;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
+{
+    // This can be optimized.
+    for (drflac_uint32 i = 0; i < dataSize; ++i) {
+        crc32 = drflac_crc32_byte(crc32, pData[i]);
+    }
+    return crc32;
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
 {
     return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
 }
 
-static DRFLAC_INLINE uint32_t drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
 {
     return 27 + pHeader->segmentCount;
 }
 
-static DRFLAC_INLINE uint32_t drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
 {
-    uint32_t pageBodySize = 0;
+    drflac_uint32 pageBodySize = 0;
     for (int i = 0; i < pHeader->segmentCount; ++i) {
         pageBodySize += pHeader->segmentTable[i];
     }
@@ -2912,49 +3809,84 @@ static DRFLAC_INLINE uint32_t drflac_ogg__get_page_body_size(drflac_ogg_page_hea
     return pageBodySize;
 }
 
-dr_bool32 drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, uint32_t* pHeaderSize)
+drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
 {
-    if (onRead(pUserData, &pHeader->structureVersion, 1) != 1 || pHeader->structureVersion != 0) {
-        return DR_FALSE;   // Unknown structure version. Possibly corrupt stream.
+    drflac_assert(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
+
+    drflac_uint8 data[23];
+    if (onRead(pUserData, data, 23) != 23) {
+        return DRFLAC_END_OF_STREAM;
     }
-    if (onRead(pUserData, &pHeader->headerType, 1) != 1) {
-        return DR_FALSE;
-    }
-    if (onRead(pUserData, &pHeader->granulePosition, 8) != 8) {
-        return DR_FALSE;
-    }
-    if (onRead(pUserData, &pHeader->serialNumber, 4) != 4) {
-        return DR_FALSE;
-    }
-    if (onRead(pUserData, &pHeader->sequenceNumber, 4) != 4) {
-        return DR_FALSE;
-    }
-    if (onRead(pUserData, &pHeader->checksum, 4) != 4) {
-        return DR_FALSE;
-    }
-    if (onRead(pUserData, &pHeader->segmentCount, 1) != 1 || pHeader->segmentCount == 0) {
-        return DR_FALSE;   // Should not have a segment count of 0.
-    }
-    if (onRead(pUserData, &pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
-        return DR_FALSE;
+    *pBytesRead += 23;
+
+    pHeader->structureVersion = data[0];
+    pHeader->headerType       = data[1];
+    drflac_copy_memory(&pHeader->granulePosition, &data[ 2], 8);
+    drflac_copy_memory(&pHeader->serialNumber,    &data[10], 4);
+    drflac_copy_memory(&pHeader->sequenceNumber,  &data[14], 4);
+    drflac_copy_memory(&pHeader->checksum,        &data[18], 4);
+    pHeader->segmentCount     = data[22];
+
+    // Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0.
+    data[18] = 0;
+    data[19] = 0;
+    data[20] = 0;
+    data[21] = 0;
+
+    drflac_uint32 i;
+    for (i = 0; i < 23; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
     }
 
-    if (pHeaderSize) *pHeaderSize = (27 + pHeader->segmentCount);
-    return DR_TRUE;
+
+    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
+        return DRFLAC_END_OF_STREAM;
+    }
+    *pBytesRead += pHeader->segmentCount;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
+    }
+
+    return DRFLAC_SUCCESS;
 }
 
-dr_bool32 drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, uint32_t* pHeaderSize)
+drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
 {
-    uint8_t id[4];
+    *pBytesRead = 0;
+
+    drflac_uint8 id[4];
     if (onRead(pUserData, id, 4) != 4) {
-        return DR_FALSE;
+        return DRFLAC_END_OF_STREAM;
     }
+    *pBytesRead += 4;
 
-    if (id[0] != 'O' || id[1] != 'g' || id[2] != 'g' || id[3] != 'S') {
-        return DR_FALSE;
+    // We need to read byte-by-byte until we find the OggS capture pattern.
+    for (;;) {
+        if (drflac_ogg__is_capture_pattern(id)) {
+            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+
+            drflac_result result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
+            if (result == DRFLAC_SUCCESS) {
+                return DRFLAC_SUCCESS;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;
+                } else {
+                    return result;
+                }
+            }
+        } else {
+            // The first 4 bytes did not equal the capture pattern. Read the next byte and try again.
+            id[0] = id[1];
+            id[1] = id[2];
+            id[2] = id[3];
+            if (onRead(pUserData, &id[3], 1) != 1) {
+                return DRFLAC_END_OF_STREAM;
+            }
+            *pBytesRead += 1;
+        }
     }
-
-    return drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pHeaderSize);
 }
 
 
@@ -2968,12 +3900,14 @@ typedef struct
     drflac_read_proc onRead;    // The original onRead callback from drflac_open() and family.
     drflac_seek_proc onSeek;    // The original onSeek callback from drflac_open() and family.
     void* pUserData;            // The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family.
-    uint64_t currentBytePos;    // The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking.
-    uint64_t firstBytePos;      // The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page.
-    uint32_t serialNumber;      // The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization.
+    drflac_uint64 currentBytePos;   // The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking.
+    drflac_uint64 firstBytePos;     // The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page.
+    drflac_uint32 serialNumber;     // The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization.
     drflac_ogg_page_header bosPageHeader;   // Used for seeking.
     drflac_ogg_page_header currentPageHeader;
-    uint32_t bytesRemainingInPage;
+    drflac_uint32 bytesRemainingInPage;
+    drflac_uint32 pageDataSize;
+    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
 } drflac_oggbs; // oggbs = Ogg Bitstream
 
 static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
@@ -2984,82 +3918,103 @@ static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut,
     return bytesActuallyRead;
 }
 
-static dr_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, uint64_t offset, drflac_seek_origin origin)
+static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
 {
-    if (origin == drflac_seek_origin_start)
-    {
+    if (origin == drflac_seek_origin_start) {
         if (offset <= 0x7FFFFFFF) {
             if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             oggbs->currentBytePos = offset;
 
-            return DR_TRUE;
+            return DRFLAC_TRUE;
         } else {
             if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
-            oggbs->currentBytePos = offset;
+            oggbs->currentBytePos = 0x7FFFFFFF;    // Only the first hop has happened so far. The relative seek below adds the remainder; storing the full offset here would count it twice.
 
             return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
         }
-    }
-    else
-    {
+    } else {
         while (offset > 0x7FFFFFFF) {
             if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
             oggbs->currentBytePos += 0x7FFFFFFF;
             offset -= 0x7FFFFFFF;
         }
 
         if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    // <-- Safe cast thanks to the loop above.
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
         oggbs->currentBytePos += offset;
 
-        return DR_TRUE;
+        return DRFLAC_TRUE;
     }
 }
 
-static dr_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs)
+static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
 {
     drflac_ogg_page_header header;
-    for (;;)
-    {
-        uint32_t headerSize;
-        if (!drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &headerSize)) {
-            return DR_FALSE;
+    for (;;) {
+        drflac_uint32 crc32 = 0;
+        drflac_uint32 bytesRead;
+        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
         }
-        oggbs->currentBytePos += headerSize;
+        oggbs->currentBytePos += bytesRead;
 
-
-        uint32_t pageBodySize = drflac_ogg__get_page_body_size(&header);
-
-        if (header.serialNumber == oggbs->serialNumber) {
-            oggbs->currentPageHeader = header;
-            oggbs->bytesRemainingInPage = pageBodySize;
-            return DR_TRUE;
+        drflac_uint32 pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
+            continue;   // Invalid page size. Assume it's corrupted and just move to the next page.
         }
 
-        // If we get here it means the page is not a FLAC page - skip it.
-        if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {    // <-- Safe cast - maximum size of a page is way below that of an int.
-            return DR_FALSE;
+        if (header.serialNumber != oggbs->serialNumber) {
+            // It's not a FLAC page. Skip it.
+            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            continue;
         }
+
+
+        // We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page.
+        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->pageDataSize = pageBodySize;
+
+#ifndef DR_FLAC_NO_CRC
+        drflac_uint32 actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
+        if (actualCRC32 != header.checksum) {
+            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
+                continue;   // CRC mismatch. Skip this page.
+            } else {
+                // Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
+                // go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
+                // seek did not fully complete.
+                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
+                return DRFLAC_FALSE;
+            }
+        }
+#endif
+
+        oggbs->currentPageHeader = header;
+        oggbs->bytesRemainingInPage = pageBodySize;
+        return DRFLAC_TRUE;
     }
 }
 
 // Function below is unused at the moment, but I might be re-adding it later.
 #if 0
-static uint8_t drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, uint8_t* pBytesRemainingInSeg)
+static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
 {
-    uint32_t bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
-    uint8_t iSeg = 0;
-    uint32_t iByte = 0;
-    while (iByte < bytesConsumedInPage)
-    {
-        uint8_t segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
+    drflac_uint8 iSeg = 0;
+    drflac_uint32 iByte = 0;
+    while (iByte < bytesConsumedInPage) {
+        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
         if (iByte + segmentSize > bytesConsumedInPage) {
             break;
         } else {
@@ -3068,26 +4023,25 @@ static uint8_t drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, uint
         }
     }
 
-    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (uint8_t)(bytesConsumedInPage - iByte);
+    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
     return iSeg;
 }
 
-static dr_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
+static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
 {
     // The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page.
-    for (;;)    // <-- Loop over pages.
-    {
-        dr_bool32 atEndOfPage = DR_FALSE;
+    for (;;) {
+        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
 
-        uint8_t bytesRemainingInSeg;
-        uint8_t iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
+        drflac_uint8 bytesRemainingInSeg;
+        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
 
-        uint32_t bytesToEndOfPacketOrPage = bytesRemainingInSeg;
-        for (uint8_t iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
-            uint8_t segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
+        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
+            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
             if (segmentSize < 255) {
                 if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
-                    atEndOfPage = DR_TRUE;
+                    atEndOfPage = DRFLAC_TRUE;
                 }
 
                 break;
@@ -3097,32 +4051,29 @@ static dr_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
         }
 
         // At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
-        // want to load the next page and keep searching for the end of the frame.
+        // want to load the next page and keep searching for the end of the packet.
         drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
         oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
 
-        if (atEndOfPage)
-        {
+        if (atEndOfPage) {
             // We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
             // straddle pages.
             if (!drflac_oggbs__goto_next_page(oggbs)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             // If it's a fresh packet it most likely means we're at the next packet.
             if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
-                return DR_TRUE;
+                return DRFLAC_TRUE;
             }
-        }
-        else
-        {
-            // We're at the next frame.
-            return DR_TRUE;
+        } else {
+            // We're at the next packet.
+            return DRFLAC_TRUE;
         }
     }
 }
 
-static dr_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
+static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
 {
     // The bitstream should be sitting on the first byte just after the header of the frame.
 
@@ -3134,76 +4085,67 @@ static dr_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
 static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
 {
     drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    assert(oggbs != NULL);
+    drflac_assert(oggbs != NULL);
 
-    uint8_t* pRunningBufferOut = (uint8_t*)bufferOut;
+    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
 
     // Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one.
     size_t bytesRead = 0;
-    while (bytesRead < bytesToRead)
-    {
+    while (bytesRead < bytesToRead) {
         size_t bytesRemainingToRead = bytesToRead - bytesRead;
 
         if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
-            bytesRead += oggbs->onRead(oggbs->pUserData, pRunningBufferOut, bytesRemainingToRead);
-            oggbs->bytesRemainingInPage -= (uint32_t)bytesRemainingToRead;
+            drflac_copy_memory(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
+            bytesRead += bytesRemainingToRead;
+            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
             break;
         }
 
         // If we get here it means some of the requested data is contained in the next pages.
         if (oggbs->bytesRemainingInPage > 0) {
-            size_t bytesJustRead = oggbs->onRead(oggbs->pUserData, pRunningBufferOut, oggbs->bytesRemainingInPage);
-            bytesRead += bytesJustRead;
-            pRunningBufferOut += bytesJustRead;
-
-            if (bytesJustRead != oggbs->bytesRemainingInPage) {
-                break;  // Ran out of data.
-            }
+            drflac_copy_memory(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
+            bytesRead += oggbs->bytesRemainingInPage;
+            pRunningBufferOut += oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
         }
 
-        assert(bytesRemainingToRead > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs)) {
-            break;  // Failed to go to the next chunk. Might have simply hit the end of the stream.
+        drflac_assert(bytesRemainingToRead > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            break;  // Failed to go to the next page. Might have simply hit the end of the stream.
         }
     }
 
-    oggbs->currentBytePos += bytesRead;
     return bytesRead;
 }
 
-static dr_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
+static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
 {
     drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    assert(oggbs != NULL);
-    assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
+    drflac_assert(oggbs != NULL);
+    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
 
     // Seeking is always forward which makes things a lot simpler.
     if (origin == drflac_seek_origin_start) {
-        int startBytePos = (int)oggbs->firstBytePos + (79-42);  // 79 = size of bos page; 42 = size of FLAC header data. Seek up to the first byte of the native FLAC data.
-        if (!drflac_oggbs__seek_physical(oggbs, startBytePos, drflac_seek_origin_start)) {
-            return DR_FALSE;
+        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
         }
 
-        oggbs->currentPageHeader = oggbs->bosPageHeader;
-        oggbs->bytesRemainingInPage = 42;   // 42 = size of the native FLAC header data. That's our start point for seeking.
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            return DRFLAC_FALSE;
+        }
 
         return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
     }
 
 
-    assert(origin == drflac_seek_origin_current);
+    drflac_assert(origin == drflac_seek_origin_current);
 
     int bytesSeeked = 0;
-    while (bytesSeeked < offset)
-    {
+    while (bytesSeeked < offset) {
         int bytesRemainingToSeek = offset - bytesSeeked;
-        assert(bytesRemainingToSeek >= 0);
+        drflac_assert(bytesRemainingToSeek >= 0);
 
         if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
-            if (!drflac_oggbs__seek_physical(oggbs, bytesRemainingToSeek, drflac_seek_origin_current)) {
-                return DR_FALSE;
-            }
-
             bytesSeeked += bytesRemainingToSeek;
             oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
             break;
@@ -3211,45 +4153,42 @@ static dr_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_or
 
         // If we get here it means some of the requested data is contained in the next pages.
         if (oggbs->bytesRemainingInPage > 0) {
-            if (!drflac_oggbs__seek_physical(oggbs, oggbs->bytesRemainingInPage, drflac_seek_origin_current)) {
-                return DR_FALSE;
-            }
-
             bytesSeeked += (int)oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
         }
 
-        assert(bytesRemainingToSeek > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs)) {
-            break;  // Failed to go to the next chunk. Might have simply hit the end of the stream.
+        drflac_assert(bytesRemainingToSeek > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            // Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch.
+            return DRFLAC_FALSE;
         }
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
-dr_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, uint64_t sample)
+drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
 {
-    drflac_oggbs* oggbs = (drflac_oggbs*)(((int32_t*)pFlac->pExtraData) + pFlac->maxBlockSize*pFlac->channels);
+    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
 
-    uint64_t originalBytePos = oggbs->currentBytePos;   // For recovery.
+    drflac_uint64 originalBytePos = oggbs->currentBytePos;   // For recovery.
 
     // First seek to the first frame.
     if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
     oggbs->bytesRemainingInPage = 0;
 
-    uint64_t runningGranulePosition = 0;
-    uint64_t runningFrameBytePos = oggbs->currentBytePos;   // <-- Points to the OggS identifier.
-    for (;;)
-    {
-        if (!drflac_oggbs__goto_next_page(oggbs)) {
+    drflac_uint64 runningGranulePosition = 0;
+    drflac_uint64 runningFrameBytePos = oggbs->currentBytePos;   // <-- Points to the OggS identifier.
+    for (;;) {
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
             drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-            return DR_FALSE;   // Never did find that sample...
+            return DRFLAC_FALSE;   // Never did find that sample...
         }
 
-        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader);
-        if (oggbs->currentPageHeader.granulePosition*pFlac->channels >= sample) {
+        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
+        if (oggbs->currentPageHeader.granulePosition*pFlac->channels >= sampleIndex) {
             break; // The sample is somewhere in the previous page.
         }
 
@@ -3258,28 +4197,17 @@ dr_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, uint64_t sample)
         // disregard any pages that do not begin a fresh packet.
         if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    // <-- Is it a fresh page?
             if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
-                uint8_t firstBytesInPage[2];
-                if (drflac_oggbs__read_physical(oggbs, firstBytesInPage, 2) != 2) {
-                    drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-                    return DR_FALSE;
-                }
+                drflac_uint8 firstBytesInPage[2];
+                firstBytesInPage[0] = oggbs->pageData[0];
+                firstBytesInPage[1] = oggbs->pageData[1];
+
                 if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    // <-- Does the page begin with a frame's sync code?
                     runningGranulePosition = oggbs->currentPageHeader.granulePosition*pFlac->channels;
                 }
 
-                if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->bytesRemainingInPage-2, drflac_seek_origin_current)) {
-                    drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-                    return DR_FALSE;
-                }
-
                 continue;
             }
         }
-
-        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->bytesRemainingInPage, drflac_seek_origin_current)) {
-            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-            return DR_FALSE;
-        }
     }
 
 
@@ -3288,71 +4216,85 @@ dr_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, uint64_t sample)
     // a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
     // we find the one containing the target sample.
     if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
-    if (!drflac_oggbs__goto_next_page(oggbs)) {
-        return DR_FALSE;
+    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+        return DRFLAC_FALSE;
     }
 
 
     // At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
     // looping over these frames until we find the one containing the sample we're after.
-    uint64_t firstSampleInFrame = runningGranulePosition;
-    for (;;)
-    {
-        // NOTE for later: When using Ogg's page/segment based seeking later on we can't use this function (or any drflac__*
-        // reading functions) because otherwise it will pull extra data for use in it's own internal caches which will then
-        // break the positioning of the read pointer for the Ogg bitstream.
+    drflac_uint64 runningSampleCount = runningGranulePosition;
+    for (;;) {
+        // There are two ways to find the sample and seek past irrelevant frames:
+        //   1) Use the native FLAC decoder.
+        //   2) Use Ogg's framing system.
+        //
+        // Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
+        // do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
+        // duplication for the decoding of frame headers.
+        //
+        // Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
+        // bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
+        // standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
+        // the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
+        // using the native FLAC decoding APIs, such as drflac__read_next_frame_header(), needs to be re-implemented so as to
+        // avoid the use of the drflac_bs object.
+        //
+        // Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
+        //   1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
+        //   2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
+        //   3) Simplicity.
         if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
-        int channels = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
-        uint64_t lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.header.blockSize*channels);
-        lastSampleInFrame -= 1; // <-- Zero based.
+        drflac_uint64 firstSampleInFrame = 0;
+        drflac_uint64 lastSampleInFrame = 0;
+        drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame);
 
-        if (sample >= firstSampleInFrame && sample <= lastSampleInFrame) {
-            break;  // The sample is in this frame.
+        drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
+        if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) {
+            // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            // it never existed and keep iterating.
+            drflac_result result = drflac__decode_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                // The frame is valid. We just need to skip over some samples to ensure it's sample-exact.
+                drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
+                if (samplesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+                return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0;  // <-- If this fails, something bad has happened (it should never fail).
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   // CRC mismatch. Pretend this frame never existed.
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            // frame never existed and leave the running sample count untouched.
+            drflac_result result = drflac__seek_to_next_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningSampleCount += sampleCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   // CRC mismatch. Pretend this frame never existed.
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
         }
-
-
-        // If we get here it means the sample is not in this frame so we need to move to the next one. Now the cool thing
-        // with Ogg is that we can efficiently seek past the frame by looking at the lacing values of each segment in
-        // the page.
-        firstSampleInFrame = lastSampleInFrame+1;
-
-#if 1
-        // Slow way. This uses the native FLAC decoder to seek past the frame. This is slow because it needs to do a partial
-        // decode of the frame. Although this is how the native version works, we can use Ogg's framing system to make it
-        // more efficient. Leaving this here for reference and to use as a basis for debugging purposes.
-        if (!drflac__seek_to_next_frame(pFlac)) {
-            return DR_FALSE;
-        }
-#else
-        // TODO: This is not yet complete. See note at the top of this loop body.
-
-        // Fast(er) way. This uses Ogg's framing system to seek past the frame. This should be much more efficient than the
-        // native FLAC seeking.
-        if (!drflac_oggbs__seek_to_next_frame(oggbs)) {
-            return DR_FALSE;
-        }
-#endif
     }
-
-    assert(firstSampleInFrame <= sample);
-
-    if (!drflac__decode_frame(pFlac)) {
-        return DR_FALSE;
-    }
-
-    size_t samplesToDecode = (size_t)(sample - firstSampleInFrame);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
-    return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode;
 }
 
 
-dr_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD)
+drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
 {
     // Pre: The bit stream should be sitting just past the 4-byte OggS capture pattern.
+    (void)relaxed;
 
     pInit->container = drflac_container_ogg;
     pInit->oggFirstBytePos = 0;
@@ -3362,88 +4304,85 @@ dr_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc on
     // any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
     drflac_ogg_page_header header;
 
-    uint32_t headerSize;
-    if (!drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &headerSize)) {
-        return DR_FALSE;
+    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+    drflac_uint32 bytesRead = 0;
+    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+        return DRFLAC_FALSE;
     }
-    pInit->runningFilePos = headerSize;
+    pInit->runningFilePos += bytesRead;
 
-    for (;;)
-    {
+    for (;;) {
         // Break if we're past the beginning of stream page.
         if ((header.headerType & 0x02) == 0) {
-            return DR_FALSE;
+            return DRFLAC_FALSE;
         }
 
 
         // Check if it's a FLAC header.
         int pageBodySize = drflac_ogg__get_page_body_size(&header);
-        if (pageBodySize == 51)   // 51 = the lacing value of the FLAC header packet.
-        {
+        if (pageBodySize == 51) {   // 51 = the lacing value of the FLAC header packet.
             // It could be a FLAC page...
-            uint32_t bytesRemainingInPage = pageBodySize;
+            drflac_uint32 bytesRemainingInPage = pageBodySize;
 
-            uint8_t packetType;
+            drflac_uint8 packetType;
             if (onRead(pUserData, &packetType, 1) != 1) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
 
             bytesRemainingInPage -= 1;
-            if (packetType == 0x7F)
-            {
+            if (packetType == 0x7F) {
                 // Increasingly more likely to be a FLAC page...
-                uint8_t sig[4];
+                drflac_uint8 sig[4];
                 if (onRead(pUserData, sig, 4) != 4) {
-                    return DR_FALSE;
+                    return DRFLAC_FALSE;
                 }
 
                 bytesRemainingInPage -= 4;
-                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C')
-                {
+                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
                     // Almost certainly a FLAC page...
-                    uint8_t mappingVersion[2];
+                    drflac_uint8 mappingVersion[2];
                     if (onRead(pUserData, mappingVersion, 2) != 2) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     if (mappingVersion[0] != 1) {
-                        return DR_FALSE;   // Only supporting version 1.x of the Ogg mapping.
+                        return DRFLAC_FALSE;   // Only supporting version 1.x of the Ogg mapping.
                     }
 
                     // The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
                     // be handling it in a generic way based on the serial number and packet types.
                     if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
                     // Expecting the native FLAC signature "fLaC".
                     if (onRead(pUserData, sig, 4) != 4) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
 
-                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C')
-                    {
+                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
                         // The remaining data in the page should be the STREAMINFO block.
-                        uint8_t isLastBlock;
-                        uint8_t blockType;
-                        uint32_t blockSize;
+                        drflac_uint8 isLastBlock;
+                        drflac_uint8 blockType;
+                        drflac_uint32 blockSize;
                         if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
-                            return DR_FALSE;
+                            return DRFLAC_FALSE;
                         }
 
                         if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-                            return DR_FALSE;    // Invalid block type. First block must be the STREAMINFO block.
+                            return DRFLAC_FALSE;    // Invalid block type. First block must be the STREAMINFO block.
                         }
 
                         drflac_streaminfo streaminfo;
-                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo))
-                        {
+                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
                             // Success!
-                            pInit->sampleRate       = streaminfo.sampleRate;
-                            pInit->channels         = streaminfo.channels;
-                            pInit->bitsPerSample    = streaminfo.bitsPerSample;
-                            pInit->totalSampleCount = streaminfo.totalSampleCount;
-                            pInit->maxBlockSize     = streaminfo.maxBlockSize;
+                            pInit->hasStreamInfoBlock = DRFLAC_TRUE;
+                            pInit->sampleRate         = streaminfo.sampleRate;
+                            pInit->channels           = streaminfo.channels;
+                            pInit->bitsPerSample      = streaminfo.bitsPerSample;
+                            pInit->totalSampleCount   = streaminfo.totalSampleCount;
+                            pInit->maxBlockSize       = streaminfo.maxBlockSize;
+                            pInit->hasMetadataBlocks  = !isLastBlock;
 
                             if (onMeta) {
                                 drflac_metadata metadata;
@@ -3459,39 +4398,29 @@ dr_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc on
                             pInit->oggSerial        = header.serialNumber;
                             pInit->oggBosHeader     = header;
                             break;
-                        }
-                        else
-                        {
+                        } else {
                             // Failed to read STREAMINFO block. Aww, so close...
-                            return DR_FALSE;
+                            return DRFLAC_FALSE;
                         }
-                    }
-                    else
-                    {
+                    } else {
                         // Invalid file.
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
-                }
-                else
-                {
+                } else {
                     // Not a FLAC header. Skip it.
                     if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
-                        return DR_FALSE;
+                        return DRFLAC_FALSE;
                     }
                 }
-            }
-            else
-            {
+            } else {
                 // Not a FLAC header. Seek past the entire page and move on to the next.
                 if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
-                    return DR_FALSE;
+                    return DRFLAC_FALSE;
                 }
             }
-        }
-        else
-        {
+        } else {
             if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
-                return DR_FALSE;
+                return DRFLAC_FALSE;
             }
         }
 
@@ -3499,85 +4428,153 @@ dr_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc on
 
 
         // Read the header of the next page.
-        if (!drflac_ogg__read_page_header(onRead, pUserData, &header, &headerSize)) {
-            return DR_FALSE;
+        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
         }
-        pInit->runningFilePos += headerSize;
+        pInit->runningFilePos += bytesRead;
     }
 
 
     // If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
     // packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialiation phase for Ogg is to create the
     // Ogg bistream object.
-    pInit->hasMetadataBlocks = DR_TRUE;    // <-- Always have at least VORBIS_COMMENT metadata block.
-    return DR_TRUE;
+    pInit->hasMetadataBlocks = DRFLAC_TRUE;    // <-- Always have at least VORBIS_COMMENT metadata block.
+    return DRFLAC_TRUE;
 }
 #endif
 
-dr_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD)
+drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
 {
     if (pInit == NULL || onRead == NULL || onSeek == NULL) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
     }
 
-    pInit->onRead        = onRead;
-    pInit->onSeek        = onSeek;
-    pInit->onMeta        = onMeta;
-    pInit->pUserData     = pUserData;
-    pInit->pUserDataMD   = pUserDataMD;
+    drflac_zero_memory(pInit, sizeof(*pInit));
+    pInit->onRead       = onRead;
+    pInit->onSeek       = onSeek;
+    pInit->onMeta       = onMeta;
+    pInit->container    = container;
+    pInit->pUserData    = pUserData;
+    pInit->pUserDataMD  = pUserDataMD;
 
-    uint8_t id[4];
-    if (onRead(pUserData, id, 4) != 4) {
-        return DR_FALSE;
+    pInit->bs.onRead    = onRead;
+    pInit->bs.onSeek    = onSeek;
+    pInit->bs.pUserData = pUserData;
+    drflac__reset_cache(&pInit->bs);
+
+
+    // If the container is explicitly defined then we can try opening in relaxed mode.
+    drflac_bool32 relaxed = container != drflac_container_unknown;
+
+    drflac_uint8 id[4];
+
+    // Skip over any ID3 tags.
+    for (;;) {
+        if (onRead(pUserData, id, 4) != 4) {
+            return DRFLAC_FALSE;    // Ran out of data.
+        }
+        pInit->runningFilePos += 4;
+
+        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
+            drflac_uint8 header[6];
+            if (onRead(pUserData, header, 6) != 6) {
+                return DRFLAC_FALSE;    // Ran out of data.
+            }
+            pInit->runningFilePos += 6;
+
+            drflac_uint8 flags = header[1];
+            drflac_uint32 headerSize;
+            drflac_copy_memory(&headerSize, header+2, 4);
+            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
+            if (flags & 0x10) {
+                headerSize += 10;
+            }
+
+            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;    // Failed to seek past the tag.
+            }
+            pInit->runningFilePos += headerSize;
+        } else {
+            break;
+        }
     }
 
     if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
-        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD);
+        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
     }
-
 #ifndef DR_FLAC_NO_OGG
     if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
-        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD);
+        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
     }
 #endif
 
+    // If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable.
+    if (relaxed) {
+        if (container == drflac_container_native) {
+            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#ifndef DR_FLAC_NO_OGG
+        if (container == drflac_container_ogg) {
+            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#endif
+    }
+
     // Unsupported container.
-    return DR_FALSE;
+    return DRFLAC_FALSE;
 }
 
 void drflac__init_from_info(drflac* pFlac, drflac_init_info* pInit)
 {
-    assert(pFlac != NULL);
-    assert(pInit != NULL);
-
-    memset(pFlac, 0, sizeof(*pFlac));
-    pFlac->bs.onRead        = pInit->onRead;
-    pFlac->bs.onSeek        = pInit->onSeek;
-    pFlac->bs.pUserData     = pInit->pUserData;
-    pFlac->bs.nextL2Line    = sizeof(pFlac->bs.cacheL2) / sizeof(pFlac->bs.cacheL2[0]); // <-- Initialize to this to force a client-side data retrieval right from the start.
-    pFlac->bs.consumedBits  = sizeof(pFlac->bs.cache)*8;
+    drflac_assert(pFlac != NULL);
+    drflac_assert(pInit != NULL);
 
+    drflac_zero_memory(pFlac, sizeof(*pFlac));
+    pFlac->bs               = pInit->bs;
     pFlac->onMeta           = pInit->onMeta;
     pFlac->pUserDataMD      = pInit->pUserDataMD;
     pFlac->maxBlockSize     = pInit->maxBlockSize;
     pFlac->sampleRate       = pInit->sampleRate;
-    pFlac->channels         = (uint8_t)pInit->channels;
-    pFlac->bitsPerSample    = (uint8_t)pInit->bitsPerSample;
+    pFlac->channels         = (drflac_uint8)pInit->channels;
+    pFlac->bitsPerSample    = (drflac_uint8)pInit->bitsPerSample;
     pFlac->totalSampleCount = pInit->totalSampleCount;
     pFlac->container        = pInit->container;
 }
 
-drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD)
+drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
 {
+#ifndef DRFLAC_NO_CPUID
+    // CPU support first.
+    drflac__init_cpu_caps();
+#endif
+
     drflac_init_info init;
-    if (!drflac__init_private(&init, onRead, onSeek, onMeta, pUserData, pUserDataMD)) {
+    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
         return NULL;
     }
 
-    size_t allocationSize = sizeof(drflac);
-    allocationSize += init.maxBlockSize * init.channels * sizeof(int32_t);
-    //allocationSize += init.seektableSize;
+    // The size of the allocation for the drflac object needs to be large enough to fit the following:
+    //   1) The main members of the drflac structure
+    //   2) A block of memory large enough to store the decoded samples of the largest frame in the stream
+    //   3) If the container is Ogg, a drflac_oggbs object
+    //
+    // The complicated part of the allocation is making sure there's enough room for the decoded samples, taking into consideration
+    // the different SIMD instruction sets.
+    drflac_uint32 allocationSize = sizeof(drflac);
 
+    // The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
+    // we are supporting.
+    drflac_uint32 wholeSIMDVectorCountPerChannel;
+    if ((init.maxBlockSize % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    } else {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+    }
+
+    drflac_uint32 decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+
+    allocationSize += decodedSamplesAllocationSize;
+    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  // Allocate extra bytes to ensure we have enough for alignment.
 
 #ifndef DR_FLAC_NO_OGG
     // There's additional data required for Ogg streams.
@@ -3586,13 +4583,13 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
     }
 #endif
 
-    drflac* pFlac = (drflac*)malloc(allocationSize);
+    drflac* pFlac = (drflac*)DRFLAC_MALLOC(allocationSize);
     drflac__init_from_info(pFlac, &init);
-    pFlac->pDecodedSamples = (int32_t*)pFlac->pExtraData;
+    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
 
 #ifndef DR_FLAC_NO_OGG
     if (init.container == drflac_container_ogg) {
-        drflac_oggbs* oggbs = (drflac_oggbs*)(((int32_t*)pFlac->pExtraData) + init.maxBlockSize*init.channels);
+        drflac_oggbs* oggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
         oggbs->onRead = onRead;
         oggbs->onSeek = onSeek;
         oggbs->pUserData = pUserData;
@@ -3606,17 +4603,42 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
         pFlac->bs.onRead = drflac__on_read_ogg;
         pFlac->bs.onSeek = drflac__on_seek_ogg;
         pFlac->bs.pUserData = (void*)oggbs;
+        pFlac->_oggbs = (void*)oggbs;
     }
 #endif
 
     // Decode metadata before returning.
     if (init.hasMetadataBlocks) {
         if (!drflac__read_and_decode_metadata(pFlac)) {
-            free(pFlac);
+            DRFLAC_FREE(pFlac);
             return NULL;
         }
     }
 
+    // If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
+    // the first frame.
+    if (!init.hasStreamInfoBlock) {
+        pFlac->currentFrame.header = init.firstFrameHeader;
+        do
+        {
+            drflac_result result = drflac__decode_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                break;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+                        DRFLAC_FREE(pFlac);
+                        return NULL;
+                    }
+                    continue;
+                } else {
+                    DRFLAC_FREE(pFlac);
+                    return NULL;
+                }
+            }
+        } while (1);
+    }
+
     return pFlac;
 }
 
@@ -3633,9 +4655,9 @@ static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t byt
     return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
 }
 
-static dr_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
+static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
 {
-    assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
+    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
 
     return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
 }
@@ -3664,9 +4686,14 @@ static void drflac__close_file_handle(drflac_file file)
 #else
 #include <windows.h>
 
+// This doesn't seem to be defined for VC6.
+#ifndef INVALID_SET_FILE_POINTER
+#define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+
 static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
 {
-    assert(bytesToRead < 0xFFFFFFFF);   // dr_flac will never request huge amounts of data at a time. This is a safe assertion.
+    drflac_assert(bytesToRead < 0xFFFFFFFF);   // dr_flac will never request huge amounts of data at a time. This is a safe assertion.
 
     DWORD bytesRead;
     ReadFile((HANDLE)pUserData, bufferOut, (DWORD)bytesToRead, &bytesRead, NULL);
@@ -3674,9 +4701,9 @@ static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t byt
     return (size_t)bytesRead;
 }
 
-static dr_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
+static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
 {
-    assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
+    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
 
     return SetFilePointer((HANDLE)pUserData, offset, NULL, (origin == drflac_seek_origin_current) ? FILE_CURRENT : FILE_BEGIN) != INVALID_SET_FILE_POINTER;
 }
@@ -3721,7 +4748,7 @@ drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc on
         return NULL;
     }
 
-    drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, (void*)file, pUserData);
+    drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)file, pUserData);
     if (pFlac == NULL) {
         drflac__close_file_handle(file);
         return pFlac;
@@ -3734,8 +4761,8 @@ drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc on
 static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
 {
     drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
-    assert(memoryStream != NULL);
-    assert(memoryStream->dataSize >= memoryStream->currentReadPos);
+    drflac_assert(memoryStream != NULL);
+    drflac_assert(memoryStream->dataSize >= memoryStream->currentReadPos);
 
     size_t bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
     if (bytesToRead > bytesRemaining) {
@@ -3743,18 +4770,18 @@ static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t by
     }
 
     if (bytesToRead > 0) {
-        memcpy(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
+        drflac_copy_memory(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
         memoryStream->currentReadPos += bytesToRead;
     }
 
     return bytesToRead;
 }
 
-static dr_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
+static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
 {
     drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
-    assert(memoryStream != NULL);
-    assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
+    drflac_assert(memoryStream != NULL);
+    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
 
     if (origin == drflac_seek_origin_current) {
         if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
@@ -3763,14 +4790,14 @@ static dr_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek
             memoryStream->currentReadPos = memoryStream->dataSize;  // Trying to seek too far forward.
         }
     } else {
-        if ((uint32_t)offset <= memoryStream->dataSize) {
+        if ((drflac_uint32)offset <= memoryStream->dataSize) {
             memoryStream->currentReadPos = offset;
         } else {
             memoryStream->currentReadPos = memoryStream->dataSize;  // Trying to seek too far forward.
         }
     }
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 drflac* drflac_open_memory(const void* data, size_t dataSize)
@@ -3790,7 +4817,7 @@ drflac* drflac_open_memory(const void* data, size_t dataSize)
 #ifndef DR_FLAC_NO_OGG
     if (pFlac->container == drflac_container_ogg)
     {
-        drflac_oggbs* oggbs = (drflac_oggbs*)(((int32_t*)pFlac->pExtraData) + pFlac->maxBlockSize*pFlac->channels);
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
         oggbs->pUserData = &pFlac->memoryStream;
     }
     else
@@ -3808,7 +4835,7 @@ drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drfl
     memoryStream.data = (const unsigned char*)data;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
-    drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, &memoryStream, pUserData);
+    drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData);
     if (pFlac == NULL) {
         return NULL;
     }
@@ -3819,7 +4846,7 @@ drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drfl
 #ifndef DR_FLAC_NO_OGG
     if (pFlac->container == drflac_container_ogg)
     {
-        drflac_oggbs* oggbs = (drflac_oggbs*)(((int32_t*)pFlac->pExtraData) + pFlac->maxBlockSize*pFlac->channels);
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
         oggbs->pUserData = &pFlac->memoryStream;
     }
     else
@@ -3835,12 +4862,20 @@ drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drfl
 
 drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData)
 {
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, pUserData, pUserData);
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData);
+}
+drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData);
 }
 
 drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData)
 {
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, pUserData, pUserData);
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData);
+}
+drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData);
 }
 
 void drflac_close(drflac* pFlac)
@@ -3859,8 +4894,8 @@ void drflac_close(drflac* pFlac)
 #ifndef DR_FLAC_NO_OGG
     // Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained.
     if (pFlac->container == drflac_container_ogg) {
-        assert(pFlac->bs.onRead == drflac__on_read_ogg);
-        drflac_oggbs* oggbs = (drflac_oggbs*)((int32_t*)pFlac->pExtraData + pFlac->maxBlockSize*pFlac->channels);
+        drflac_assert(pFlac->bs.onRead == drflac__on_read_ogg);
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
         if (oggbs->onRead == drflac__on_read_stdio) {
             drflac__close_file_handle((drflac_file)oggbs->pUserData);
         }
@@ -3868,26 +4903,25 @@ void drflac_close(drflac* pFlac)
 #endif
 #endif
 
-    free(pFlac);
+    DRFLAC_FREE(pFlac);
 }
 
-uint64_t drflac__read_s32__misaligned(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferOut)
+drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut)
 {
     unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
 
     // We should never be calling this when the number of samples to read is >= the sample count.
-    assert(samplesToRead < channelCount);
-    assert(pFlac->currentFrame.samplesRemaining > 0 && samplesToRead <= pFlac->currentFrame.samplesRemaining);
+    drflac_assert(samplesToRead < channelCount);
+    drflac_assert(pFlac->currentFrame.samplesRemaining > 0 && samplesToRead <= pFlac->currentFrame.samplesRemaining);
 
 
-    uint64_t samplesRead = 0;
-    while (samplesToRead > 0)
-    {
-        uint64_t totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount;
-        uint64_t samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining;
-        unsigned int channelIndex = samplesReadFromFrameSoFar % channelCount;
+    drflac_uint64 samplesRead = 0;
+    while (samplesToRead > 0) {
+        drflac_uint64 totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount;
+        drflac_uint64 samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining;
+        drflac_uint64 channelIndex = samplesReadFromFrameSoFar % channelCount;
 
-        uint64_t nextSampleInFrame = samplesReadFromFrameSoFar / channelCount;
+        drflac_uint64 nextSampleInFrame = samplesReadFromFrameSoFar / channelCount;
 
         int decodedSample = 0;
         switch (pFlac->currentFrame.header.channelAssignment)
@@ -3901,7 +4935,6 @@ uint64_t drflac__read_s32__misaligned(drflac* pFlac, uint64_t samplesToRead, int
                     int left = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame];
                     decodedSample = left - side;
                 }
-
             } break;
 
             case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
@@ -3913,7 +4946,6 @@ uint64_t drflac__read_s32__misaligned(drflac* pFlac, uint64_t samplesToRead, int
                 } else {
                     decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame];
                 }
-
             } break;
 
             case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
@@ -3933,7 +4965,6 @@ uint64_t drflac__read_s32__misaligned(drflac* pFlac, uint64_t samplesToRead, int
                     mid = (((unsigned int)mid) << 1) | (side & 0x01);
                     decodedSample = (mid - side) >> 1;
                 }
-
             } break;
 
             case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
@@ -3958,19 +4989,15 @@ uint64_t drflac__read_s32__misaligned(drflac* pFlac, uint64_t samplesToRead, int
     return samplesRead;
 }
 
-uint64_t drflac__seek_forward_by_samples(drflac* pFlac, uint64_t samplesToRead)
+drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 samplesToRead)
 {
-    uint64_t samplesRead = 0;
-    while (samplesToRead > 0)
-    {
-        if (pFlac->currentFrame.samplesRemaining == 0)
-        {
+    drflac_uint64 samplesRead = 0;
+    while (samplesToRead > 0) {
+        if (pFlac->currentFrame.samplesRemaining == 0) {
             if (!drflac__read_and_decode_next_frame(pFlac)) {
                 break;  // Couldn't read the next frame, so just break from the loop and return.
             }
-        }
-        else
-        {
+        } else {
             samplesRead += 1;
             pFlac->currentFrame.samplesRemaining -= 1;
             samplesToRead -= 1;
@@ -3980,7 +5007,7 @@ uint64_t drflac__seek_forward_by_samples(drflac* pFlac, uint64_t samplesToRead)
     return samplesRead;
 }
 
-uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferOut)
+drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut)
 {
     // Note that <bufferOut> is allowed to be null, in which case this will be treated as something like a seek.
     if (pFlac == NULL || samplesToRead == 0) {
@@ -3992,27 +5019,23 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
     }
 
 
-    uint64_t samplesRead = 0;
-    while (samplesToRead > 0)
-    {
+    drflac_uint64 samplesRead = 0;
+    while (samplesToRead > 0) {
         // If we've run out of samples in this frame, go to the next.
-        if (pFlac->currentFrame.samplesRemaining == 0)
-        {
+        if (pFlac->currentFrame.samplesRemaining == 0) {
             if (!drflac__read_and_decode_next_frame(pFlac)) {
                 break;  // Couldn't read the next frame, so just break from the loop and return.
             }
-        }
-        else
-        {
+        } else {
             // Here is where we grab the samples and interleave them.
 
             unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
-            uint64_t totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount;
-            uint64_t samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining;
+            drflac_uint64 totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount;
+            drflac_uint64 samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining;
 
-            int misalignedSampleCount = samplesReadFromFrameSoFar % channelCount;
+            drflac_uint64 misalignedSampleCount = samplesReadFromFrameSoFar % channelCount;
             if (misalignedSampleCount > 0) {
-                uint64_t misalignedSamplesRead = drflac__read_s32__misaligned(pFlac, misalignedSampleCount, bufferOut);
+                drflac_uint64 misalignedSamplesRead = drflac__read_s32__misaligned(pFlac, misalignedSampleCount, bufferOut);
                 samplesRead   += misalignedSamplesRead;
                 samplesReadFromFrameSoFar += misalignedSamplesRead;
                 bufferOut     += misalignedSamplesRead;
@@ -4020,22 +5043,22 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
             }
 
 
-            uint64_t alignedSampleCountPerChannel = samplesToRead / channelCount;
+            drflac_uint64 alignedSampleCountPerChannel = samplesToRead / channelCount;
             if (alignedSampleCountPerChannel > pFlac->currentFrame.samplesRemaining / channelCount) {
                 alignedSampleCountPerChannel = pFlac->currentFrame.samplesRemaining / channelCount;
             }
 
-            uint64_t firstAlignedSampleInFrame = samplesReadFromFrameSoFar / channelCount;
+            drflac_uint64 firstAlignedSampleInFrame = samplesReadFromFrameSoFar / channelCount;
             unsigned int unusedBitsPerSample = 32 - pFlac->bitsPerSample;
 
             switch (pFlac->currentFrame.header.channelAssignment)
             {
                 case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
                 {
-                    const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
-                    const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
+                    const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
+                    const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                    for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) {
+                    for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
                         int left  = pDecodedSamples0[i];
                         int side  = pDecodedSamples1[i];
                         int right = left - side;
@@ -4047,10 +5070,10 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
 
                 case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
                 {
-                    const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
-                    const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
+                    const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
+                    const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                    for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) {
+                    for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
                         int side  = pDecodedSamples0[i];
                         int right = pDecodedSamples1[i];
                         int left  = right + side;
@@ -4062,12 +5085,12 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
 
                 case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
                 {
-                    const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
-                    const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
+                    const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
+                    const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                    for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) {
+                    for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
                         int side = pDecodedSamples1[i];
-                        int mid  = (((uint32_t)pDecodedSamples0[i]) << 1) | (side & 0x01);
+                        int mid  = (((drflac_uint32)pDecodedSamples0[i]) << 1) | (side & 0x01);
 
                         bufferOut[i*2+0] = ((mid + side) >> 1) << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
                         bufferOut[i*2+1] = ((mid - side) >> 1) << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
@@ -4080,10 +5103,10 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
                     if (pFlac->currentFrame.header.channelAssignment == 1) // 1 = Stereo
                     {
                         // Stereo optimized inner loop unroll.
-                        const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
-                        const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
+                        const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
+                        const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                        for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) {
+                        for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
                             bufferOut[i*2+0] = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
                             bufferOut[i*2+1] = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
                         }
@@ -4091,7 +5114,7 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
                     else
                     {
                         // Generic interleaving.
-                        for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) {
+                        for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
                             for (unsigned int j = 0; j < channelCount; ++j) {
                                 bufferOut[(i*channelCount)+j] = (pFlac->currentFrame.subframes[j].pDecodedSamples[firstAlignedSampleInFrame + i]) << (unusedBitsPerSample + pFlac->currentFrame.subframes[j].wastedBitsPerSample);
                             }
@@ -4100,7 +5123,7 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
                 } break;
             }
 
-            uint64_t alignedSamplesRead = alignedSampleCountPerChannel * channelCount;
+            drflac_uint64 alignedSamplesRead = alignedSampleCountPerChannel * channelCount;
             samplesRead   += alignedSamplesRead;
             samplesReadFromFrameSoFar += alignedSamplesRead;
             bufferOut     += alignedSamplesRead;
@@ -4110,9 +5133,8 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
 
 
             // At this point we may still have some excess samples left to read.
-            if (samplesToRead > 0 && pFlac->currentFrame.samplesRemaining > 0)
-            {
-                uint64_t excessSamplesRead = 0;
+            if (samplesToRead > 0 && pFlac->currentFrame.samplesRemaining > 0) {
+                drflac_uint64 excessSamplesRead = 0;
                 if (samplesToRead < pFlac->currentFrame.samplesRemaining) {
                     excessSamplesRead = drflac__read_s32__misaligned(pFlac, samplesToRead, bufferOut);
                 } else {
@@ -4130,35 +5152,66 @@ uint64_t drflac_read_s32(drflac* pFlac, uint64_t samplesToRead, int32_t* bufferO
     return samplesRead;
 }
 
-uint64_t drflac_read_s16(drflac* pFlac, uint64_t samplesToRead, int16_t* pBufferOut)
+drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut)
 {
     // This reads samples in 2 passes and can probably be optimized.
-    uint64_t samplesRead = 0;
+    drflac_uint64 totalSamplesRead = 0;
 
     while (samplesToRead > 0) {
-        int32_t samples32[4096];
-        uint64_t samplesJustRead = drflac_read_s32(pFlac, samplesToRead > 4096 ? 4096 : samplesToRead, samples32);
+        drflac_int32 samples32[4096];
+        drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32);
         if (samplesJustRead == 0) {
             break;  // Reached the end.
         }
 
         // s32 -> s16
-        for (uint64_t i = 0; i < samplesJustRead; ++i) {
-            pBufferOut[i] = (int16_t)(samples32[i] >> 16);
+        for (drflac_uint64 i = 0; i < samplesJustRead; ++i) {
+            pBufferOut[i] = (drflac_int16)(samples32[i] >> 16);
         }
 
-        samplesRead += samplesJustRead;
+        totalSamplesRead += samplesJustRead;
         samplesToRead -= samplesJustRead;
         pBufferOut += samplesJustRead;
     }
 
-    return samplesRead;
+    return totalSamplesRead;
 }
 
-dr_bool32 drflac_seek_to_sample(drflac* pFlac, uint64_t sampleIndex)
+drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut)
+{
+    // This reads samples in 2 passes and can probably be optimized.
+    drflac_uint64 totalSamplesRead = 0;
+
+    while (samplesToRead > 0) {
+        drflac_int32 samples32[4096];
+        drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32);
+        if (samplesJustRead == 0) {
+            break;  // Reached the end.
+        }
+
+        // s32 -> f32
+        for (drflac_uint64 i = 0; i < samplesJustRead; ++i) {
+            pBufferOut[i] = (float)(samples32[i] / 2147483648.0);
+        }
+
+        totalSamplesRead += samplesJustRead;
+        samplesToRead -= samplesJustRead;
+        pBufferOut += samplesJustRead;
+    }
+
+    return totalSamplesRead;
+}
+
+drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
 {
     if (pFlac == NULL) {
-        return DR_FALSE;
+        return DRFLAC_FALSE;
+    }
+
+    // If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+    // when the decoder was opened.
+    if (pFlac->firstFramePos == 0) {
+        return DRFLAC_FALSE;
     }
 
     if (sampleIndex == 0) {
@@ -4188,156 +5241,91 @@ dr_bool32 drflac_seek_to_sample(drflac* pFlac, uint64_t sampleIndex)
     }
 
 
-    return DR_TRUE;
+    return DRFLAC_TRUE;
 }
 
 
 
 //// High Level APIs ////
 
-int32_t* drflac__full_decode_and_close_s32(drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, uint64_t* totalSampleCountOut)
-{
-    assert(pFlac != NULL);
+// I couldn't figure out where SIZE_MAX was defined for VC6. If anybody knows, let me know.
+#if defined(_MSC_VER) && _MSC_VER <= 1200
+#ifdef DRFLAC_64BIT
+#define SIZE_MAX    ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+#else
+#define SIZE_MAX    0xFFFFFFFF
+#endif
+#endif
 
-    int32_t* pSampleData = NULL;
-    uint64_t totalSampleCount = pFlac->totalSampleCount;
-
-    if (totalSampleCount == 0)
-    {
-        int32_t buffer[4096];
-
-        size_t sampleDataBufferSize = sizeof(buffer);
-        pSampleData = (int32_t*)malloc(sampleDataBufferSize);
-        if (pSampleData == NULL) {
-            goto on_error;
-        }
-
-        uint64_t samplesRead;
-        while ((samplesRead = (uint64_t)drflac_read_s32(pFlac, sizeof(buffer)/sizeof(buffer[0]), buffer)) > 0)
-        {
-            if (((totalSampleCount + samplesRead) * sizeof(int32_t)) > sampleDataBufferSize) {
-                sampleDataBufferSize *= 2;
-                int32_t* pNewSampleData = (int32_t*)realloc(pSampleData, sampleDataBufferSize);
-                if (pNewSampleData == NULL) {
-                    free(pSampleData);
-                    goto on_error;
-                }
-
-                pSampleData = pNewSampleData;
-            }
-
-            memcpy(pSampleData + totalSampleCount, buffer, (size_t)(samplesRead*sizeof(int32_t)));
-            totalSampleCount += samplesRead;
-        }
-
-        // At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to
-        // protect those ears from random noise!
-        memset(pSampleData + totalSampleCount, 0, (size_t)(sampleDataBufferSize - totalSampleCount*sizeof(int32_t)));
-    }
-    else
-    {
-        uint64_t dataSize = totalSampleCount * sizeof(int32_t);
-        if (dataSize > SIZE_MAX) {
-            goto on_error;  // The decoded data is too big.
-        }
-
-        pSampleData = (int32_t*)malloc((size_t)dataSize);    // <-- Safe cast as per the check above.
-        if (pSampleData == NULL) {
-            goto on_error;
-        }
-
-        uint64_t samplesDecoded = drflac_read_s32(pFlac, pFlac->totalSampleCount, pSampleData);
-        if (samplesDecoded != pFlac->totalSampleCount) {
-            free(pSampleData);
-            goto on_error;  // Something went wrong when decoding the FLAC stream.
-        }
-    }
-
-
-    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;
-    if (channelsOut) *channelsOut = pFlac->channels;
-    if (totalSampleCountOut) *totalSampleCountOut = totalSampleCount;
-
-    drflac_close(pFlac);
-    return pSampleData;
-
-on_error:
-    drflac_close(pFlac);
-    return NULL;
+// Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me.
+#define DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(extension, type) \
+static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)\
+{                                                                                                                                                                   \
+    drflac_assert(pFlac != NULL);                                                                                                                                   \
+                                                                                                                                                                    \
+    type* pSampleData = NULL;                                                                                                                                       \
+    drflac_uint64 totalSampleCount = pFlac->totalSampleCount;                                                                                                       \
+                                                                                                                                                                    \
+    if (totalSampleCount == 0) {                                                                                                                                    \
+        type buffer[4096];                                                                                                                                          \
+                                                                                                                                                                    \
+        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
+        pSampleData = (type*)DRFLAC_MALLOC(sampleDataBufferSize);                                                                                                   \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        drflac_uint64 samplesRead;                                                                                                                                  \
+        while ((samplesRead = (drflac_uint64)drflac_read_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0]), buffer)) > 0) {                                       \
+            if (((totalSampleCount + samplesRead) * sizeof(type)) > sampleDataBufferSize) {                                                                         \
+                sampleDataBufferSize *= 2;                                                                                                                          \
+                type* pNewSampleData = (type*)DRFLAC_REALLOC(pSampleData, sampleDataBufferSize);                                                                    \
+                if (pNewSampleData == NULL) {                                                                                                                       \
+                    DRFLAC_FREE(pSampleData);                                                                                                                       \
+                    goto on_error;                                                                                                                                  \
+                }                                                                                                                                                   \
+                                                                                                                                                                    \
+                pSampleData = pNewSampleData;                                                                                                                       \
+            }                                                                                                                                                       \
+                                                                                                                                                                    \
+            drflac_copy_memory(pSampleData + totalSampleCount, buffer, (size_t)(samplesRead*sizeof(type)));                                                         \
+            totalSampleCount += samplesRead;                                                                                                                        \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
+           protect those ears from random noise! */                                                                                                                 \
+        drflac_zero_memory(pSampleData + totalSampleCount, (size_t)(sampleDataBufferSize - totalSampleCount*sizeof(type)));                                         \
+    } else {                                                                                                                                                        \
+        drflac_uint64 dataSize = totalSampleCount * sizeof(type);                                                                                                   \
+        if (dataSize > SIZE_MAX) {                                                                                                                                  \
+            goto on_error;  /* The decoded data is too big. */                                                                                                      \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        pSampleData = (type*)DRFLAC_MALLOC((size_t)dataSize);    /* <-- Safe cast as per the check above. */                                                        \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        totalSampleCount = drflac_read_##extension(pFlac, pFlac->totalSampleCount, pSampleData);                                                                    \
+    }                                                                                                                                                               \
+                                                                                                                                                                    \
+    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
+    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
+    if (totalSampleCountOut) *totalSampleCountOut = totalSampleCount;                                                                                               \
+                                                                                                                                                                    \
+    drflac_close(pFlac);                                                                                                                                            \
+    return pSampleData;                                                                                                                                             \
+                                                                                                                                                                    \
+on_error:                                                                                                                                                           \
+    drflac_close(pFlac);                                                                                                                                            \
+    return NULL;                                                                                                                                                    \
 }
 
-int16_t* drflac__full_decode_and_close_s16(drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, uint64_t* totalSampleCountOut)
-{
-    assert(pFlac != NULL);
+DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s32, drflac_int32)
+DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s16, drflac_int16)
+DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(f32, float)
 
-    int16_t* pSampleData = NULL;
-    uint64_t totalSampleCount = pFlac->totalSampleCount;
-
-    if (totalSampleCount == 0)
-    {
-        int16_t buffer[4096];
-
-        size_t sampleDataBufferSize = sizeof(buffer);
-        pSampleData = (int16_t*)malloc(sampleDataBufferSize);
-        if (pSampleData == NULL) {
-            goto on_error;
-        }
-
-        uint64_t samplesRead;
-        while ((samplesRead = (uint64_t)drflac_read_s16(pFlac, sizeof(buffer)/sizeof(buffer[0]), buffer)) > 0)
-        {
-            if (((totalSampleCount + samplesRead) * sizeof(int16_t)) > sampleDataBufferSize) {
-                sampleDataBufferSize *= 2;
-                int16_t* pNewSampleData = (int16_t*)realloc(pSampleData, sampleDataBufferSize);
-                if (pNewSampleData == NULL) {
-                    free(pSampleData);
-                    goto on_error;
-                }
-
-                pSampleData = pNewSampleData;
-            }
-
-            memcpy(pSampleData + totalSampleCount, buffer, (size_t)(samplesRead*sizeof(int16_t)));
-            totalSampleCount += samplesRead;
-        }
-
-        // At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to
-        // protect those ears from random noise!
-        memset(pSampleData + totalSampleCount, 0, (size_t)(sampleDataBufferSize - totalSampleCount*sizeof(int16_t)));
-    }
-    else
-    {
-        uint64_t dataSize = totalSampleCount * sizeof(int16_t);
-        if (dataSize > SIZE_MAX) {
-            goto on_error;  // The decoded data is too big.
-        }
-
-        pSampleData = (int16_t*)malloc((size_t)dataSize);    // <-- Safe cast as per the check above.
-        if (pSampleData == NULL) {
-            goto on_error;
-        }
-
-        uint64_t samplesDecoded = drflac_read_s16(pFlac, pFlac->totalSampleCount, pSampleData);
-        if (samplesDecoded != pFlac->totalSampleCount) {
-            free(pSampleData);
-            goto on_error;  // Something went wrong when decoding the FLAC stream.
-        }
-    }
-
-
-    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;
-    if (channelsOut) *channelsOut = pFlac->channels;
-    if (totalSampleCountOut) *totalSampleCountOut = totalSampleCount;
-
-    drflac_close(pFlac);
-    return pSampleData;
-
-on_error:
-    drflac_close(pFlac);
-    return NULL;
-}
-
-int32_t* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount)
+drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
 {
     // Safety.
     if (sampleRate) *sampleRate = 0;
@@ -4352,7 +5340,7 @@ int32_t* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc on
     return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount);
 }
 
-int16_t* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount)
+drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
 {
     // Safety.
     if (sampleRate) *sampleRate = 0;
@@ -4367,8 +5355,23 @@ int16_t* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc on
     return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount);
 }
 
+float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+{
+    // Safety.
+    if (sampleRate) *sampleRate = 0;
+    if (channels) *channels = 0;
+    if (totalSampleCount) *totalSampleCount = 0;
+
+    drflac* pFlac = drflac_open(onRead, onSeek, pUserData);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount);
+}
+
 #ifndef DR_FLAC_NO_STDIO
-int32_t* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount)
+drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
 {
     if (sampleRate) *sampleRate = 0;
     if (channels) *channels = 0;
@@ -4382,7 +5385,7 @@ int32_t* drflac_open_and_decode_file_s32(const char* filename, unsigned int* cha
     return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount);
 }
 
-int16_t* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount)
+drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
 {
     if (sampleRate) *sampleRate = 0;
     if (channels) *channels = 0;
@@ -4395,9 +5398,23 @@ int16_t* drflac_open_and_decode_file_s16(const char* filename, unsigned int* cha
 
     return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount);
 }
+
+float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+{
+    if (sampleRate) *sampleRate = 0;
+    if (channels) *channels = 0;
+    if (totalSampleCount) *totalSampleCount = 0;
+
+    drflac* pFlac = drflac_open_file(filename);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount);
+}
 #endif
 
-int32_t* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount)
+drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
 {
     if (sampleRate) *sampleRate = 0;
     if (channels) *channels = 0;
@@ -4411,7 +5428,7 @@ int32_t* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, un
     return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount);
 }
 
-int16_t* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, uint64_t* totalSampleCount)
+drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
 {
     if (sampleRate) *sampleRate = 0;
     if (channels) *channels = 0;
@@ -4425,15 +5442,29 @@ int16_t* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, un
     return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount);
 }
 
+float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+{
+    if (sampleRate) *sampleRate = 0;
+    if (channels) *channels = 0;
+    if (totalSampleCount) *totalSampleCount = 0;
+
+    drflac* pFlac = drflac_open_memory(data, dataSize);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount);
+}
+
 void drflac_free(void* pSampleDataReturnedByOpenAndDecode)
 {
-    free(pSampleDataReturnedByOpenAndDecode);
+    DRFLAC_FREE(pSampleDataReturnedByOpenAndDecode);
 }
 
 
 
 
-void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, uint32_t commentCount, const char* pComments)
+void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const char* pComments)
 {
     if (pIter == NULL) {
         return;
@@ -4443,7 +5474,7 @@ void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter,
     pIter->pRunningData   = pComments;
 }
 
-const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, uint32_t* pCommentLengthOut)
+const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
 {
     // Safety.
     if (pCommentLengthOut) *pCommentLengthOut = 0;
@@ -4452,7 +5483,7 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, ui
         return NULL;
     }
 
-    uint32_t length = drflac__le2host_32(*(uint32_t*)pIter->pRunningData);
+    drflac_uint32 length = drflac__le2host_32(*(drflac_uint32*)pIter->pRunningData);
     pIter->pRunningData += 4;
 
     const char* pComment = pIter->pRunningData;
@@ -4467,20 +5498,63 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, ui
 
 // REVISION HISTORY
 //
+// v0.8d - 2017-09-22
+//   - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
+//
+// v0.8c - 2017-09-07
+//   - Fix warning on non-x86/x64 architectures.
+//
+// v0.8b - 2017-08-19
+//   - Fix build on non-x86/x64 architectures.
+//
+// v0.8a - 2017-08-13
+//   - A small optimization for the Clang build.
+//
+// v0.8 - 2017-08-12
+//   - API CHANGE: Rename dr_* types to drflac_*.
+//   - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
+//   - Add support for custom implementations of malloc(), realloc(), etc.
+//   - Add CRC checking to Ogg encapsulated streams.
+//   - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
+//   - Bug fixes.
+//
+// v0.7 - 2017-07-23
+//   - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
+//
+// v0.6 - 2017-07-22
+//   - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
+//     never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
+//
+// v0.5 - 2017-07-16
+//   - Fix typos.
+//   - Change drflac_bool* types to unsigned.
+//   - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
+//
+// v0.4f - 2017-03-10
+//   - Fix a couple of bugs with the bitstreaming code.
+//
+// v0.4e - 2017-02-17
+//   - Fix some warnings.
+//
+// v0.4d - 2016-12-26
+//   - Add support for 32-bit floating-point PCM decoding.
+//   - Use drflac_int*/drflac_uint* sized types to improve compiler support.
+//   - Minor improvements to documentation.
+//
 // v0.4c - 2016-12-26
 //   - Add support for signed 16-bit integer PCM decoding.
 //
 // v0.4b - 2016-10-23
-//   - A minor change to dr_bool8 and dr_bool32 types.
+//   - A minor change to drflac_bool8 and drflac_bool32 types.
 //
 // v0.4a - 2016-10-11
-//   - Rename drBool32 to dr_bool32 for styling consistency.
+//   - Rename drBool32 to dr_bool32 for styling consistency.
 //
 // v0.4 - 2016-09-29
 //   - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
-//   - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32()
+//   - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
 //   - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
-//     keep it consistent with dr_audio.
+//     keep it consistent with dr_audio.
 //
 // v0.3f - 2016-09-21
 //   - Fix a warning with GCC.
@@ -4533,11 +5607,6 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, ui
 //   - Initial versioned release.
 
 
-// TODO
-// - Add support for initializing the decoder without a header STREAMINFO block.
-// - Test CUESHEET metadata blocks.
-
-
 /*
 This is free and unencumbered software released into the public domain.
 
diff --git a/raylib/external/dr_wav.h b/raylib/external/dr_wav.h
new file mode 100644
index 0000000..536df8f
--- /dev/null
+++ b/raylib/external/dr_wav.h
@@ -0,0 +1,3455 @@
+// WAV audio loader and writer. Public domain. See "unlicense" statement at the end of this file.
+// dr_wav - v0.7a - 2017-11-17
+//
+// David Reid - mackron@gmail.com
+
+// USAGE
+//
+// This is a single-file library. To use it, do something like the following in one .c file.
+//     #define DR_WAV_IMPLEMENTATION
+//     #include "dr_wav.h"
+//
+// You can then #include this file in other parts of the program as you would with any other header file. Do something
+// like the following to read audio data:
+//
+//     drwav wav;
+//     if (!drwav_init_file(&wav, "my_song.wav")) {
+//         // Error opening WAV file.
+//     }
+//
+//     drwav_int32* pDecodedInterleavedSamples = malloc(wav.totalSampleCount * sizeof(drwav_int32));
+//     size_t numberOfSamplesActuallyDecoded = drwav_read_s32(&wav, wav.totalSampleCount, pDecodedInterleavedSamples);
+//
+//     ...
+//
+//     drwav_uninit(&wav);
+//
+// You can also use drwav_open() to allocate and initialize the loader for you:
+//
+//     drwav* pWav = drwav_open_file("my_song.wav");
+//     if (pWav == NULL) {
+//         // Error opening WAV file.
+//     }
+//
+//     ...
+//
+//     drwav_close(pWav);
+//
+// If you just want to quickly open and read the audio data in a single operation you can do something like this:
+//
+//     unsigned int channels;
+//     unsigned int sampleRate;
+//     drwav_uint64 totalSampleCount;
+//     drwav_int32* pSampleData = drwav_open_and_read_file_s32("my_song.wav", &channels, &sampleRate, &totalSampleCount);
+//     if (pSampleData == NULL) {
+//         // Error opening and reading WAV file.
+//     }
+//
+//     ...
+//
+//     drwav_free(pSampleData);
+//
+// The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in
+// this case), but you can still output the audio data in its internal format (see notes below for supported formats):
+//
+//     size_t samplesRead = drwav_read(&wav, wav.totalSampleCount, pDecodedInterleavedSamples);
+//
+// You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for
+// a particular data format:
+//
+//     size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
+//
+//
+// dr_wav has seamless support for the Sony Wave64 format. The decoder will automatically detect it and it should Just Work
+// without any manual intervention.
+//
+//
+// dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at
+// drwav_open_write(), drwav_open_file_write(), etc. Use drwav_write() to write samples, or drwav_write_raw() to write
+// raw data in the "data" chunk.
+//
+//     drwav_data_format format;
+//     format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
+//     format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
+//     format.channels = 2;
+//     format.sampleRate = 44100;
+//     format.bitsPerSample = 16;
+//     drwav* pWav = drwav_open_file_write("data/recording.wav", &format);
+//
+//     ...
+//
+//     drwav_uint64 samplesWritten = drwav_write(pWav, sampleCount, pSamples);
+//
+//
+//
+// OPTIONS
+// #define these options before including this file.
+//
+// #define DR_WAV_NO_CONVERSION_API
+//   Disables conversion APIs such as drwav_read_f32() and drwav_s16_to_f32().
+//
+// #define DR_WAV_NO_STDIO
+//   Disables drwav_open_file(), drwav_open_file_write(), etc.
+//
+//
+//
+// QUICK NOTES
+// - Samples are always interleaved.
+// - The default read function does not do any data conversion. Use drwav_read_f32() to read and convert audio data
+//   to IEEE 32-bit floating point samples, drwav_read_s32() to read samples as signed 32-bit PCM and drwav_read_s16()
+//   to read samples as signed 16-bit PCM. Tested and supported internal formats include the following:
+//   - Unsigned 8-bit PCM
+//   - Signed 12-bit PCM
+//   - Signed 16-bit PCM
+//   - Signed 24-bit PCM
+//   - Signed 32-bit PCM
+//   - IEEE 32-bit floating point.
+//   - IEEE 64-bit floating point.
+//   - A-law and u-law
+//   - Microsoft ADPCM
+//   - IMA ADPCM (DVI, format code 0x11)
+// - dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
+
+
+#ifndef dr_wav_h
+#define dr_wav_h
+
+#include <stddef.h>
+
+#if defined(_MSC_VER) && _MSC_VER < 1600
+typedef   signed char    drwav_int8;
+typedef unsigned char    drwav_uint8;
+typedef   signed short   drwav_int16;
+typedef unsigned short   drwav_uint16;
+typedef   signed int     drwav_int32;
+typedef unsigned int     drwav_uint32;
+typedef   signed __int64 drwav_int64;
+typedef unsigned __int64 drwav_uint64;
+#else
+#include <stdint.h>
+typedef int8_t           drwav_int8;
+typedef uint8_t          drwav_uint8;
+typedef int16_t          drwav_int16;
+typedef uint16_t         drwav_uint16;
+typedef int32_t          drwav_int32;
+typedef uint32_t         drwav_uint32;
+typedef int64_t          drwav_int64;
+typedef uint64_t         drwav_uint64;
+#endif
+typedef drwav_uint8      drwav_bool8;
+typedef drwav_uint32     drwav_bool32;
+#define DRWAV_TRUE       1
+#define DRWAV_FALSE      0
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Common data formats.
+#define DR_WAVE_FORMAT_PCM          0x1
+#define DR_WAVE_FORMAT_ADPCM        0x2
+#define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
+#define DR_WAVE_FORMAT_ALAW         0x6
+#define DR_WAVE_FORMAT_MULAW        0x7
+#define DR_WAVE_FORMAT_DVI_ADPCM    0x11
+#define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
+
+typedef enum
+{
+    drwav_seek_origin_start,
+    drwav_seek_origin_current
+} drwav_seek_origin;
+
+typedef enum
+{
+    drwav_container_riff,
+    drwav_container_w64
+} drwav_container;
+
+// Callback for when data is read. Return value is the number of bytes actually read.
+//
+// pUserData   [in]  The user data that was passed to drwav_init(), drwav_open() and family.
+// pBufferOut  [out] The output buffer.
+// bytesToRead [in]  The number of bytes to read.
+//
+// Returns the number of bytes actually read.
+//
+// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+// either the entire bytesToRead is filled or you have reached the end of the stream.
+typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+// Callback for when data is written. Return value is the number of bytes actually written.
+//
+// pUserData    [in]  The user data that was passed to drwav_init_write(), drwav_open_write() and family.
+// pData        [in]  A pointer to the data to write.
+// bytesToWrite [in]  The number of bytes to write.
+//
+// Returns the number of bytes actually written.
+//
+// If the return value differs from bytesToWrite, it indicates an error.
+typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
+
+// Callback for when data needs to be seeked.
+//
+// pUserData [in] The user data that was passed to drwav_init(), drwav_open() and family.
+// offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+// origin    [in] The origin of the seek - the current position or the start of the stream.
+//
+// Returns whether or not the seek was successful.
+//
+// Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
+// will be either drwav_seek_origin_start or drwav_seek_origin_current.
+typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
+
+// Structure for internal use. Only used for loaders opened with drwav_open_memory().
+typedef struct
+{
+    const drwav_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drwav__memory_stream;
+
+// Structure for internal use. Only used for writers opened with drwav_open_memory_write().
+typedef struct
+{
+    void** ppData;
+    size_t* pDataSize;
+    size_t dataSize;
+    size_t dataCapacity;
+    size_t currentWritePos;
+} drwav__memory_stream_write;
+
+typedef struct
+{
+    drwav_container container;  // RIFF, W64.
+    drwav_uint32 format;        // DR_WAVE_FORMAT_*
+    drwav_uint32 channels;
+    drwav_uint32 sampleRate;
+    drwav_uint32 bitsPerSample;
+} drwav_data_format;
+
+typedef struct
+{
+    // The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
+    // that require support for data formats not natively supported by dr_wav.
+    drwav_uint16 formatTag;
+
+    // The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc.
+    drwav_uint16 channels;
+
+    // The sample rate. Usually set to something like 44100.
+    drwav_uint32 sampleRate;
+
+    // Average bytes per second. You probably don't need this, but it's left here for informational purposes.
+    drwav_uint32 avgBytesPerSec;
+
+    // Block align. This is equal to the number of channels * bytes per sample.
+    drwav_uint16 blockAlign;
+
+    // Bits per sample, exactly as specified in the "fmt" chunk.
+    drwav_uint16 bitsPerSample;
+
+    // The size of the extended data (0 when the "fmt" chunk has none). Only used internally for validation.
+    drwav_uint16 extendedSize;
+
+    // The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
+    // is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
+    // many bits are valid per sample. Mainly used for informational purposes. Left at 0 when not specified.
+    drwav_uint16 validBitsPerSample;
+
+    // The channel mask. Not used at the moment. Left at 0 when not specified.
+    drwav_uint32 channelMask;
+
+    // The sub-format GUID, exactly as specified by the wave file. All zeros when not specified.
+    drwav_uint8 subFormat[16];
+} drwav_fmt;
+
+typedef struct
+{
+    // A pointer to the function to call when more data is needed.
+    drwav_read_proc onRead;
+
+    // A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode.
+    drwav_write_proc onWrite;
+
+    // A pointer to the function to call when the wav file needs to be seeked.
+    drwav_seek_proc onSeek;
+
+    // The user data to pass to callbacks.
+    void* pUserData;
+
+
+    // Whether or not the WAV file is formatted as a standard RIFF file or W64.
+    drwav_container container;
+
+
+    // Structure containing format information exactly as specified by the wav file.
+    drwav_fmt fmt;
+
+    // The sample rate. Will be set to something like 44100.
+    drwav_uint32 sampleRate;
+
+    // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc.
+    drwav_uint16 channels;
+
+    // The bits per sample. Will be set to something like 16, 24, etc.
+    drwav_uint16 bitsPerSample;
+
+    // The number of bytes per sample.
+    drwav_uint16 bytesPerSample;
+
+    // Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE).
+    drwav_uint16 translatedFormatTag;
+
+    // The total number of samples making up the audio data. Use <totalSampleCount> * <bytesPerSample> to calculate
+    // the required size of a buffer to hold the entire audio data.
+    drwav_uint64 totalSampleCount;
+
+
+    // The size in bytes of the data chunk.
+    drwav_uint64 dataChunkDataSize;
+    
+    // The position in the stream of the first byte of the data chunk. This is used for seeking.
+    drwav_uint64 dataChunkDataPos;
+
+    // The number of bytes remaining in the data chunk.
+    drwav_uint64 bytesRemaining;
+
+
+    // A hack to avoid a DRWAV_MALLOC() for the stream state of drwav_open_memory() / drwav_open_memory_write().
+    drwav__memory_stream memoryStream;
+    drwav__memory_stream_write memoryStreamWrite;
+
+    // Generic data for compressed formats. This data is shared across all block-compressed formats.
+    struct
+    {
+        drwav_uint64 iCurrentSample;    // The index of the next sample that will be read by drwav_read_*(). This is used with "totalSampleCount" to ensure we don't read excess samples at the end of the last block.
+    } compressed;
+    
+    // Microsoft ADPCM specific data.
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_uint16 predictor[2];
+        drwav_int32  delta[2];
+        drwav_int32  cachedSamples[4];  // Samples are stored in this cache during decoding.
+        drwav_uint32 cachedSampleCount;
+        drwav_int32  prevSamples[2][2]; // The previous 2 samples for each channel (2 channels at most).
+    } msadpcm;
+
+    // IMA ADPCM specific data.
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_int32  predictor[2];
+        drwav_int32  stepIndex[2];
+        drwav_int32  cachedSamples[16]; // Samples are stored in this cache during decoding.
+        drwav_uint32 cachedSampleCount;
+    } ima;
+} drwav;
+
+
+// Initializes a pre-allocated drwav object.
+//
+// onRead    [in]           The function to call when data needs to be read from the client.
+// onSeek    [in]           The function to call when the read position of the client data needs to move.
+// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+//
+// Returns true if successful; false otherwise.
+//
+// Close the loader with drwav_uninit().
+//
+// This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
+// to open the stream from a file or from a block of memory respectively.
+//
+// If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate
+// a drwav object on the heap and return a pointer to it.
+//
+// See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
+drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData);
+
+// Initializes a pre-allocated drwav object for writing.
+//
+// onWrite   [in]           The function to call when data needs to be written.
+// onSeek    [in]           The function to call when the write position needs to move.
+// pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+//
+// Returns true if successful; false otherwise.
+//
+// Close the writer with drwav_uninit().
+//
+// This is the lowest level function for initializing a WAV file for writing. You can also use drwav_init_file_write()
+// and drwav_init_memory_write() to write the stream to a file or to a block of memory respectively.
+//
+// If you want dr_wav to manage the memory allocation for you, consider using drwav_open_write() instead. This will
+// allocate a drwav object on the heap and return a pointer to it.
+//
+// See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
+drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData);
+
+// Uninitializes the given drwav object.
+//
+// Use this only for objects initialized with drwav_init().
+void drwav_uninit(drwav* pWav);
+
+
+// Opens a wav file using the given callbacks.
+//
+// onRead    [in]           The function to call when data needs to be read from the client.
+// onSeek    [in]           The function to call when the read position of the client data needs to move.
+// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+//
+// Returns null on error.
+//
+// Close the loader with drwav_close().
+//
+// This is the lowest level function for opening a WAV file. You can also use drwav_open_file() and drwav_open_memory()
+// to open the stream from a file or from a block of memory respectively.
+//
+// This is different from drwav_init() in that it will allocate the drwav object for you via DRWAV_MALLOC() before
+// initializing it.
+//
+// See also: drwav_open_file(), drwav_open_memory(), drwav_close()
+drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData);
+
+// Opens a wav file for writing using the given callbacks.
+//
+// onWrite   [in]           The function to call when data needs to be written.
+// onSeek    [in]           The function to call when the write position needs to move.
+// pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+//
+// Returns null on error.
+//
+// Close the writer with drwav_close().
+//
+// This is the lowest level function for opening a WAV file. You can also use drwav_open_file_write() and drwav_open_memory_write()
+// to open the stream from a file or from a block of memory respectively.
+//
+// This is different from drwav_init_write() in that it will allocate the drwav object for you via DRWAV_MALLOC() before
+// initializing it.
+//
+// See also: drwav_open_file_write(), drwav_open_memory_write(), drwav_close()
+drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData);
+
+// Uninitializes and deletes the given drwav object.
+//
+// Use this only for objects created with drwav_open().
+void drwav_close(drwav* pWav);
+
+
+// Reads raw audio data.
+//
+// This is the lowest level function for reading audio data. It simply reads the given number of
+// bytes of the raw internal sample data.
+//
+// Consider using drwav_read_s16(), drwav_read_s32() or drwav_read_f32() for reading sample data in
+// a consistent format.
+//
+// Returns the number of bytes actually read.
+size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
+
+// Reads a chunk of audio data in the native internal format.
+//
+// This is typically the most efficient way to retrieve audio data, but it does not do any format
+// conversions which means you'll need to convert the data manually if required.
+//
+// If the return value is less than <samplesToRead> it means the end of the file has been reached or
+// you have requested more samples than can possibly fit in the output buffer.
+//
+// This function will only work when sample data is of a fixed size and uncompressed. If you are
+// using a compressed format consider using drwav_read_raw() or drwav_read_s16/s32/f32/etc().
+drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut);
+
+// Seeks to the given sample.
+//
+// Returns true if successful; false otherwise.
+drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample);
+
+
+// Writes raw audio data.
+//
+// Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
+size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
+
+// Writes audio data based on sample counts.
+//
+// Returns the number of samples written.
+drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData);
+
+
+
+//// Conversion Utilities ////
+#ifndef DR_WAV_NO_CONVERSION_API
+
+// Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
+//
+// Returns the number of samples actually read.
+//
+// If the return value is less than <samplesToRead> it means the end of the file has been reached.
+drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+
+// Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples.
+void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples.
+void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples.
+void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+// Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples.
+void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount);
+
+// Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples.
+void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
+
+// Low-level function for converting A-law samples to signed 16-bit PCM samples.
+void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting u-law samples to signed 16-bit PCM samples.
+void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+// Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
+//
+// Returns the number of samples actually read.
+//
+// If the return value is less than <samplesToRead> it means the end of the file has been reached.
+drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut);
+
+// Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples.
+void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples.
+void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples.
+void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples.
+void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+// Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples.
+void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount);
+
+// Low-level function for converting A-law samples to IEEE 32-bit floating point samples.
+void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting u-law samples to IEEE 32-bit floating point samples.
+void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+// Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
+//
+// Returns the number of samples actually read.
+//
+// If the return value is less than <samplesToRead> it means the end of the file has been reached.
+drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut);
+
+// Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples.
+void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples.
+void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+// Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples.
+void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples.
+void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount);
+
+// Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples.
+void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
+
+// Low-level function for converting A-law samples to signed 32-bit PCM samples.
+void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+// Low-level function for converting u-law samples to signed 32-bit PCM samples.
+void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+#endif  //DR_WAV_NO_CONVERSION_API
+
+
+//// High-Level Convenience Helpers ////
+
+#ifndef DR_WAV_NO_STDIO
+
+// Helper for initializing a wave file using stdio.
+//
+// This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+// objects because the operating system may restrict the number of file handles an application can have open at
+// any given time.
+drwav_bool32 drwav_init_file(drwav* pWav, const char* filename);
+
+// Helper for initializing a wave file for writing using stdio.
+//
+// This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+// objects because the operating system may restrict the number of file handles an application can have open at
+// any given time.
+drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat);
+
+// Helper for opening a wave file using stdio.
+//
+// This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav
+// objects because the operating system may restrict the number of file handles an application can have open at
+// any given time.
+drwav* drwav_open_file(const char* filename);
+
+// Helper for opening a wave file for writing using stdio.
+//
+// This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav
+// objects because the operating system may restrict the number of file handles an application can have open at
+// any given time.
+drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat);
+
+#endif  //DR_WAV_NO_STDIO
+
+// Helper for initializing a loader from a pre-allocated memory buffer.
+//
+// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+// the lifetime of the drwav object.
+//
+// The buffer should contain the contents of the entire wave file, not just the sample data.
+drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize);
+
+// Helper for initializing a writer which outputs data to a memory buffer.
+//
+// dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+//
+// The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
+// considered valid until after drwav_uninit() has been called anyway.
+drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat);
+
+// Helper for opening a loader from a pre-allocated memory buffer.
+//
+// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+// the lifetime of the drwav object.
+//
+// The buffer should contain the contents of the entire wave file, not just the sample data.
+drwav* drwav_open_memory(const void* data, size_t dataSize);
+
+// Helper for opening a writer which outputs data to a memory buffer.
+//
+// dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+//
+// The buffer will remain allocated even after drwav_close() is called. Indeed, the buffer should not be
+// considered valid until after drwav_close() has been called anyway.
+drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat);
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+// Opens and reads a wav file in a single operation.
+drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+#ifndef DR_WAV_NO_STDIO
+// Opens and decodes a wav file in a single operation.
+drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+#endif
+
+// Opens and decodes a wav file from a block of memory in a single operation.
+drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+#endif
+
+// Frees data that was allocated internally by dr_wav.
+void drwav_free(void* pDataReturnedByOpenAndRead);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // dr_wav_h
+
+
+/////////////////////////////////////////////////////
+//
+// IMPLEMENTATION
+//
+/////////////////////////////////////////////////////
+
+#ifdef DR_WAV_IMPLEMENTATION
+#include <stdlib.h>
+#include <string.h> // For memcpy(), memset()
+#include <limits.h> // For INT_MAX
+
+#ifndef DR_WAV_NO_STDIO
+#include <stdio.h>
+#endif
+
+// Standard library stuff.
+#ifndef DRWAV_ASSERT
+#include <assert.h>
+#define DRWAV_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRWAV_MALLOC
+#define DRWAV_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRWAV_REALLOC
+#define DRWAV_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRWAV_FREE
+#define DRWAV_FREE(p)                      free((p))
+#endif
+#ifndef DRWAV_COPY_MEMORY
+#define DRWAV_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRWAV_ZERO_MEMORY
+#define DRWAV_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+
+#define drwav_countof(x)                   (sizeof(x) / sizeof(x[0]))
+#define drwav_align(x, a)                  ((((x) + (a) - 1) / (a)) * (a))
+#define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
+#define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
+#define drwav_clamp(x, lo, hi)             (drwav_max((lo), drwav_min((hi), (x))))
+
+#define drwav_assert                       DRWAV_ASSERT
+#define drwav_copy_memory                  DRWAV_COPY_MEMORY
+#define drwav_zero_memory                  DRWAV_ZERO_MEMORY
+
+
+#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  // 64 for AVX-512 in the future.
+
+#ifdef _MSC_VER
+#define DRWAV_INLINE __forceinline
+#else
+#ifdef __GNUC__
+#define DRWAV_INLINE inline __attribute__((always_inline))
+#else
+#define DRWAV_INLINE inline
+#endif
+#endif
+
+// I couldn't figure out where SIZE_MAX was defined for VC6. If anybody knows, let me know.
+#if defined(_MSC_VER) && _MSC_VER <= 1200
+    #if defined(_WIN64)
+        #define SIZE_MAX    ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define SIZE_MAX    0xFFFFFFFF
+    #endif
+#endif
+
+static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    // 66666972-912E-11CF-A5D6-28DB04C10000
+static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 65766177-ACF3-11D3-8CD1-00C04F8EDB8A
+static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A
+static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A
+static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 74636166-ACF3-11D3-8CD1-00C04F8EDB8A
+static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 61746164-ACF3-11D3-8CD1-00C04F8EDB8A
+
+// Returns DRWAV_TRUE if the two 16-byte GUIDs are identical, DRWAV_FALSE otherwise.
+// The bytes are compared one at a time: both inputs are plain drwav_uint8 arrays with no 4-byte alignment
+// guarantee, so reading them through a drwav_uint32* could be a misaligned (and type-punned) access.
+static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
+{
+    int i;
+    for (i = 0; i < 16; ++i) {
+        if (a[i] != b[i]) { return DRWAV_FALSE; }
+    }
+    return DRWAV_TRUE;
+}
+
+// Returns non-zero if the first four bytes of <a> equal the first four characters of <b>.
+// Comparison stops at the first mismatch, so later bytes are not read once a difference is found.
+static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const unsigned char* a, const char* b)
+{
+    int matched = 0;
+    while (matched < 4 && a[matched] == b[matched]) { matched += 1; }
+    return matched == 4;
+}
+
+
+
+static DRWAV_INLINE int drwav__is_little_endian()
+{
+    const int probe = 1;    // The lowest-addressed byte of <probe> holds the 1 only on a little-endian host.
+    return *(const char*)&probe == 1;
+}
+
+// Decodes a little-endian 16-bit unsigned integer from the first 2 bytes of <data>.
+//
+// Assembling the value with shifts is independent of the host byte order, so no host-endianness branch is
+// needed (the previous big-endian branch byte-swapped the result and produced wrong values on such hosts).
+static DRWAV_INLINE unsigned short drwav__bytes_to_u16(const unsigned char* data)
+{
+    return (unsigned short)((data[0] << 0) | (data[1] << 8));
+}
+
+static DRWAV_INLINE short drwav__bytes_to_s16(const unsigned char* data)
+{
+    return (short)drwav__bytes_to_u16(data);    // Same two bytes as drwav__bytes_to_u16(), cast to a signed short.
+}
+
+// Decodes a little-endian 32-bit unsigned integer from the first 4 bytes of <data>.
+//
+// Each byte is widened to unsigned int before shifting so that data[3] << 24 cannot overflow a signed int.
+// The shifts assemble the value arithmetically, so the result is the same on little- and big-endian hosts.
+static DRWAV_INLINE unsigned int drwav__bytes_to_u32(const unsigned char* data)
+{
+    return ((unsigned int)data[0] << 0) | ((unsigned int)data[1] << 8) | ((unsigned int)data[2] << 16) | ((unsigned int)data[3] << 24);
+}
+
+// Decodes a little-endian 64-bit unsigned integer from the first 8 bytes of <data>.
+//
+// Assembling the value with shifts is independent of the host byte order, so the same expression is used
+// on every host (the previous big-endian branch reversed the bytes and produced wrong values there).
+static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const unsigned char* data)
+{
+    const drwav_uint64 lo =
+        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24);
+    const drwav_uint64 hi =
+        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
+    return lo | hi;
+}
+
+static DRWAV_INLINE void drwav__bytes_to_guid(const unsigned char* data, drwav_uint8* guid)
+{
+    // Copies the 16 bytes at <data> into <guid> unchanged (no byte reordering is done).
+    int i;
+    for (i = 0; i != 16; i += 1) { guid[i] = data[i]; }
+}
+
+
+static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag)
+{
+    const drwav_bool32 isAdpcm    = (formatTag == DR_WAVE_FORMAT_ADPCM);        // Non-zero for the ADPCM tag.
+    const drwav_bool32 isDviAdpcm = (formatTag == DR_WAVE_FORMAT_DVI_ADPCM);    // Non-zero for the DVI ADPCM tag.
+    return isAdpcm || isDviAdpcm;
+}
+
+
+typedef struct
+{
+    union
+    {
+        drwav_uint8 fourcc[4];  // Chunk ID as read for RIFF containers (4 bytes).
+        drwav_uint8 guid[16];   // Chunk ID as read for W64 containers (16 bytes).
+    } id;
+
+    // The size in bytes of the chunk. For W64 the 24 byte chunk header has already been subtracted.
+    drwav_uint64 sizeInBytes;
+
+    // Set by drwav__read_chunk_header() to sizeInBytes % 2 for RIFF (2 byte alignment) and to
+    // sizeInBytes % 8 for W64 (8 byte alignment).
+    unsigned int paddingSize;
+
+} drwav__chunk_header;
+
+// Reads the next chunk header via onRead: a 4 byte fourcc + 32-bit size for RIFF, a 16 byte GUID + 64-bit size for W64.
+//
+// On success fills pHeaderOut, adds the header size (8 bytes for RIFF, 24 bytes for W64) to *pRunningBytesReadOut
+// and returns DRWAV_TRUE. Returns DRWAV_FALSE if onRead delivers fewer bytes than a whole header needs.
+static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav__chunk_header* pHeaderOut)
+{
+    if (container == drwav_container_riff) {
+        unsigned char sizeInBytes[4];
+        if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
+            return DRWAV_FALSE;
+        }
+        if (onRead(pUserData, sizeInBytes, 4) != 4) {
+            return DRWAV_FALSE;
+        }
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
+        pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 2);
+        *pRunningBytesReadOut += 8;
+    } else {
+        unsigned char sizeInBytes[8];
+        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
+            return DRWAV_FALSE;
+        }
+        if (onRead(pUserData, sizeInBytes, 8) != 8) {
+            return DRWAV_FALSE;
+        }
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    // <-- Subtract 24 because w64 includes the size of the header.
+        pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 8);
+        *pRunningBytesReadOut += 24;    // Must dereference: advance the caller's byte counter, not the local pointer.
+    }
+
+    return DRWAV_TRUE;
+}
+
+// Moves the stream forward by <offset> bytes relative to the current position.
+//
+// onSeek takes an int offset, so the distance is covered in steps of at most 0x7FFFFFFF bytes. Returns
+// DRWAV_FALSE as soon as one onSeek call fails, DRWAV_TRUE once the whole distance has been covered.
+static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    drwav_uint64 remaining;
+    for (remaining = offset; remaining > 0; ) {
+        int step = 0x7FFFFFFF;
+        if (remaining <= 0x7FFFFFFF) {
+            step = (int)remaining;
+        }
+        if (!onSeek(pUserData, step, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        remaining -= (drwav_uint64)step;
+    }
+    return DRWAV_TRUE;
+}
+
+
+static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
+{   // Reads the "fmt " chunk (skipping any JUNK chunks in front of it) into *fmtOut and adds the bytes consumed to *pRunningBytesReadOut.
+    drwav__chunk_header header;
+    if (!drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header)) {
+        return DRWAV_FALSE;
+    }
+
+
+    // Skip junk chunks: seek past the chunk body and its padding, then recurse to examine the next chunk.
+    if ((container == drwav_container_riff && drwav__fourcc_equal(header.id.fourcc, "JUNK")) || (container == drwav_container_w64 && drwav__guid_equal(header.id.guid, drwavGUID_W64_JUNK))) {
+        if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
+
+        return drwav__read_fmt(onRead, onSeek, pUserData, container, pRunningBytesReadOut, fmtOut);
+    }
+
+
+    // Validation. The chunk must be "fmt " (RIFF fourcc or W64 GUID); any other chunk here is treated as a failure.
+    if (container == drwav_container_riff) {
+        if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
+            return DRWAV_FALSE;
+        }
+    } else {
+        if (!drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT)) {
+            return DRWAV_FALSE;
+        }
+    }
+
+    // The first 16 bytes of the chunk body hold the basic format fields, decoded below at fixed byte offsets.
+    unsigned char fmt[16];
+    if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
+        return DRWAV_FALSE;
+    }
+    *pRunningBytesReadOut += sizeof(fmt);
+
+    fmtOut->formatTag      = drwav__bytes_to_u16(fmt + 0);
+    fmtOut->channels       = drwav__bytes_to_u16(fmt + 2);
+    fmtOut->sampleRate     = drwav__bytes_to_u32(fmt + 4);
+    fmtOut->avgBytesPerSec = drwav__bytes_to_u32(fmt + 8);
+    fmtOut->blockAlign     = drwav__bytes_to_u16(fmt + 12);
+    fmtOut->bitsPerSample  = drwav__bytes_to_u16(fmt + 14);
+    // The extension fields default to zero; they are only filled in when the chunk actually carries them.
+    fmtOut->extendedSize       = 0;
+    fmtOut->validBitsPerSample = 0;
+    fmtOut->channelMask        = 0;
+    memset(fmtOut->subFormat, 0, sizeof(fmtOut->subFormat));
+
+    if (header.sizeInBytes > 16) {    // More than the 16 basic bytes: a 2-byte extension size comes next.
+        unsigned char fmt_cbSize[2];
+        if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
+            return DRWAV_FALSE;    // Expecting more data.
+        }
+        *pRunningBytesReadOut += sizeof(fmt_cbSize);
+
+        int bytesReadSoFar = 18;    // 16 basic bytes + the 2-byte extension size just read.
+
+        fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
+        if (fmtOut->extendedSize > 0) {
+            // Simple validation. The extensible payload is read into a 22-byte buffer below, so only that size is accepted.
+            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                if (fmtOut->extendedSize != 22) {
+                    return DRWAV_FALSE;
+                }
+            }
+
+            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                unsigned char fmtext[22];
+                if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
+                    return DRWAV_FALSE;    // Expecting more data.
+                }
+
+                fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
+                fmtOut->channelMask        = drwav__bytes_to_u32(fmtext + 2);
+                drwav__bytes_to_guid(fmtext + 6, fmtOut->subFormat);
+            } else {    // Any other format tag: the extension bytes are not interpreted, only skipped.
+                if (!onSeek(pUserData, fmtOut->extendedSize, drwav_seek_origin_current)) {
+                    return DRWAV_FALSE;
+                }
+            }
+            *pRunningBytesReadOut += fmtOut->extendedSize;
+
+            bytesReadSoFar += fmtOut->extendedSize;
+        }
+        // NOTE(review): if header.sizeInBytes is smaller than bytesReadSoFar the offset below is negative (a backwards seek) - confirm malformed sizes are acceptable here.
+        // Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size.
+        if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += (header.sizeInBytes - bytesReadSoFar);
+    }
+
+    if (header.paddingSize > 0) {    // Step over the padding that follows the chunk body so the stream sits on the next chunk header.
+        if (!onSeek(pUserData, header.paddingSize, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += header.paddingSize;
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+#ifndef DR_WAV_NO_STDIO
+static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{   // Read callback for stdio streams: pUserData is used as the FILE*; returns the number of bytes fread() delivered.
+    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
+{   // Write callback for stdio streams: pUserData is used as the FILE*; returns the number of bytes fwrite() accepted.
+    return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
+}
+
+static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
+{   // Seek callback for stdio streams: drwav_seek_origin_current maps to SEEK_CUR, any other origin to SEEK_SET; true when fseek() returns 0.
+    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+drwav_bool32 drwav_init_file(drwav* pWav, const char* filename)
+{   // Opens `filename` for binary reading and initializes pWav on it with the stdio callbacks. Returns DRWAV_FALSE if the open or the init fails.
+    FILE* pFile;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    if (fopen_s(&pFile, filename, "rb") != 0) {
+        return DRWAV_FALSE;
+    }
+#else
+    pFile = fopen(filename, "rb");
+    if (pFile == NULL) {
+        return DRWAV_FALSE;
+    }
+#endif
+    // If initialization fails, close the handle here (as drwav_open_file() does); otherwise it would leak.
+    if (!drwav_init(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile)) { fclose(pFile); return DRWAV_FALSE; }
+    return DRWAV_TRUE;    // On success the FILE* stays open; drwav_uninit() closes it when it sees the stdio callbacks.
+}
+
+drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat)
+{   // Opens `filename` for binary writing and initializes pWav as a writer on it. Returns DRWAV_FALSE if the open or the init fails.
+    FILE* pFile;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    if (fopen_s(&pFile, filename, "wb") != 0) {
+        return DRWAV_FALSE;
+    }
+#else
+    pFile = fopen(filename, "wb");
+    if (pFile == NULL) {
+        return DRWAV_FALSE;
+    }
+#endif
+    // If initialization fails, close the handle here (as drwav_open_file_write() does); otherwise it would leak.
+    if (!drwav_init_write(pWav, pFormat, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile)) { fclose(pFile); return DRWAV_FALSE; }
+    return DRWAV_TRUE;    // On success the FILE* stays open; drwav_uninit() closes it when it sees the stdio callbacks.
+}
+
+drwav* drwav_open_file(const char* filename)
+{   // Opens `filename` for binary reading and returns the drwav created by drwav_open() on it, or NULL on any failure.
+    FILE* pFile;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    if (fopen_s(&pFile, filename, "rb") != 0) {
+        return NULL;
+    }
+#else
+    pFile = fopen(filename, "rb");
+    if (pFile == NULL) {
+        return NULL;
+    }
+#endif
+
+    drwav* pWav = drwav_open(drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile);
+    if (pWav == NULL) {
+        fclose(pFile);    // No drwav object exists to close the handle later, so close it here.
+        return NULL;
+    }
+
+    return pWav;    // drwav_uninit() closes the FILE* when it sees the stdio read callback.
+}
+
+drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat)
+{   // Opens `filename` for binary writing and returns the drwav created by drwav_open_write() on it, or NULL on any failure.
+    FILE* pFile;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    if (fopen_s(&pFile, filename, "wb") != 0) {
+        return NULL;
+    }
+#else
+    pFile = fopen(filename, "wb");
+    if (pFile == NULL) {
+        return NULL;
+    }
+#endif
+
+    drwav* pWav = drwav_open_write(pFormat, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile);
+    if (pWav == NULL) {
+        fclose(pFile);    // No drwav object exists to close the handle later, so close it here.
+        return NULL;
+    }
+
+    return pWav;    // drwav_uninit() closes the FILE* when it sees the stdio write callback.
+}
+#endif  //DR_WAV_NO_STDIO
+
+
+static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{   // Read callback for the in-memory stream: copies up to bytesToRead bytes from the current read position and returns the count copied.
+    drwav__memory_stream* memory = (drwav__memory_stream*)pUserData;
+    drwav_assert(memory != NULL);
+    drwav_assert(memory->dataSize >= memory->currentReadPos);
+
+    size_t bytesRemaining = memory->dataSize - memory->currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;    // Short read: hand back only what is left in the buffer.
+    }
+
+    if (bytesToRead > 0) {
+        DRWAV_COPY_MEMORY(pBufferOut, memory->data + memory->currentReadPos, bytesToRead);
+        memory->currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;    // 0 once the read position has reached dataSize.
+}
+
+static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
+{   // Seek callback for the read-only memory stream: moves currentReadPos, clamping out-of-range requests. Always returns DRWAV_TRUE.
+    drwav__memory_stream* memory = (drwav__memory_stream*)pUserData;
+    drwav_assert(memory != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (memory->currentReadPos + offset > memory->dataSize) {
+                offset = (int)(memory->dataSize - memory->currentReadPos);  // Trying to seek too far forward.
+            }
+        } else {
+            if (memory->currentReadPos < (size_t)-offset) {
+                offset = -(int)memory->currentReadPos;  // Trying to seek too far backwards.
+            }
+        }
+
+        // This will never underflow thanks to the clamps above.
+        memory->currentReadPos += offset;
+    } else {    // Any other origin is treated as an absolute position from the start of the buffer.
+        if ((drwav_uint32)offset <= memory->dataSize) {
+            memory->currentReadPos = offset;
+        } else {
+            memory->currentReadPos = memory->dataSize;  // Trying to seek too far forward.
+        }
+    }
+    
+    return DRWAV_TRUE;
+}
+
+static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
+{   // Write callback for the growable memory stream: copies pDataIn at the write position, enlarging the buffer when needed.
+    drwav__memory_stream_write* memory = (drwav__memory_stream_write*)pUserData;
+    drwav_assert(memory != NULL);
+    drwav_assert(memory->dataCapacity >= memory->currentWritePos);
+
+    size_t bytesRemaining = memory->dataCapacity - memory->currentWritePos;
+    if (bytesRemaining < bytesToWrite) {
+        // Need to reallocate. Start at 256 bytes, otherwise double the current capacity.
+        size_t newDataCapacity = (memory->dataCapacity == 0) ? 256 : memory->dataCapacity * 2;
+
+        // If doubling wasn't enough, just make it the minimum required size to write the data.
+        if ((newDataCapacity - memory->currentWritePos) < bytesToWrite) {
+            newDataCapacity = memory->currentWritePos + bytesToWrite;
+        }
+
+        void* pNewData = DRWAV_REALLOC(*memory->ppData, newDataCapacity);
+        if (pNewData == NULL) {
+            return 0;    // Nothing written; *memory->ppData and the capacity are left as they were.
+        }
+
+        *memory->ppData = pNewData;
+        memory->dataCapacity = newDataCapacity;
+    }
+
+    drwav_uint8* pDataOut = (drwav_uint8*)(*memory->ppData);
+    DRWAV_COPY_MEMORY(pDataOut + memory->currentWritePos, pDataIn, bytesToWrite);
+
+    memory->currentWritePos += bytesToWrite;
+    if (memory->dataSize < memory->currentWritePos) {    // dataSize only grows; writes inside existing data leave it unchanged.
+        memory->dataSize = memory->currentWritePos;
+    }
+
+    *memory->pDataSize = memory->dataSize;    // Publish the current size through the caller-supplied pointer.
+
+    return bytesToWrite;
+}
+
+static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
+{   // Seek callback for the growable memory stream: moves currentWritePos, clamping out-of-range requests. Always returns DRWAV_TRUE.
+    drwav__memory_stream_write* memory = (drwav__memory_stream_write*)pUserData;
+    drwav_assert(memory != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (memory->currentWritePos + offset > memory->dataSize) {
+                offset = (int)(memory->dataSize - memory->currentWritePos);  // Trying to seek too far forward.
+            }
+        } else {
+            if (memory->currentWritePos < (size_t)-offset) {
+                offset = -(int)memory->currentWritePos;  // Trying to seek too far backwards.
+            }
+        }
+
+        // This will never underflow thanks to the clamps above.
+        memory->currentWritePos += offset;
+    } else {    // Any other origin is treated as an absolute position; the limit is dataSize (bytes written), not dataCapacity.
+        if ((drwav_uint32)offset <= memory->dataSize) {
+            memory->currentWritePos = offset;
+        } else {
+            memory->currentWritePos = memory->dataSize;  // Trying to seek too far forward.
+        }
+    }
+    
+    return DRWAV_TRUE;
+}
+
+drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize)
+{   // Initializes pWav to read a WAV image from the buffer `data` of `dataSize` bytes. The buffer is referenced, not copied.
+    if (data == NULL || dataSize == 0) {
+        return DRWAV_FALSE;
+    }
+
+    drwav__memory_stream memoryStream;
+    drwav_zero_memory(&memoryStream, sizeof(memoryStream));
+    memoryStream.data = (const unsigned char*)data;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+
+    if (!drwav_init(pWav, drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream)) {
+        return DRWAV_FALSE;
+    }
+    // drwav_init() ran against the stack-local stream; copy its state into pWav and re-point pUserData at that copy.
+    pWav->memoryStream = memoryStream;
+    pWav->pUserData = &pWav->memoryStream;
+    return DRWAV_TRUE;
+}
+
+drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat)
+{   // Initializes pWav as a writer into a heap buffer that grows on demand; *ppData and *pDataSize track the buffer and its size.
+    if (ppData == NULL || pDataSize == NULL) {    // Both outputs are written immediately below, so neither may be NULL.
+        return DRWAV_FALSE;
+    }
+
+    *ppData = NULL; // Important because we're using realloc()!
+    *pDataSize = 0;
+
+    drwav__memory_stream_write memoryStreamWrite;
+    drwav_zero_memory(&memoryStreamWrite, sizeof(memoryStreamWrite));
+    memoryStreamWrite.ppData = ppData;
+    memoryStreamWrite.pDataSize = pDataSize;
+    memoryStreamWrite.dataSize = 0;
+    memoryStreamWrite.dataCapacity = 0;
+    memoryStreamWrite.currentWritePos = 0;
+
+    if (!drwav_init_write(pWav, pFormat, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite)) {
+        return DRWAV_FALSE;
+    }
+    // drwav_init_write() ran against the stack-local stream; copy its state into pWav and re-point pUserData at that copy.
+    pWav->memoryStreamWrite = memoryStreamWrite;
+    pWav->pUserData = &pWav->memoryStreamWrite;
+    return DRWAV_TRUE;
+}
+
+drwav* drwav_open_memory(const void* data, size_t dataSize)
+{   // Returns a drwav created by drwav_open() that reads from the buffer `data` of `dataSize` bytes, or NULL on failure.
+    if (data == NULL || dataSize == 0) {
+        return NULL;
+    }
+
+    drwav__memory_stream memoryStream;
+    drwav_zero_memory(&memoryStream, sizeof(memoryStream));
+    memoryStream.data = (const unsigned char*)data;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+
+    drwav* pWav = drwav_open(drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream);
+    if (pWav == NULL) {
+        return NULL;
+    }
+    // drwav_open() ran against the stack-local stream; copy its state into pWav and re-point pUserData at that copy.
+    pWav->memoryStream = memoryStream;
+    pWav->pUserData = &pWav->memoryStream;
+    return pWav;
+}
+
+drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat)
+{   // Returns a drwav writer backed by a heap buffer that grows on demand; *ppData and *pDataSize track the buffer. NULL on failure.
+    if (ppData == NULL || pDataSize == NULL) {    // Both outputs are written immediately below, so neither may be NULL.
+        return NULL;
+    }
+
+    *ppData = NULL; // Important because we're using realloc()!
+    *pDataSize = 0;
+
+    drwav__memory_stream_write memoryStreamWrite;
+    drwav_zero_memory(&memoryStreamWrite, sizeof(memoryStreamWrite));
+    memoryStreamWrite.ppData = ppData;
+    memoryStreamWrite.pDataSize = pDataSize;
+    memoryStreamWrite.dataSize = 0;
+    memoryStreamWrite.dataCapacity = 0;
+    memoryStreamWrite.currentWritePos = 0;
+
+    drwav* pWav = drwav_open_write(pFormat, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite);
+    if (pWav == NULL) {
+        return NULL;
+    }
+    // drwav_open_write() ran against the stack-local stream; copy its state into pWav and re-point pUserData at that copy.
+    pWav->memoryStreamWrite = memoryStreamWrite;
+    pWav->pUserData = &pWav->memoryStreamWrite;
+    return pWav;
+}
+
+
+drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData)
+{   // Validates the RIFF/W64 header, reads the "fmt " chunk and skips ahead to the "data" chunk. DRWAV_FALSE on any read or validation failure.
+    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    drwav_zero_memory(pWav, sizeof(*pWav));
+
+    // The first 4 bytes should be the RIFF identifier.
+    unsigned char riff[4];
+    if (onRead(pUserData, riff, sizeof(riff)) != sizeof(riff)) {
+        return DRWAV_FALSE;    // Failed to read data.
+    }
+
+    // The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
+    // w64 it will start with "riff".
+    if (drwav__fourcc_equal(riff, "RIFF")) {
+        pWav->container = drwav_container_riff;
+    } else if (drwav__fourcc_equal(riff, "riff")) {
+        pWav->container = drwav_container_w64;
+
+        // Check the rest of the GUID for validity.
+        drwav_uint8 riff2[12];
+        if (onRead(pUserData, riff2, sizeof(riff2)) != sizeof(riff2)) {
+            return DRWAV_FALSE;
+        }
+
+        for (int i = 0; i < 12; ++i) {
+            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
+                return DRWAV_FALSE;
+            }
+        }
+    } else {
+        return DRWAV_FALSE;   // Unknown or unsupported container.
+    }
+
+    if (pWav->container == drwav_container_riff) {
+        // RIFF/WAVE
+        unsigned char chunkSizeBytes[4];
+        if (onRead(pUserData, chunkSizeBytes, sizeof(chunkSizeBytes)) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        unsigned int chunkSize = drwav__bytes_to_u32(chunkSizeBytes);
+        if (chunkSize < 36) {
+            return DRWAV_FALSE;    // Chunk size should always be at least 36 bytes.
+        }
+
+        unsigned char wave[4];
+        if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav__fourcc_equal(wave, "WAVE")) {
+            return DRWAV_FALSE;    // Expecting "WAVE".
+        }
+
+        pWav->dataChunkDataPos = 4 + sizeof(chunkSizeBytes) + sizeof(wave);
+    } else {
+        // W64
+        unsigned char chunkSize[8];
+        if (onRead(pUserData, chunkSize, sizeof(chunkSize)) != sizeof(chunkSize)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__bytes_to_u64(chunkSize) < 80) {
+            return DRWAV_FALSE;
+        }
+
+        drwav_uint8 wave[16];
+        if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
+            return DRWAV_FALSE;
+        }
+
+        pWav->dataChunkDataPos = 16 + sizeof(chunkSize) + sizeof(wave);
+    }
+
+    // The next 24 bytes should be the "fmt " chunk.
+    drwav_fmt fmt;
+    if (!drwav__read_fmt(onRead, onSeek, pUserData, pWav->container, &pWav->dataChunkDataPos, &fmt)) {
+        return DRWAV_FALSE;    // Failed to read the "fmt " chunk.
+    }
+
+    // Both values are used as divisors further down, so a file declaring either as zero is rejected as malformed.
+    if (fmt.channels == 0 || fmt.blockAlign == 0) {
+        return DRWAV_FALSE;
+    }
+
+    // Translate the internal format.
+    unsigned short translatedFormatTag = fmt.formatTag;
+    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+        translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
+    }
+
+    drwav_uint64 sampleCountFromFactChunk = 0;
+
+    // The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop.
+    drwav_uint64 dataSize;
+    for (;;)
+    {
+        drwav__chunk_header header;
+        if (!drwav__read_chunk_header(onRead, pUserData, pWav->container, &pWav->dataChunkDataPos, &header)) {
+            return DRWAV_FALSE;
+        }
+
+        dataSize = header.sizeInBytes;
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "data")) {
+                break;
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
+                break;
+            }
+        }
+
+        // Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats.
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
+                drwav_uint32 sampleCount;
+                if (onRead(pUserData, &sampleCount, 4) != 4) {
+                    return DRWAV_FALSE;
+                }
+                pWav->dataChunkDataPos += 4;
+                dataSize -= 4;
+
+                // The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
+                // for Microsoft ADPCM formats.
+                if (translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {    // Use the local tag: pWav->translatedFormatTag is still zero at this point.
+                    sampleCountFromFactChunk = sampleCount;
+                } else {
+                    sampleCountFromFactChunk = 0;
+                }
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
+                if (onRead(pUserData, &sampleCountFromFactChunk, 8) != 8) {
+                    return DRWAV_FALSE;
+                }
+                pWav->dataChunkDataPos += 8;    // 8 bytes were consumed by the read above.
+                dataSize -= 8;
+            }
+        }
+
+        // If we get here it means we didn't find the "data" chunk. Seek past it, making sure we also skip the padding.
+        dataSize += header.paddingSize;
+        if (!drwav__seek_forward(onSeek, dataSize, pUserData)) {
+            return DRWAV_FALSE;    // Without a successful seek the stream is not positioned on the next chunk header.
+        }
+        pWav->dataChunkDataPos += dataSize;
+    }
+
+    // At this point we should be sitting on the first byte of the raw audio data.
+
+    pWav->onRead              = onRead;
+    pWav->onSeek              = onSeek;
+    pWav->pUserData           = pUserData;
+    pWav->fmt                 = fmt;
+    pWav->sampleRate          = fmt.sampleRate;
+    pWav->channels            = fmt.channels;
+    pWav->bitsPerSample       = fmt.bitsPerSample;
+    pWav->bytesPerSample      = (unsigned int)(fmt.blockAlign / fmt.channels);
+    pWav->bytesRemaining      = dataSize;
+    pWav->translatedFormatTag = translatedFormatTag;
+    pWav->dataChunkDataSize   = dataSize;
+
+    if (sampleCountFromFactChunk != 0) {
+        pWav->totalSampleCount = sampleCountFromFactChunk * fmt.channels;
+    } else {
+        pWav->totalSampleCount = (pWav->bytesPerSample != 0) ? (dataSize / pWav->bytesPerSample) : 0;    // bytesPerSample is 0 when blockAlign < channels.
+
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+            drwav_uint64 blockCount = dataSize / fmt.blockAlign;
+            pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2;  // x2 because two samples per byte.
+        }
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+            drwav_uint64 blockCount = dataSize / fmt.blockAlign;
+            pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels);
+        }
+    }
+    
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        pWav->bytesPerSample = 0;
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    // I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
+    // it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
+    // from the number of blocks, however this results in the inclusion of the extra silent samples at the end of the last block. The correct
+    // way to know the total sample count is to inspect the "fact" chunk which should always be present for compressed formats, and should
+    // always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
+    // correctness tests against libsndfile and is disabled by default.
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        drwav_uint64 blockCount = dataSize / fmt.blockAlign;
+        pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2;  // x2 because two samples per byte.
+    }
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        drwav_uint64 blockCount = dataSize / fmt.blockAlign;
+        pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels);
+    }
+#endif
+
+    return DRWAV_TRUE;
+}
+
+drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData)
+{   // Initializes pWav for writing and emits the container, "fmt " and "data" headers with placeholder sizes. DRWAV_FALSE on failure.
+    if (onWrite == NULL || onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+    // NOTE(review): pWav and pFormat are dereferenced below without a NULL check - callers presumably must pass valid pointers.
+    // Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this.
+    if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
+        return DRWAV_FALSE;
+    }
+    if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return DRWAV_FALSE;
+    }
+
+
+    drwav_zero_memory(pWav, sizeof(*pWav));
+    pWav->onWrite = onWrite;
+    pWav->onSeek = onSeek;
+    pWav->pUserData = pUserData;
+    pWav->fmt.formatTag = (drwav_uint16)pFormat->format;
+    pWav->fmt.channels = (drwav_uint16)pFormat->channels;
+    pWav->fmt.sampleRate = pFormat->sampleRate;
+    pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) >> 3);    // >> 3 converts bits to bytes.
+    pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) >> 3);
+    pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->fmt.extendedSize = 0;
+
+    size_t runningPos = 0;    // Sum of the byte counts returned by onWrite(); checked against the expected header size below.
+
+    // "RIFF" chunk. The size field is written as 0 here; drwav_uninit() seeks back and fills it in.
+    drwav_uint64 chunkSizeRIFF = 0;
+    if (pFormat->container == drwav_container_riff) {
+        runningPos += pWav->onWrite(pUserData, "RIFF", 4);
+        runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 4);
+        runningPos += pWav->onWrite(pUserData, "WAVE", 4);
+    } else {
+        runningPos += pWav->onWrite(pUserData, drwavGUID_W64_RIFF, 16);
+        runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 8);
+        runningPos += pWav->onWrite(pUserData, drwavGUID_W64_WAVE, 16);
+    }
+    // NOTE(review): the RIFF paths write the first 4 bytes of a 64-bit variable, which yields the value only on little-endian hosts - confirm.
+    // "fmt " chunk.
+    drwav_uint64 chunkSizeFMT;
+    if (pFormat->container == drwav_container_riff) {
+        chunkSizeFMT = 16;
+        runningPos += pWav->onWrite(pUserData, "fmt ", 4);
+        runningPos += pWav->onWrite(pUserData, &chunkSizeFMT, 4);
+    } else {
+        chunkSizeFMT = 40;    // 16 bytes of fields + the 24-byte W64 chunk header (16-byte GUID + 8-byte size).
+        runningPos += pWav->onWrite(pUserData, drwavGUID_W64_FMT, 16);
+        runningPos += pWav->onWrite(pUserData, &chunkSizeFMT, 8);
+    }
+
+    runningPos += pWav->onWrite(pUserData, &pWav->fmt.formatTag,      2);
+    runningPos += pWav->onWrite(pUserData, &pWav->fmt.channels,       2);
+    runningPos += pWav->onWrite(pUserData, &pWav->fmt.sampleRate,     4);
+    runningPos += pWav->onWrite(pUserData, &pWav->fmt.avgBytesPerSec, 4);
+    runningPos += pWav->onWrite(pUserData, &pWav->fmt.blockAlign,     2);
+    runningPos += pWav->onWrite(pUserData, &pWav->fmt.bitsPerSample,  2);
+
+    pWav->dataChunkDataPos = runningPos;    // Offset of the "data" chunk header itself; drwav_uninit() adds 4 (RIFF) or 16 (W64) to reach the size field.
+    pWav->dataChunkDataSize = 0;
+
+    // "data" chunk. The size field is written as 0 here; drwav_uninit() seeks back and fills it in.
+    drwav_uint64 chunkSizeDATA = 0;
+    if (pFormat->container == drwav_container_riff) {
+        runningPos += pWav->onWrite(pUserData, "data", 4);
+        runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 4);
+    } else {
+        runningPos += pWav->onWrite(pUserData, drwavGUID_W64_DATA, 16);
+        runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 8);
+    }
+
+
+    // Simple validation. A mismatch means at least one onWrite() call above returned fewer bytes than requested.
+    if (pFormat->container == drwav_container_riff) {
+        if (runningPos != 20 + chunkSizeFMT + 8) {
+            return DRWAV_FALSE;
+        }
+    } else {
+        if (runningPos != 40 + chunkSizeFMT + 24) {
+            return DRWAV_FALSE;
+        }
+    }
+    
+
+
+    // Set some properties for the client's convenience.
+    pWav->container = pFormat->container;
+    pWav->channels = (drwav_uint16)pFormat->channels;
+    pWav->sampleRate = pFormat->sampleRate;
+    pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->bytesPerSample = (drwav_uint16)(pFormat->bitsPerSample >> 3);
+    pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
+
+    return DRWAV_TRUE;
+}
+
+void drwav_uninit(drwav* pWav)
+{   // Finishes a drwav: in write mode pads the data and patches the chunk sizes; closes the FILE* of stdio streams. Does not free pWav.
+    if (pWav == NULL) {
+        return;
+    }
+
+    // If the drwav object was opened in write mode we'll need to finalize a few things:
+    //   - Make sure the "data" chunk is aligned to 16-bits (RIFF) or to 8 bytes (W64)
+    //   - Set the size of the "data" chunk.
+    if (pWav->onWrite != NULL) {
+        // Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding.
+        drwav_uint32 paddingSize = 0;
+        if (pWav->container == drwav_container_riff) {
+            paddingSize = (drwav_uint32)(pWav->dataChunkDataSize % 2);
+        } else {
+            paddingSize = (drwav_uint32)(pWav->dataChunkDataSize % 8);
+        }
+        
+        if (paddingSize > 0) {
+            drwav_uint64 paddingData = 0;    // 8 zero bytes; only the first paddingSize of them are written.
+            pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
+        }
+
+
+        // Chunk sizes. These overwrite the zero placeholders at fixed offsets from the start of the stream.
+        if (pWav->onSeek) {
+            if (pWav->container == drwav_container_riff) {
+                // The "RIFF" chunk size. Saturates at 0xFFFFFFFF when the data does not fit in 32 bits.
+                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
+                    drwav_uint32 riffChunkSize = 36;
+                    if (pWav->dataChunkDataSize <= (0xFFFFFFFF - 36)) {
+                        riffChunkSize = 36 + (drwav_uint32)pWav->dataChunkDataSize;
+                    } else {
+                        riffChunkSize = 0xFFFFFFFF;
+                    }
+
+                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
+                }
+
+                // the "data" chunk size. dataChunkDataPos + 4 skips the 4-byte "data" id to reach the size field.
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
+                    drwav_uint32 dataChunkSize = 0;
+                    if (pWav->dataChunkDataSize <= 0xFFFFFFFF) {
+                        dataChunkSize = (drwav_uint32)pWav->dataChunkDataSize;
+                    } else {
+                        dataChunkSize = 0xFFFFFFFF;
+                    }
+                    
+                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
+                }
+            } else {
+                // The "RIFF" chunk size. Offset 16 skips the 16-byte GUID to reach the size field.
+                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
+                    drwav_uint64 riffChunkSize = 80 + 24 + pWav->dataChunkDataSize;
+                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
+                }
+
+                // The "data" chunk size. dataChunkDataPos + 16 skips the 16-byte GUID to reach the size field.
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
+                    drwav_uint64 dataChunkSize = 24 + pWav->dataChunkDataSize;  // +24 because W64 includes the size of the GUID and size fields.
+                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
+                }
+            }
+        }
+    }
+
+#ifndef DR_WAV_NO_STDIO
+    // If the stream is backed by a FILE* (any of the *_file() entry points) we will want to close the file handle. We can know whether or
+    // not that is the case by looking at the onRead and onWrite callbacks.
+    if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
+        fclose((FILE*)pWav->pUserData);
+    }
+#endif
+}
+
+
+drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData)
+{   // Heap-allocating counterpart of drwav_init(): returns a newly allocated, initialized drwav, or NULL on failure.
+    drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav));
+    if (pWav == NULL) {
+        return NULL;
+    }
+
+    if (!drwav_init(pWav, onRead, onSeek, pUserData)) {
+        DRWAV_FREE(pWav);    // Initialization failed: release the allocation so nothing leaks.
+        return NULL;
+    }
+
+    return pWav;    // drwav_close() performs the matching drwav_uninit() + DRWAV_FREE().
+}
+
+drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData)
+{   // Heap-allocating counterpart of drwav_init_write(): returns a newly allocated writer, or NULL on failure.
+    drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav));
+    if (pWav == NULL) {
+        return NULL;
+    }
+
+    if (!drwav_init_write(pWav, pFormat, onWrite, onSeek, pUserData)) {
+        DRWAV_FREE(pWav);    // Initialization failed: release the allocation so nothing leaks.
+        return NULL;
+    }
+
+    return pWav;    // drwav_close() performs the matching drwav_uninit() + DRWAV_FREE().
+}
+
+void drwav_close(drwav* pWav)
+{   // Uninitializes pWav and then releases it with DRWAV_FREE(); the counterpart of the drwav_open*() functions.
+    drwav_uninit(pWav);    // Returns immediately when pWav is NULL.
+    DRWAV_FREE(pWav);
+}
+
+
+size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
+{   // Reads up to bytesToRead bytes through onRead() with no conversion; returns the number of bytes actually read.
+    if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    if (bytesToRead > pWav->bytesRemaining) {
+        bytesToRead = (size_t)pWav->bytesRemaining;    // Never read past the bytes still accounted to the "data" chunk.
+    }
+
+    size_t bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+
+    pWav->bytesRemaining -= bytesRead;
+    return bytesRead;
+}
+
+drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut)
+{   // Reads up to samplesToRead samples in the file's native sample format into pBufferOut; returns the number of whole samples read.
+    if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    // Cannot use this function for compressed formats. A zero bytesPerSample is also refused because it is used as a divisor below.
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag) || pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    // Don't try to read more samples than can potentially fit in the output buffer. Dividing avoids overflowing the 64-bit product.
+    if (samplesToRead > SIZE_MAX / pWav->bytesPerSample) {
+        samplesToRead = SIZE_MAX / pWav->bytesPerSample;
+    }
+
+    size_t bytesRead = drwav_read_raw(pWav, (size_t)(samplesToRead * pWav->bytesPerSample), pBufferOut);
+    return bytesRead / pWav->bytesPerSample;
+}
+
+drwav_bool32 drwav_seek_to_first_sample(drwav* pWav)
+{   // Rewinds the stream to dataChunkDataPos and resets the read bookkeeping. NOTE(review): pWav and onSeek are not NULL-checked here.
+    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        pWav->compressed.iCurrentSample = 0;    // Compressed formats track the position as a sample index as well.
+    }
+    
+    pWav->bytesRemaining = pWav->dataChunkDataSize;
+    return DRWAV_TRUE;
+}
+
+drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample)
+{   // Moves the read position to `sample`, clamped to the last sample. Returns DRWAV_FALSE if a seek or read fails.
+    // Seeking should be compatible with wave files > 2GB.
+
+    if (pWav == NULL || pWav->onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    // If there are no samples, just return DRWAV_TRUE without doing anything.
+    if (pWav->totalSampleCount == 0) {
+        return DRWAV_TRUE;
+    }
+
+    // Make sure the sample is clamped.
+    if (sample >= pWav->totalSampleCount) {
+        sample  = pWav->totalSampleCount - 1;
+    }
+
+
+    // For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
+    // to seek back to the start.
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        // TODO: This can be optimized.
+        if (sample > pWav->compressed.iCurrentSample) {
+            // Seeking forward - just move from the current position.
+            drwav_uint64 offset = sample - pWav->compressed.iCurrentSample;
+
+            drwav_int16 devnull[2048];
+            while (offset > 0) {
+                drwav_uint64 samplesToRead = offset;    // Size each read from what is left to skip, so `offset` cannot underflow.
+                if (samplesToRead > 2048) {
+                    samplesToRead = 2048;
+                }
+
+                drwav_uint64 samplesRead = drwav_read_s16(pWav, samplesToRead, devnull);
+                if (samplesRead != samplesToRead) {
+                    return DRWAV_FALSE;
+                }
+
+                offset -= samplesRead;
+            }
+        } else {
+            // Seeking backwards. Just use the fallback.
+            goto fallback;
+        }
+    } else {
+        drwav_uint64 totalSizeInBytes = pWav->totalSampleCount * pWav->bytesPerSample;
+        drwav_assert(totalSizeInBytes >= pWav->bytesRemaining);
+
+        drwav_uint64 currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
+        drwav_uint64 targetBytePos  = sample * pWav->bytesPerSample;
+
+        drwav_uint64 offset;
+        if (currentBytePos < targetBytePos) {
+            // Offset forwards.
+            offset = (targetBytePos - currentBytePos);
+        } else {
+            // Offset backwards. Rewind to the first sample, then seek forwards from there.
+            if (!drwav_seek_to_first_sample(pWav)) {
+                return DRWAV_FALSE;
+            }
+            offset = targetBytePos;
+        }
+
+        while (offset > 0) {    // onSeek() takes an int offset, so move in steps of at most INT_MAX bytes.
+            int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
+            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+
+            pWav->bytesRemaining -= offset32;
+            offset -= offset32;
+        }
+    }
+
+    return DRWAV_TRUE;
+
+fallback:
+    // This is a generic seek implementation that just continuously reads samples into a temporary buffer. This should work for all supported
+    // formats, but it is not efficient. This should be used as a fall back.
+    if (!drwav_seek_to_first_sample(pWav)) {
+        return DRWAV_FALSE;
+    }
+
+    drwav_int16 devnull[2048];
+    while (sample > 0) {
+        drwav_uint64 samplesToRead = sample;
+        if (samplesToRead > 2048) {
+            samplesToRead = 2048;
+        }
+
+        drwav_uint64 samplesRead = drwav_read_s16(pWav, samplesToRead, devnull);
+        if (samplesRead != samplesToRead) {
+            return DRWAV_FALSE;
+        }
+
+        sample -= samplesRead;
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+// Writes bytesToWrite bytes from pData through the onWrite callback and adds the byte count reported by the callback to
+// dataChunkDataSize. Returns that byte count, or 0 when pWav or pData is NULL or bytesToWrite is 0.
+size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
+{
+    size_t writtenByteCount = 0;
+
+    if (pWav != NULL && bytesToWrite != 0 && pData != NULL) {
+        writtenByteCount = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
+        pWav->dataChunkDataSize += writtenByteCount;
+    }
+
+    return writtenByteCount;
+}
+
+// Writes samplesToWrite samples from pData. The sample count is converted to a byte count using bitsPerSample and handed to
+// drwav_write_raw(). Returns the number of samples covered by the bytes actually written, or 0 when an argument is NULL/zero or the byte
+// count does not fit in a size_t.
+drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData)
+{
+    drwav_uint64 byteCount;
+    size_t writtenByteCount;
+
+    if (pWav == NULL || samplesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    byteCount = (samplesToWrite * pWav->bitsPerSample) / 8;
+    if (byteCount > SIZE_MAX) {
+        return 0;
+    }
+
+    writtenByteCount = drwav_write_raw(pWav, (size_t)byteCount, pData);
+    return ((drwav_uint64)writtenByteCount * 8) / pWav->bitsPerSample;
+}
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+// Lookup table with one entry per 8-bit A-law code. Each entry holds a 16-bit pattern which drwav__alaw_to_s16() casts to a signed
+// 16-bit sample.
+static unsigned short g_drwavAlawTable[256] = {
+    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, 
+    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, 
+    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, 
+    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, 
+    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, 
+    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, 
+    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, 
+    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, 
+    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, 
+    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, 
+    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, 
+    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, 
+    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, 
+    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, 
+    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 
+    0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
+};
+
+// Lookup table with one entry per 8-bit mu-law code. Each entry holds a 16-bit pattern which drwav__mulaw_to_s16() casts to a signed
+// 16-bit sample.
+static unsigned short g_drwavMulawTable[256] = {
+    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, 
+    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, 
+    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, 
+    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, 
+    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, 
+    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, 
+    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, 
+    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, 
+    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, 
+    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, 
+    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, 
+    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, 
+    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, 
+    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, 
+    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, 
+    0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
+};
+
+// Decodes one A-law byte by looking it up in g_drwavAlawTable and casting the 16-bit entry to a signed value.
+static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
+{
+    const unsigned short tableEntry = g_drwavAlawTable[sampleIn];
+    return (short)tableEntry;
+}
+
+// Decodes one mu-law byte by looking it up in g_drwavMulawTable and casting the 16-bit entry to a signed value.
+static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
+{
+    const unsigned short tableEntry = g_drwavMulawTable[sampleIn];
+    return (short)tableEntry;
+}
+
+
+
+// Converts totalSampleCount integer PCM samples of bytesPerSample bytes each, read from pIn, to signed 16-bit samples in pOut.
+//
+// 1-byte samples are treated as unsigned; 2-, 3- and 4-byte samples go through dedicated converters. Widths of 5 to 8 bytes are assembled
+// least-significant byte first and reduced to their 16 most significant bits. Any other width (0, or more than 8 bytes) produces silence.
+static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+{
+    size_t i;
+
+    // Special case for 8-bit sample data because it's treated as unsigned.
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+
+    // Slightly more optimal implementation for common formats.
+    if (bytesPerSample == 2) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int16*)pIn)[i];
+        }
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_s16(pOut, (const drwav_int32*)pIn, totalSampleCount);
+        return;
+    }
+
+
+    // Samples wider than 64 bits cannot be assembled by the generic converter below, so output silence instead of shifting out of range.
+    if (bytesPerSample == 0 || bytesPerSample > 8) {
+        for (i = 0; i < totalSampleCount; ++i) {
+            pOut[i] = 0;
+        }
+        return;
+    }
+
+
+    // Generic, slow converter. The sample is left-aligned in a 64-bit accumulator so that its sign bit ends up in bit 63, then the top
+    // 16 bits are kept. The accumulator has to be 64 bits wide: a narrower one loses the high bytes and makes the shifts undefined.
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; ++j) {
+            sample |= (drwav_uint64)pIn[j] << shift;
+            shift  += 8;
+        }
+
+        pIn += bytesPerSample;
+        *pOut++ = (drwav_int16)(sample >> 48);
+    }
+}
+
+// Converts totalSampleCount IEEE floating point samples of bytesPerSample bytes each, read from pIn, to signed 16-bit samples in pOut.
+//
+// Only 32-bit and 64-bit floats are supported; any other width produces silence.
+static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount);
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
+    } else {
+        // The input only holds totalSampleCount * bytesPerSample bytes. Reading it as doubles, as a catch-all branch would, can run past
+        // the end of the caller's buffer when the samples are narrower than 8 bytes.
+        size_t i;
+        for (i = 0; i < totalSampleCount; ++i) {
+            pOut[i] = 0;
+        }
+    }
+}
+
+// Reads up to samplesToRead integer PCM samples and stores them in pBufferOut as signed 16-bit. Returns the number of samples produced,
+// which is smaller than samplesToRead when drwav_read() runs out of data.
+drwav_uint64 drwav_read_s16__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    unsigned char scratch[4096];
+    drwav_uint64 producedCount = 0;
+
+    // 16-bit data needs no conversion, so it is read straight into the caller's buffer.
+    if (pWav->bytesPerSample == 2) {
+        return drwav_read(pWav, samplesToRead, pBufferOut);
+    }
+
+    // Everything else is pulled through the scratch buffer one batch at a time and converted.
+    for (;;) {
+        drwav_uint64 batchCount;
+
+        if (samplesToRead == 0) {
+            break;
+        }
+
+        batchCount = drwav_read(pWav, drwav_min(samplesToRead, sizeof(scratch)/pWav->bytesPerSample), scratch);
+        if (batchCount == 0) {
+            break;
+        }
+
+        drwav__pcm_to_s16(pBufferOut, scratch, (size_t)batchCount, pWav->bytesPerSample);
+
+        producedCount += batchCount;
+        samplesToRead -= batchCount;
+        pBufferOut    += batchCount;
+    }
+
+    return producedCount;
+}
+
+// Reads up to samplesToRead samples from a Microsoft ADPCM stream and decodes them to signed 16-bit.
+//
+// The decoder state (predictor index, delta and the two previous samples per channel, the decoded-sample cache and the number of bytes
+// left in the current block) lives in pWav->msadpcm, so decoding can stop and resume at any sample. A channel count of 1 is decoded as
+// mono; anything else is decoded as stereo.
+//
+// Returns the number of samples written to pBufferOut. This is less than samplesToRead when totalSampleCount is reached, when onRead
+// returns less data than requested, or when a block header is invalid.
+drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    // Delta scale factors indexed by the raw 4-bit nibble, and the predictor coefficient pairs indexed by the block's predictor index.
+    static drwav_int32 adaptationTable[] = {
+        230, 230, 230, 230, 307, 409, 512, 614,
+        768, 614, 512, 409, 307, 230, 230, 230
+    };
+    static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
+    static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
+
+    drwav_uint64 totalSamplesRead = 0;
+
+    drwav_assert(pWav != NULL);
+    drwav_assert(samplesToRead > 0);
+    drwav_assert(pBufferOut != NULL);
+
+    // TODO: Lots of room for optimization here.
+
+    while (samplesToRead > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
+        // If there are no cached samples we need to load a new block.
+        if (pWav->msadpcm.cachedSampleCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                // Mono.
+                drwav_uint8 header[7];
+                if (pWav->fmt.blockAlign < sizeof(header)) {
+                    return totalSamplesRead;    // Invalid data. The block is too small to hold its own header.
+                }
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalSamplesRead;
+                }
+
+                // The predictor index comes straight from the file and is used to index the coefficient tables, so it must be validated.
+                if (header[0] >= drwav_countof(coeff1Table)) {
+                    return totalSamplesRead;    // Invalid data.
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0] = header[0];
+                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 1);
+                pWav->msadpcm.prevSamples[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3);
+                pWav->msadpcm.prevSamples[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5);
+
+                // The two samples stored in the header are output first, oldest one first.
+                pWav->msadpcm.cachedSamples[2] = pWav->msadpcm.prevSamples[0][0];
+                pWav->msadpcm.cachedSamples[3] = pWav->msadpcm.prevSamples[0][1];
+                pWav->msadpcm.cachedSampleCount = 2;
+            } else {
+                // Stereo.
+                drwav_uint8 header[14];
+                if (pWav->fmt.blockAlign < sizeof(header)) {
+                    return totalSamplesRead;    // Invalid data. The block is too small to hold its own header.
+                }
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalSamplesRead;
+                }
+
+                // Both predictor indices come straight from the file and are used to index the coefficient tables.
+                if (header[0] >= drwav_countof(coeff1Table) || header[1] >= drwav_countof(coeff1Table)) {
+                    return totalSamplesRead;    // Invalid data.
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0] = header[0];
+                pWav->msadpcm.predictor[1] = header[1];
+                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2);
+                pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4);
+                pWav->msadpcm.prevSamples[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6);
+                pWav->msadpcm.prevSamples[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8);
+                pWav->msadpcm.prevSamples[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10);
+                pWav->msadpcm.prevSamples[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12);
+
+                // The header samples are output first: the older left/right pair, then the newer left/right pair.
+                pWav->msadpcm.cachedSamples[0] = pWav->msadpcm.prevSamples[0][0];
+                pWav->msadpcm.cachedSamples[1] = pWav->msadpcm.prevSamples[1][0];
+                pWav->msadpcm.cachedSamples[2] = pWav->msadpcm.prevSamples[0][1];
+                pWav->msadpcm.cachedSamples[3] = pWav->msadpcm.prevSamples[1][1];
+                pWav->msadpcm.cachedSampleCount = 4;
+            }
+        }
+
+        // Output anything that's cached. The cache is consumed from its tail: the next sample sits cachedSampleCount entries from the end.
+        while (samplesToRead > 0 && pWav->msadpcm.cachedSampleCount > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
+            pBufferOut[0] = (drwav_int16)pWav->msadpcm.cachedSamples[drwav_countof(pWav->msadpcm.cachedSamples) - pWav->msadpcm.cachedSampleCount];
+            pWav->msadpcm.cachedSampleCount -= 1;
+
+            pBufferOut += 1;
+            samplesToRead -= 1;
+            totalSamplesRead += 1;
+            pWav->compressed.iCurrentSample += 1;
+        }
+
+        if (samplesToRead == 0) {
+            return totalSamplesRead;
+        }
+
+
+        // If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        // loop iteration which will trigger the loading of a new block.
+        if (pWav->msadpcm.cachedSampleCount == 0) {
+            if (pWav->msadpcm.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                // Each data byte holds two 4-bit codes, high nibble first. Both are decoded and cached.
+                drwav_uint8 nibbles;
+                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
+                    return totalSamplesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock -= 1;
+
+                // Sign-extend each nibble to 32 bits.
+                // TODO: Optimize away these if statements.
+                drwav_int32 nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
+                drwav_int32 nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
+
+                if (pWav->channels == 1) {
+                    // Mono. Both nibbles belong to the same channel, so the second one is predicted from the result of the first.
+                    drwav_int32 newSample0;
+                    newSample0  = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevSamples[0][0] = pWav->msadpcm.prevSamples[0][1];
+                    pWav->msadpcm.prevSamples[0][1] = newSample0;
+
+
+                    drwav_int32 newSample1;
+                    newSample1  = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[0];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevSamples[0][0] = pWav->msadpcm.prevSamples[0][1];
+                    pWav->msadpcm.prevSamples[0][1] = newSample1;
+
+
+                    pWav->msadpcm.cachedSamples[2] = newSample0;
+                    pWav->msadpcm.cachedSamples[3] = newSample1;
+                    pWav->msadpcm.cachedSampleCount = 2;
+                } else {
+                    // Stereo. The high nibble is decoded with the state of channel 0 and the low nibble with the state of channel 1.
+
+                    // Left.
+                    drwav_int32 newSample0;
+                    newSample0  = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevSamples[0][0] = pWav->msadpcm.prevSamples[0][1];
+                    pWav->msadpcm.prevSamples[0][1] = newSample0;
+
+
+                    // Right.
+                    drwav_int32 newSample1;
+                    newSample1  = ((pWav->msadpcm.prevSamples[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevSamples[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[1];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[1] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[1]) >> 8;
+                    if (pWav->msadpcm.delta[1] < 16) {
+                        pWav->msadpcm.delta[1] = 16;
+                    }
+
+                    pWav->msadpcm.prevSamples[1][0] = pWav->msadpcm.prevSamples[1][1];
+                    pWav->msadpcm.prevSamples[1][1] = newSample1;
+
+                    pWav->msadpcm.cachedSamples[2] = newSample0;
+                    pWav->msadpcm.cachedSamples[3] = newSample1;
+                    pWav->msadpcm.cachedSampleCount = 2;
+                }
+            }
+        }
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead samples from an IMA (DVI) ADPCM stream and decodes them to signed 16-bit.
+//
+// The decoder state (predictor and step index per channel, the decoded-sample cache and the number of bytes left in the current block)
+// lives in pWav->ima, so decoding can stop and resume at any sample. Only mono and stereo streams are decoded.
+//
+// Returns the number of samples written to pBufferOut. This is less than samplesToRead when totalSampleCount is reached, when onRead
+// returns less data than requested, when a block header is invalid or when the channel count is unsupported.
+drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    // Step index adjustment indexed by the 4-bit code, and the quantizer step sizes indexed by the step index.
+    static drwav_int32 indexTable[16] = {
+        -1, -1, -1, -1, 2, 4, 6, 8,
+        -1, -1, -1, -1, 2, 4, 6, 8
+    };
+
+    static drwav_int32 stepTable[89] = {
+        7,     8,     9,     10,    11,    12,    13,    14,    16,    17,
+        19,    21,    23,    25,    28,    31,    34,    37,    41,    45,
+        50,    55,    60,    66,    73,    80,    88,    97,    107,   118,
+        130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
+        337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
+        876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066,
+        2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
+        5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899,
+        15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
+    };
+
+    drwav_uint64 totalSamplesRead = 0;
+
+    drwav_assert(pWav != NULL);
+    drwav_assert(samplesToRead > 0);
+    drwav_assert(pBufferOut != NULL);
+
+    // The block header parsing below only initialises state for one or two channels, while the data decoding loops over every channel.
+    // Zero channels would never make progress and more than two would use state that was never loaded, so neither is decoded.
+    if (pWav->channels == 0 || pWav->channels > 2) {
+        return 0;
+    }
+
+    // TODO: Lots of room for optimization here.
+
+    while (samplesToRead > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
+        // If there are no cached samples we need to load a new block.
+        if (pWav->ima.cachedSampleCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                // Mono.
+                drwav_uint8 header[4];
+                if (pWav->fmt.blockAlign < sizeof(header)) {
+                    return totalSamplesRead;    // Invalid data. The block is too small to hold its own header.
+                }
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalSamplesRead;
+                }
+
+                // The step index comes straight from the file and is used to index stepTable, so it must be validated.
+                if (header[2] >= drwav_countof(stepTable)) {
+                    return totalSamplesRead;    // Invalid data.
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = header[2];
+
+                // The predictor stored in the header is itself the first sample of the block.
+                pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 1] = pWav->ima.predictor[0];
+                pWav->ima.cachedSampleCount = 1;
+            } else {
+                // Stereo.
+                drwav_uint8 header[8];
+                if (pWav->fmt.blockAlign < sizeof(header)) {
+                    return totalSamplesRead;    // Invalid data. The block is too small to hold its own header.
+                }
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalSamplesRead;
+                }
+
+                // Both step indices come straight from the file and are used to index stepTable.
+                if (header[2] >= drwav_countof(stepTable) || header[6] >= drwav_countof(stepTable)) {
+                    return totalSamplesRead;    // Invalid data.
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = header[2];
+                pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4);
+                pWav->ima.stepIndex[1] = header[6];
+
+                // The predictors stored in the header are themselves the first left/right samples of the block.
+                pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 2] = pWav->ima.predictor[0];
+                pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 1] = pWav->ima.predictor[1];
+                pWav->ima.cachedSampleCount = 2;
+            }
+        }
+
+        // Output anything that's cached. The cache is consumed from its tail: the next sample sits cachedSampleCount entries from the end.
+        while (samplesToRead > 0 && pWav->ima.cachedSampleCount > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
+            pBufferOut[0] = (drwav_int16)pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount];
+            pWav->ima.cachedSampleCount -= 1;
+
+            pBufferOut += 1;
+            samplesToRead -= 1;
+            totalSamplesRead += 1;
+            pWav->compressed.iCurrentSample += 1;
+        }
+
+        if (samplesToRead == 0) {
+            return totalSamplesRead;
+        }
+
+        // If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        // loop iteration which will trigger the loading of a new block.
+        if (pWav->ima.cachedSampleCount == 0) {
+            if (pWav->ima.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                // From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
+                // left channel, 4 bytes for the right channel.
+                pWav->ima.cachedSampleCount = 8 * pWav->channels;
+                for (drwav_uint32 iChannel = 0; iChannel < pWav->channels; ++iChannel) {
+                    drwav_uint8 nibbles[4];
+                    if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
+                        return totalSamplesRead;
+                    }
+                    pWav->ima.bytesRemainingInBlock -= 4;
+
+                    for (drwav_uint32 iByte = 0; iByte < 4; ++iByte) {
+                        // The low nibble is decoded before the high nibble.
+                        drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
+                        drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
+
+                        drwav_int32 step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        drwav_int32 predictor = pWav->ima.predictor[iChannel];
+
+                        // Bits 0-2 of the code select fractions of the step size; bit 3 is the sign.
+                        drwav_int32      diff  = step >> 3;
+                        if (nibble0 & 1) diff += step >> 2;
+                        if (nibble0 & 2) diff += step >> 1;
+                        if (nibble0 & 4) diff += step;
+                        if (nibble0 & 8) diff  = -diff;
+
+                        // Samples are written interleaved by channel so that the cache can be output in order.
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedSamples[(drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount) + (iByte*2+0)*pWav->channels + iChannel] = predictor;
+
+
+                        step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        predictor = pWav->ima.predictor[iChannel];
+
+                                         diff  = step >> 3;
+                        if (nibble1 & 1) diff += step >> 2;
+                        if (nibble1 & 2) diff += step >> 1;
+                        if (nibble1 & 4) diff += step;
+                        if (nibble1 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedSamples[(drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount) + (iByte*2+1)*pWav->channels + iChannel] = predictor;
+                    }
+                }
+            }
+        }
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead IEEE floating point samples and stores them in pBufferOut as signed 16-bit. Returns the number of samples
+// produced, which is smaller than samplesToRead when drwav_read() runs out of data.
+drwav_uint64 drwav_read_s16__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    unsigned char scratch[4096];
+    drwav_uint64 producedCount = 0;
+
+    // Raw data is pulled through the scratch buffer one batch at a time and converted.
+    for (;;) {
+        drwav_uint64 batchCount;
+
+        if (samplesToRead == 0) {
+            break;
+        }
+
+        batchCount = drwav_read(pWav, drwav_min(samplesToRead, sizeof(scratch)/pWav->bytesPerSample), scratch);
+        if (batchCount == 0) {
+            break;
+        }
+
+        drwav__ieee_to_s16(pBufferOut, scratch, (size_t)batchCount, pWav->bytesPerSample);
+
+        producedCount += batchCount;
+        samplesToRead -= batchCount;
+        pBufferOut    += batchCount;
+    }
+
+    return producedCount;
+}
+
+// Reads up to samplesToRead A-law samples and stores them in pBufferOut as signed 16-bit. Returns the number of samples produced, which
+// is smaller than samplesToRead when drwav_read() runs out of data.
+drwav_uint64 drwav_read_s16__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    unsigned char scratch[4096];
+    drwav_uint64 producedCount = 0;
+
+    // Raw data is pulled through the scratch buffer one batch at a time and converted.
+    for (;;) {
+        drwav_uint64 batchCount;
+
+        if (samplesToRead == 0) {
+            break;
+        }
+
+        batchCount = drwav_read(pWav, drwav_min(samplesToRead, sizeof(scratch)/pWav->bytesPerSample), scratch);
+        if (batchCount == 0) {
+            break;
+        }
+
+        drwav_alaw_to_s16(pBufferOut, scratch, (size_t)batchCount);
+
+        producedCount += batchCount;
+        samplesToRead -= batchCount;
+        pBufferOut    += batchCount;
+    }
+
+    return producedCount;
+}
+
+// Reads up to samplesToRead mu-law samples and stores them in pBufferOut as signed 16-bit. Returns the number of samples produced, which
+// is smaller than samplesToRead when drwav_read() runs out of data.
+drwav_uint64 drwav_read_s16__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    unsigned char scratch[4096];
+    drwav_uint64 producedCount = 0;
+
+    // Raw data is pulled through the scratch buffer one batch at a time and converted.
+    for (;;) {
+        drwav_uint64 batchCount;
+
+        if (samplesToRead == 0) {
+            break;
+        }
+
+        batchCount = drwav_read(pWav, drwav_min(samplesToRead, sizeof(scratch)/pWav->bytesPerSample), scratch);
+        if (batchCount == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_s16(pBufferOut, scratch, (size_t)batchCount);
+
+        producedCount += batchCount;
+        samplesToRead -= batchCount;
+        pBufferOut    += batchCount;
+    }
+
+    return producedCount;
+}
+
+// Reads up to samplesToRead samples as signed 16-bit, dispatching on translatedFormatTag to the reader for PCM, Microsoft ADPCM, IEEE
+// float, A-law, mu-law or IMA (DVI) ADPCM data. Returns the number of samples read; 0 when an argument is NULL/zero or the format tag
+// is not one of those listed.
+drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 samplesRead = 0;
+
+    if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    // Don't try to read more samples than can potentially fit in the output buffer.
+    if (samplesToRead * sizeof(drwav_int16) > SIZE_MAX) {
+        samplesToRead = SIZE_MAX / sizeof(drwav_int16);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        samplesRead = drwav_read_s16__pcm(pWav, samplesToRead, pBufferOut);
+    } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        samplesRead = drwav_read_s16__msadpcm(pWav, samplesToRead, pBufferOut);
+    } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        samplesRead = drwav_read_s16__ieee(pWav, samplesToRead, pBufferOut);
+    } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        samplesRead = drwav_read_s16__alaw(pWav, samplesToRead, pBufferOut);
+    } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        samplesRead = drwav_read_s16__mulaw(pWav, samplesToRead, pBufferOut);
+    } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        samplesRead = drwav_read_s16__ima(pWav, samplesToRead, pBufferOut);
+    }
+
+    return samplesRead;
+}
+
+// Converts sampleCount unsigned 8-bit samples to signed 16-bit by subtracting 128 and shifting the result left by 8 bits.
+void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        int centered = pIn[iSample];
+        centered = centered - 128;
+        pOut[iSample] = (short)(centered << 8);
+        iSample += 1;
+    }
+}
+
+// Converts sampleCount packed 24-bit samples (3 bytes each, least significant byte first) to signed 16-bit by dropping the low 8 bits.
+void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        const unsigned char* pBytes = ((unsigned char*)pIn) + iSample*3;
+
+        // Place the three bytes in the upper 24 bits so the sign bit lands in bit 31, then shift back down with sign extension.
+        unsigned int packed = ((unsigned int)pBytes[0] << 8) | ((unsigned int)pBytes[1] << 16) | ((unsigned int)pBytes[2] << 24);
+        int sample24 = ((int)packed) >> 8;
+
+        pOut[iSample] = (short)(sample24 >> 8);
+        iSample += 1;
+    }
+}
+
+// Converts sampleCount signed 32-bit samples to signed 16-bit by keeping the upper 16 bits of each sample.
+void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        int sample32 = pIn[iSample];
+        pOut[iSample] = (short)(sample32 >> 16);
+        iSample += 1;
+    }
+}
+
+// Converts sampleCount 32-bit float samples to signed 16-bit. Each input is clamped to [-1, 1], shifted to [0, 2], scaled by 32767.5,
+// truncated to an integer and finally re-centred by subtracting 32768.
+void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        float clamped = pIn[iSample];
+        int scaled;
+
+        if (clamped < -1) {
+            clamped = -1;
+        } else if (clamped > 1) {
+            clamped = 1;
+        }
+
+        clamped = clamped + 1;
+        scaled  = (int)(clamped * 32767.5f);
+        scaled  = scaled - 32768;
+
+        pOut[iSample] = (short)scaled;
+        iSample += 1;
+    }
+}
+
+// Converts sampleCount 64-bit float samples to signed 16-bit. Each input is clamped to [-1, 1], shifted to [0, 2], scaled by 32767.5,
+// truncated to an integer and finally re-centred by subtracting 32768.
+void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        double clamped = pIn[iSample];
+        int scaled;
+
+        if (clamped < -1) {
+            clamped = -1;
+        } else if (clamped > 1) {
+            clamped = 1;
+        }
+
+        clamped = clamped + 1;
+        scaled  = (int)(clamped * 32767.5);
+        scaled  = scaled - 32768;
+
+        pOut[iSample] = (short)scaled;
+        iSample += 1;
+    }
+}
+
+// Converts sampleCount A-law bytes to signed 16-bit samples, one drwav__alaw_to_s16() lookup per byte.
+void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        pOut[iSample] = drwav__alaw_to_s16(pIn[iSample]);
+        iSample += 1;
+    }
+}
+
+// Converts sampleCount mu-law bytes to signed 16-bit samples, one drwav__mulaw_to_s16() lookup per byte.
+void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t iSample = 0;
+    while (iSample < sampleCount) {
+        pOut[iSample] = drwav__mulaw_to_s16(pIn[iSample]);
+        iSample += 1;
+    }
+}
+
+
+
+// Converts sampleCount integer PCM samples of bytesPerSample bytes each, read from pIn, to 32-bit float samples in pOut.
+//
+// 1-byte samples are treated as unsigned; 2-, 3- and 4-byte samples go through dedicated converters. Widths of 5 to 8 bytes are assembled
+// least-significant byte first and converted from their 32 most significant bits. Any other width (0, or more than 8 bytes) produces
+// silence.
+static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned short bytesPerSample)
+{
+    size_t i;
+
+    // Special case for 8-bit sample data because it's treated as unsigned.
+    if (bytesPerSample == 1) {
+        drwav_u8_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+
+    // Slightly more optimal implementation for common formats.
+    if (bytesPerSample == 2) {
+        drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_f32(pOut, (const drwav_int32*)pIn, sampleCount);
+        return;
+    }
+
+    // Samples wider than 64 bits cannot be assembled by the generic converter below, so output silence instead of shifting out of range.
+    if (bytesPerSample == 0 || bytesPerSample > 8) {
+        for (i = 0; i < sampleCount; ++i) {
+            pOut[i] = 0;
+        }
+        return;
+    }
+
+    // Generic, slow converter. The sample is left-aligned in a 64-bit accumulator so that its sign bit ends up in bit 63, then the top
+    // 32 bits are scaled to [-1, 1). The accumulator has to be 64 bits wide: a narrower one loses the high bytes and makes the shifts
+    // undefined.
+    for (i = 0; i < sampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; ++j) {
+            sample |= (drwav_uint64)pIn[j] << shift;
+            shift  += 8;
+        }
+
+        pIn += bytesPerSample;
+        *pOut++ = (float)((int)(unsigned int)(sample >> 32) / 2147483648.0);
+    }
+}
+
+// Converts IEEE floating point samples of bytesPerSample bytes each to 32-bit floats.
+//
+// Only 4-byte (float) and 8-byte (double) samples are understood. Any other width produces silence rather than being
+// reinterpreted as double, which would read past the end of the input for widths narrower than 8 bytes.
+static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned short bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        for (size_t i = 0; i < sampleCount; ++i) {
+            *pOut++ = ((const float*)pIn)[i];
+        }
+        return;
+    }
+
+    if (bytesPerSample == 8) {
+        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
+        return;
+    }
+
+    // Unsupported width.
+    for (size_t i = 0; i < sampleCount; ++i) {
+        *pOut++ = 0;
+    }
+}
+
+
+// Reads up to samplesToRead integer PCM samples and converts them to 32-bit floats.
+//
+// Raw data is pulled through drwav_read() in chunks that fit a 4096-byte stack buffer and converted chunk by chunk.
+// Returns the number of samples written to pBufferOut, which is less than requested when drwav_read() runs dry.
+drwav_uint64 drwav_read_f32__pcm(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        pBufferOut += samplesRead;
+
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead samples from a Microsoft ADPCM stream as 32-bit floats.
+//
+// ADPCM decoding is more involved than the other formats, so rather than duplicating it here the samples are decoded by
+// drwav_read_s16() into a 2048-sample staging buffer and then widened to float. Returns the number of samples written.
+drwav_uint64 drwav_read_f32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    drwav_int16 staging[2048];
+    drwav_uint64 samplesDone = 0;
+
+    while (samplesToRead != 0) {
+        drwav_uint64 chunk = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), staging);
+        if (chunk == 0) {
+            break;  // Nothing more could be decoded.
+        }
+
+        // The cast is safe because a chunk never exceeds the 2048-sample staging buffer.
+        drwav_s16_to_f32(pBufferOut, staging, (size_t)chunk);
+
+        samplesDone   += chunk;
+        samplesToRead -= chunk;
+        pBufferOut    += chunk;
+    }
+
+    return samplesDone;
+}
+
+// Reads up to samplesToRead samples from an IMA ADPCM stream as 32-bit floats.
+//
+// IMA ADPCM decoding is more involved than the other formats, so rather than duplicating it here the samples are decoded
+// by drwav_read_s16() into a 2048-sample staging buffer and then widened to float. Returns the number of samples written.
+drwav_uint64 drwav_read_f32__ima(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    drwav_int16 staging[2048];
+    drwav_uint64 samplesDone = 0;
+
+    while (samplesToRead != 0) {
+        drwav_uint64 chunk = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), staging);
+        if (chunk == 0) {
+            break;  // Nothing more could be decoded.
+        }
+
+        // The cast is safe because a chunk never exceeds the 2048-sample staging buffer.
+        drwav_s16_to_f32(pBufferOut, staging, (size_t)chunk);
+
+        samplesDone   += chunk;
+        samplesToRead -= chunk;
+        pBufferOut    += chunk;
+    }
+
+    return samplesDone;
+}
+
+// Reads up to samplesToRead IEEE floating point samples as 32-bit floats.
+//
+// When the stream already holds 4-byte floats the data is read straight into pBufferOut. Otherwise raw data is pulled
+// through drwav_read() in chunks that fit a 4096-byte stack buffer and converted chunk by chunk. Returns the number of
+// samples written to pBufferOut.
+drwav_uint64 drwav_read_f32__ieee(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    // Fast path.
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bytesPerSample == 4) {
+        return drwav_read(pWav, samplesToRead, pBufferOut);
+    }
+
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead A-law samples and decodes them to 32-bit floats.
+//
+// Raw data is pulled through drwav_read() in chunks that fit a 4096-byte stack buffer and decoded chunk by chunk.
+// Returns the number of samples written to pBufferOut.
+drwav_uint64 drwav_read_f32__alaw(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead mu-law samples and decodes them to 32-bit floats.
+//
+// Raw data is pulled through drwav_read() in chunks that fit a 4096-byte stack buffer and decoded chunk by chunk.
+// Returns the number of samples written to pBufferOut.
+drwav_uint64 drwav_read_f32__mulaw(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead samples as 32-bit floats, converting from whatever format the stream holds.
+//
+// Returns the number of samples written to pBufferOut. Returns 0 when any argument is NULL/zero or when the stream's
+// translated format tag is not one of the formats dispatched below.
+drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
+{
+    if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    // Don't try to read more samples than can potentially fit in the output buffer. The limit is compared against the
+    // sample count directly because multiplying first can wrap around in 64-bit arithmetic and hide the overflow.
+    if (samplesToRead > SIZE_MAX / sizeof(float)) {
+        samplesToRead = SIZE_MAX / sizeof(float);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_f32__pcm(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_f32__msadpcm(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_f32__ieee(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_f32__alaw(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_f32__mulaw(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_f32__ima(pWav, samplesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+// Converts unsigned 8-bit PCM samples to 32-bit floats. Does nothing when either pointer is NULL.
+void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+        // libsndfile appears to convert with "f32 = (u8 / 256) * 2 - 1", whereas dr_wav considers
+        // "f32 = (u8 / 255) * 2 - 1" to be the correct form (note the divisor). libsndfile is used as the benchmark for
+        // automated correctness testing, so its divisor can be selected here. This is disabled by default.
+        const float divisor = 256.0f;
+#else
+        const float divisor = 255.0f;
+#endif
+
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            pOut[iSample] = (pIn[iSample] / divisor) * 2 - 1;
+        }
+    }
+}
+
+// Converts signed 16-bit PCM samples to 32-bit floats by dividing each by 32768. Does nothing when either pointer is
+// NULL.
+void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            pOut[iSample] = pIn[iSample] / 32768.0f;
+        }
+    }
+}
+
+// Converts tightly packed signed 24-bit PCM samples (3 bytes each, least significant byte first) to 32-bit floats.
+// Does nothing when either pointer is NULL.
+void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            const drwav_uint8* pBytes = pIn + iSample*3;
+
+            // Place the three bytes in the upper 24 bits of a 32-bit word so the sign bit lands in bit 31.
+            unsigned int packed = ((unsigned int)pBytes[0] <<  8) |
+                                  ((unsigned int)pBytes[1] << 16) |
+                                  ((unsigned int)pBytes[2] << 24);
+
+            int sample32 = (int)packed;
+            pOut[iSample] = (float)(sample32 / 2147483648.0);
+        }
+    }
+}
+
+// Converts signed 32-bit PCM samples to 32-bit floats by dividing each by 2147483648 in double precision. Does nothing
+// when either pointer is NULL.
+void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            pOut[iSample] = (float)(pIn[iSample] / 2147483648.0);
+        }
+    }
+}
+
+// Narrows 64-bit floating point samples to 32-bit floats. Does nothing when either pointer is NULL.
+void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            pOut[iSample] = (float)pIn[iSample];
+        }
+    }
+}
+
+// Decodes A-law encoded bytes to 32-bit floats: each byte is decoded with drwav__alaw_to_s16() and the result divided
+// by 32768. Does nothing when either pointer is NULL.
+void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            pOut[iSample] = drwav__alaw_to_s16(pIn[iSample]) / 32768.0f;
+        }
+    }
+}
+
+// Decodes mu-law encoded bytes to 32-bit floats: each byte is decoded with drwav__mulaw_to_s16() and the result divided
+// by 32768. Does nothing when either pointer is NULL.
+void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            pOut[iSample] = drwav__mulaw_to_s16(pIn[iSample]) / 32768.0f;
+        }
+    }
+}
+
+
+
+// Converts tightly packed integer PCM samples of bytesPerSample bytes each to signed 32-bit PCM.
+//
+// pOut receives totalSampleCount samples and pIn supplies totalSampleCount * bytesPerSample bytes. Widths of 1 to 3
+// bytes are forwarded to the dedicated converters, 4-byte samples are copied as-is, and every other width goes through
+// the generic path at the bottom.
+static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+{
+    // Special case for 8-bit sample data because it's treated as unsigned.
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+    // Slightly more optimal implementation for common formats.
+    if (bytesPerSample == 2) {
+        drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        for (size_t i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int32*)pIn)[i];
+        }
+        return;
+    }
+
+    // Generic, slow converter. Samples wider than 32 bits are truncated to their 4 most significant bytes, which are the
+    // last 4 bytes of each sample because the least significant byte is stored first. Using the first 4 bytes instead
+    // would keep only the low-order bits, and shifting by 32 or more on a 32-bit value is undefined. A width of zero
+    // carries no data and produces silence.
+    for (size_t i = 0; i < totalSampleCount; ++i) {
+        unsigned int sample = 0;
+        if (bytesPerSample > 4) {
+            const unsigned char* pTop = pIn + (bytesPerSample - 4);
+            sample = ((unsigned int)pTop[0] <<  0) |
+                     ((unsigned int)pTop[1] <<  8) |
+                     ((unsigned int)pTop[2] << 16) |
+                     ((unsigned int)pTop[3] << 24);
+        }
+
+        pIn += bytesPerSample;
+        *pOut++ = (drwav_int32)sample;
+    }
+}
+
+// Converts IEEE floating point samples of bytesPerSample bytes each to signed 32-bit PCM.
+//
+// Only 4-byte (float) and 8-byte (double) samples are understood. Any other width produces silence rather than being
+// reinterpreted as double, which would read past the end of the input for widths narrower than 8 bytes.
+static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount);
+        return;
+    }
+
+    if (bytesPerSample == 8) {
+        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    }
+
+    // Unsupported width.
+    for (size_t i = 0; i < totalSampleCount; ++i) {
+        *pOut++ = 0;
+    }
+}
+
+
+// Reads up to samplesToRead integer PCM samples as signed 32-bit PCM.
+//
+// When the stream already holds 4-byte PCM the data is read straight into pBufferOut. Otherwise raw data is pulled
+// through drwav_read() in chunks that fit a 4096-byte stack buffer and converted chunk by chunk. Returns the number of
+// samples written to pBufferOut.
+drwav_uint64 drwav_read_s32__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    // Fast path.
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bytesPerSample == 4) {
+        return drwav_read(pWav, samplesToRead, pBufferOut);
+    }
+
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead samples from a Microsoft ADPCM stream as signed 32-bit PCM.
+//
+// ADPCM decoding is more involved than the other formats, so rather than duplicating it here the samples are decoded by
+// drwav_read_s16() into a 2048-sample staging buffer and then widened to 32 bits. Returns the number of samples written.
+drwav_uint64 drwav_read_s32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    drwav_int16 staging[2048];
+    drwav_uint64 samplesDone = 0;
+
+    while (samplesToRead != 0) {
+        drwav_uint64 chunk = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), staging);
+        if (chunk == 0) {
+            break;  // Nothing more could be decoded.
+        }
+
+        // The cast is safe because a chunk never exceeds the 2048-sample staging buffer.
+        drwav_s16_to_s32(pBufferOut, staging, (size_t)chunk);
+
+        samplesDone   += chunk;
+        samplesToRead -= chunk;
+        pBufferOut    += chunk;
+    }
+
+    return samplesDone;
+}
+
+// Reads up to samplesToRead samples from an IMA ADPCM stream as signed 32-bit PCM.
+//
+// IMA ADPCM decoding is more involved than the other formats, so rather than duplicating it here the samples are decoded
+// by drwav_read_s16() into a 2048-sample staging buffer and then widened to 32 bits. Returns the number of samples
+// written.
+drwav_uint64 drwav_read_s32__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    drwav_int16 staging[2048];
+    drwav_uint64 samplesDone = 0;
+
+    while (samplesToRead != 0) {
+        drwav_uint64 chunk = drwav_read_s16(pWav, drwav_min(samplesToRead, 2048), staging);
+        if (chunk == 0) {
+            break;  // Nothing more could be decoded.
+        }
+
+        // The cast is safe because a chunk never exceeds the 2048-sample staging buffer.
+        drwav_s16_to_s32(pBufferOut, staging, (size_t)chunk);
+
+        samplesDone   += chunk;
+        samplesToRead -= chunk;
+        pBufferOut    += chunk;
+    }
+
+    return samplesDone;
+}
+
+// Reads up to samplesToRead IEEE floating point samples and converts them to signed 32-bit PCM.
+//
+// Raw data is pulled through drwav_read() in chunks that fit a 4096-byte stack buffer and converted chunk by chunk.
+// Returns the number of samples written to pBufferOut.
+drwav_uint64 drwav_read_s32__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead A-law samples and decodes them to signed 32-bit PCM.
+//
+// Raw data is pulled through drwav_read() in chunks that fit a 4096-byte stack buffer and decoded chunk by chunk.
+// Returns the number of samples written to pBufferOut.
+drwav_uint64 drwav_read_s32__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead mu-law samples and decodes them to signed 32-bit PCM.
+//
+// Raw data is pulled through drwav_read() in chunks that fit a 4096-byte stack buffer and decoded chunk by chunk.
+// Returns the number of samples written to pBufferOut.
+drwav_uint64 drwav_read_s32__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    // A zero sample width would divide by zero when sizing each chunk below.
+    if (pWav->bytesPerSample == 0) {
+        return 0;
+    }
+
+    drwav_uint64 totalSamplesRead = 0;
+    unsigned char sampleData[4096];
+    while (samplesToRead > 0) {
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        if (samplesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        pBufferOut       += samplesRead;
+        samplesToRead    -= samplesRead;
+        totalSamplesRead += samplesRead;
+    }
+
+    return totalSamplesRead;
+}
+
+// Reads up to samplesToRead samples as signed 32-bit PCM, converting from whatever format the stream holds.
+//
+// Returns the number of samples written to pBufferOut. Returns 0 when any argument is NULL/zero or when the stream's
+// translated format tag is not one of the formats dispatched below.
+drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
+{
+    if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    // Don't try to read more samples than can potentially fit in the output buffer. The limit is compared against the
+    // sample count directly because multiplying first can wrap around in 64-bit arithmetic and hide the overflow.
+    if (samplesToRead > SIZE_MAX / sizeof(drwav_int32)) {
+        samplesToRead = SIZE_MAX / sizeof(drwav_int32);
+    }
+
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_s32__pcm(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_s32__msadpcm(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_s32__ieee(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_s32__alaw(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_s32__mulaw(pWav, samplesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_s32__ima(pWav, samplesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+// Converts unsigned 8-bit PCM samples to signed 32-bit PCM by re-centring each around zero and scaling it into the top
+// byte of the output. Does nothing when either pointer is NULL.
+void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < sampleCount; ++i) {
+        // Multiply instead of shifting: the re-centred value is negative for inputs below 128 and left-shifting a
+        // negative signed value is undefined behaviour in C. The product always fits in 32 bits.
+        *pOut++ = ((int)pIn[i] - 128) * (1 << 24);
+    }
+}
+
+// Widens signed 16-bit PCM samples to signed 32-bit PCM by scaling each into the upper 16 bits of the output. Does
+// nothing when either pointer is NULL.
+void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < sampleCount; ++i) {
+        // Multiply instead of shifting: left-shifting a negative signed value is undefined behaviour in C. The product
+        // always fits in 32 bits.
+        *pOut++ = (drwav_int32)pIn[i] * (1 << 16);
+    }
+}
+
+// Widens tightly packed signed 24-bit PCM samples (3 bytes each, least significant byte first) to signed 32-bit PCM.
+// Does nothing when either pointer is NULL.
+void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut != NULL && pIn != NULL) {
+        for (size_t iSample = 0; iSample < sampleCount; ++iSample) {
+            const drwav_uint8* pBytes = pIn + iSample*3;
+
+            // Place the three bytes in the upper 24 bits of a 32-bit word so the sign bit lands in bit 31.
+            unsigned int packed = ((unsigned int)pBytes[0] <<  8) |
+                                  ((unsigned int)pBytes[1] << 16) |
+                                  ((unsigned int)pBytes[2] << 24);
+
+            pOut[iSample] = (drwav_int32)packed;
+        }
+    }
+}
+
+// Converts 32-bit floating point samples to signed 32-bit PCM by scaling each by 2147483648 and truncating.
+//
+// Results outside the 32-bit range are saturated and NaN becomes silence. Does nothing when either pointer is NULL.
+void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount)
+{
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < sampleCount; ++i) {
+        double scaled = 2147483648.0 * pIn[i];
+        drwav_int32 sample;
+
+        // Converting an out-of-range or NaN floating point value to an integer is undefined behaviour, and in practice
+        // a full-scale input of +1.0 tends to wrap around to the most negative value. Saturate instead.
+        if (scaled >= 2147483647.0) {
+            sample = 2147483647;
+        } else if (scaled <= -2147483648.0) {
+            sample = -2147483647 - 1;
+        } else if (scaled != scaled) {
+            sample = 0;     // NaN.
+        } else {
+            sample = (drwav_int32)scaled;
+        }
+
+        *pOut++ = sample;
+    }
+}
+
+// Converts 64-bit floating point samples to signed 32-bit PCM by scaling each by 2147483648 and truncating.
+//
+// Results outside the 32-bit range are saturated and NaN becomes silence. Does nothing when either pointer is NULL.
+void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
+{
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < sampleCount; ++i) {
+        double scaled = 2147483648.0 * pIn[i];
+        drwav_int32 sample;
+
+        // Converting an out-of-range or NaN floating point value to an integer is undefined behaviour, and in practice
+        // a full-scale input of +1.0 tends to wrap around to the most negative value. Saturate instead.
+        if (scaled >= 2147483647.0) {
+            sample = 2147483647;
+        } else if (scaled <= -2147483648.0) {
+            sample = -2147483647 - 1;
+        } else if (scaled != scaled) {
+            sample = 0;     // NaN.
+        } else {
+            sample = (drwav_int32)scaled;
+        }
+
+        *pOut++ = sample;
+    }
+}
+
+// Decodes A-law encoded bytes to signed 32-bit PCM: each byte is decoded with drwav__alaw_to_s16() and the result is
+// scaled into the upper 16 bits of the output. Does nothing when either pointer is NULL.
+void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < sampleCount; ++i) {
+        // Multiply instead of shifting: left-shifting a negative signed value is undefined behaviour in C.
+        *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) * (1 << 16);
+    }
+}
+
+// Decodes mu-law encoded bytes to signed 32-bit PCM: each byte is decoded with drwav__mulaw_to_s16() and the result is
+// scaled into the upper 16 bits of the output. Does nothing when either pointer is NULL.
+void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < sampleCount; ++i) {
+        // Multiply instead of shifting: left-shifting a negative signed value is undefined behaviour in C.
+        *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) * (1 << 16);
+    }
+}
+
+
+
+// Decodes every sample of an already-initialized stream to signed 16-bit PCM and then uninitializes the stream.
+//
+// Returns a buffer allocated with DRWAV_MALLOC holding pWav->totalSampleCount samples, or NULL when the data does not
+// fit in a size_t, allocation fails, or fewer samples than expected could be read. The stream is uninitialized on every
+// path. The optional channels, sampleRate and totalSampleCount outputs are written on success only.
+drwav_int16* drwav__read_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav_assert(pWav != NULL);
+
+    // Compare against the sample count before multiplying; the 64-bit product could otherwise wrap around and slip
+    // past the check.
+    if (pWav->totalSampleCount > SIZE_MAX / sizeof(drwav_int16)) {
+        drwav_uninit(pWav);
+        return NULL;    // File's too big.
+    }
+
+    size_t sampleDataSize = (size_t)(pWav->totalSampleCount * sizeof(drwav_int16));   // <-- Safe cast due to the check above.
+
+    drwav_int16* pSampleData = (drwav_int16*)DRWAV_MALLOC(sampleDataSize);
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    // Failed to allocate memory.
+    }
+
+    drwav_uint64 samplesRead = drwav_read_s16(pWav, (size_t)pWav->totalSampleCount, pSampleData);
+    if (samplesRead != pWav->totalSampleCount) {
+        DRWAV_FREE(pSampleData);
+        drwav_uninit(pWav);
+        return NULL;    // There was an error reading the samples.
+    }
+
+    // Report the stream description before tearing the reader down so nothing is read from *pWav after drwav_uninit().
+    if (sampleRate) *sampleRate = pWav->sampleRate;
+    if (channels) *channels = pWav->channels;
+    if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount;
+
+    drwav_uninit(pWav);
+    return pSampleData;
+}
+
+// Decodes every sample of an already-initialized stream to 32-bit floats and then uninitializes the stream.
+//
+// Returns a buffer allocated with DRWAV_MALLOC holding pWav->totalSampleCount samples, or NULL when the data does not
+// fit in a size_t, allocation fails, or fewer samples than expected could be read. The stream is uninitialized on every
+// path. The optional channels, sampleRate and totalSampleCount outputs are written on success only.
+float* drwav__read_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav_assert(pWav != NULL);
+
+    // Compare against the sample count before multiplying; the 64-bit product could otherwise wrap around and slip
+    // past the check.
+    if (pWav->totalSampleCount > SIZE_MAX / sizeof(float)) {
+        drwav_uninit(pWav);
+        return NULL;    // File's too big.
+    }
+
+    size_t sampleDataSize = (size_t)(pWav->totalSampleCount * sizeof(float));   // <-- Safe cast due to the check above.
+
+    float* pSampleData = (float*)DRWAV_MALLOC(sampleDataSize);
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    // Failed to allocate memory.
+    }
+
+    drwav_uint64 samplesRead = drwav_read_f32(pWav, (size_t)pWav->totalSampleCount, pSampleData);
+    if (samplesRead != pWav->totalSampleCount) {
+        DRWAV_FREE(pSampleData);
+        drwav_uninit(pWav);
+        return NULL;    // There was an error reading the samples.
+    }
+
+    // Report the stream description before tearing the reader down so nothing is read from *pWav after drwav_uninit().
+    if (sampleRate) *sampleRate = pWav->sampleRate;
+    if (channels) *channels = pWav->channels;
+    if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount;
+
+    drwav_uninit(pWav);
+    return pSampleData;
+}
+
+// Decodes every sample of an already-initialized stream to signed 32-bit PCM and then uninitializes the stream.
+//
+// Returns a buffer allocated with DRWAV_MALLOC holding pWav->totalSampleCount samples, or NULL when the data does not
+// fit in a size_t, allocation fails, or fewer samples than expected could be read. The stream is uninitialized on every
+// path. The optional channels, sampleRate and totalSampleCount outputs are written on success only.
+drwav_int32* drwav__read_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav_assert(pWav != NULL);
+
+    // Compare against the sample count before multiplying; the 64-bit product could otherwise wrap around and slip
+    // past the check.
+    if (pWav->totalSampleCount > SIZE_MAX / sizeof(drwav_int32)) {
+        drwav_uninit(pWav);
+        return NULL;    // File's too big.
+    }
+
+    size_t sampleDataSize = (size_t)(pWav->totalSampleCount * sizeof(drwav_int32));   // <-- Safe cast due to the check above.
+
+    drwav_int32* pSampleData = (drwav_int32*)DRWAV_MALLOC(sampleDataSize);
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    // Failed to allocate memory.
+    }
+
+    drwav_uint64 samplesRead = drwav_read_s32(pWav, (size_t)pWav->totalSampleCount, pSampleData);
+    if (samplesRead != pWav->totalSampleCount) {
+        DRWAV_FREE(pSampleData);
+        drwav_uninit(pWav);
+        return NULL;    // There was an error reading the samples.
+    }
+
+    // Report the stream description before tearing the reader down so nothing is read from *pWav after drwav_uninit().
+    if (sampleRate) *sampleRate = pWav->sampleRate;
+    if (channels) *channels = pWav->channels;
+    if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount;
+
+    drwav_uninit(pWav);
+    return pSampleData;
+}
+
+
+// Opens a WAV stream through the given read/seek callbacks and decodes all of it to signed 16-bit PCM.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the stream
+// cannot be initialized; otherwise returns whatever drwav__read_and_close_s16() produces.
+drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init(&wav, onRead, onSeek, pUserData)) {
+        return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+// Opens a WAV stream through the given read/seek callbacks and decodes all of it to 32-bit floats.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the stream
+// cannot be initialized; otherwise returns whatever drwav__read_and_close_f32() produces.
+float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init(&wav, onRead, onSeek, pUserData)) {
+        return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+// Opens a WAV stream through the given read/seek callbacks and decodes all of it to signed 32-bit PCM.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the stream
+// cannot be initialized; otherwise returns whatever drwav__read_and_close_s32() produces.
+drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init(&wav, onRead, onSeek, pUserData)) {
+        return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+#ifndef DR_WAV_NO_STDIO
+// Opens the WAV file at filename and decodes all of it to signed 16-bit PCM.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the file cannot
+// be initialized as a stream; otherwise returns whatever drwav__read_and_close_s16() produces.
+drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init_file(&wav, filename)) {
+        return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+// Opens the WAV file at filename and decodes all of it to 32-bit floats.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the file cannot
+// be initialized as a stream; otherwise returns whatever drwav__read_and_close_f32() produces.
+float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init_file(&wav, filename)) {
+        return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+// Opens the WAV file at filename and decodes all of it to signed 32-bit PCM.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the file cannot
+// be initialized as a stream; otherwise returns whatever drwav__read_and_close_s32() produces.
+drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init_file(&wav, filename)) {
+        return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+#endif
+
+// Opens a WAV image held in memory (dataSize bytes at data) and decodes all of it to signed 16-bit PCM.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the memory
+// block cannot be initialized as a stream; otherwise returns whatever drwav__read_and_close_s16() produces.
+drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init_memory(&wav, data, dataSize)) {
+        return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+// Opens a WAV image held in memory (dataSize bytes at data) and decodes all of it to 32-bit floats.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the memory
+// block cannot be initialized as a stream; otherwise returns whatever drwav__read_and_close_f32() produces.
+float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init_memory(&wav, data, dataSize)) {
+        return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+
+// Opens a WAV image held in memory (dataSize bytes at data) and decodes all of it to signed 32-bit PCM.
+//
+// The optional channels, sampleRate and totalSampleCount outputs are zeroed up front. Returns NULL when the memory
+// block cannot be initialized as a stream; otherwise returns whatever drwav__read_and_close_s32() produces.
+drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
+    drwav wav;
+
+    if (channels != NULL) {
+        *channels = 0;
+    }
+    if (sampleRate != NULL) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount != NULL) {
+        *totalSampleCount = 0;
+    }
+
+    if (drwav_init_memory(&wav, data, dataSize)) {
+        return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount);
+    }
+
+    return NULL;
+}
+#endif  //DR_WAV_NO_CONVERSION_API
+
+
+// Releases a sample buffer by handing it to DRWAV_FREE. Intended for the buffers returned by the
+// drwav_open_and_read*() family.
+void drwav_free(void* pDataReturnedByOpenAndRead)
+{
+    void* pSampleData = pDataReturnedByOpenAndRead;
+    DRWAV_FREE(pSampleData);
+}
+
+#endif  //DR_WAV_IMPLEMENTATION
+
+
+// REVISION HISTORY
+//
+// v0.7a - 2017-11-17
+//   - Fix some GCC warnings.
+//
+// v0.7 - 2017-11-04
+//   - Add writing APIs.
+//
+// v0.6 - 2017-08-16
+//   - API CHANGE: Rename dr_* types to drwav_*.
+//   - Add support for custom implementations of malloc(), realloc(), etc.
+//   - Add support for Microsoft ADPCM.
+//   - Add support for IMA ADPCM (DVI, format code 0x11).
+//   - Optimizations to drwav_read_s16().
+//   - Bug fixes.
+//
+// v0.5g - 2017-07-16
+//   - Change underlying type for booleans to unsigned.
+//
+// v0.5f - 2017-04-04
+//   - Fix a minor bug with drwav_open_and_read_s16() and family.
+//
+// v0.5e - 2016-12-29
+//   - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
+//   - Minor fixes to documentation.
+//
+// v0.5d - 2016-12-28
+//   - Use drwav_int*/drwav_uint* sized types to improve compiler support.
+//
+// v0.5c - 2016-11-11
+//   - Properly handle JUNK chunks that come before the FMT chunk.
+//
+// v0.5b - 2016-10-23
+//   - A minor change to drwav_bool8 and drwav_bool32 types.
+//
+// v0.5a - 2016-10-11
+//   - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
+//   - Improve A-law and mu-law efficiency.
+//
+// v0.5 - 2016-09-29
+//   - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
+//     keep it consistent with dr_audio and drwav_flac.
+//
+// v0.4b - 2016-09-18
+//   - Fixed a typo in documentation.
+//
+// v0.4a - 2016-09-18
+//   - Fixed a typo.
+//   - Change date format to ISO 8601 (YYYY-MM-DD)
+//
+// v0.4 - 2016-07-13
+//   - API CHANGE. Make onSeek consistent with drwav_flac.
+//   - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with drwav_flac.
+//   - Added support for Sony Wave64.
+//
+// v0.3a - 2016-05-28
+//   - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
+//   - Fixed a memory leak.
+//
+// v0.3 - 2016-05-22
+//   - Lots of API changes for consistency.
+//
+// v0.2a - 2016-05-16
+//   - Fixed Linux/GCC build.
+//
+// v0.2 - 2016-05-11
+//   - Added support for reading data as signed 32-bit PCM for consistency with drwav_flac.
+//
+// v0.1a - 2016-05-07
+//   - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
+//
+// v0.1 - 2016-05-04
+//   - Initial versioned release.
+
+
+/*
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+*/
diff --git a/raylib/external/mini_al.h b/raylib/external/mini_al.h
new file mode 100644
index 0000000..7d83b54
--- /dev/null
+++ b/raylib/external/mini_al.h
@@ -0,0 +1,11601 @@
+// Audio playback and capture library. Public domain. See "unlicense" statement at the end of this file.
+// mini_al - v0.x - 2017-xx-xx
+//
+// David Reid - davidreidsoftware@gmail.com
+
+// ABOUT
+// =====
+// mini_al is a small library for making it easy to connect to a playback or capture device and send
+// or receive data from that device.
+//
+// mini_al uses an asynchronous API. Every device is created with its own thread, with audio data
+// being delivered to or from the device via a callback. Synchronous APIs are not supported in the
+// interest of keeping the library as simple and light-weight as possible.
+//
+// Supported Backends:
+//   - WASAPI
+//   - DirectSound
+//   - WinMM
+//   - ALSA
+//   - OSS
+//   - OpenSL|ES / Android
+//   - OpenAL
+//   - SDL
+//   - Null (Silence)
+//   - ... and more in the future.
+//     - Core Audio (OSX, iOS)
+//
+// Supported Formats:
+//   - Unsigned 8-bit PCM
+//   - Signed 16-bit PCM
+//   - Signed 24-bit PCM (tightly packed)
+//   - Signed 32-bit PCM
+//   - IEEE 32-bit floating point PCM
+//
+//
+// USAGE
+// =====
+// mini_al is a single-file library. To use it, do something like the following in one .c file.
+//   #define MAL_IMPLEMENTATION
+//   #include "mini_al.h"
+//
+// You can then #include this file in other parts of the program as you would with any other header file.
+//
+// The implementation of this library will try #include-ing necessary headers for each backend. If you do not have
+// the development packages for any particular backend you can disable it by #define-ing the appropriate MAL_NO_*
+// option before the implementation.
+//
+//
+// Building (Windows)
+// ------------------
+// The Windows build should compile clean on all modern versions of MSVC without the need to configure any include
+// paths nor link to any libraries. The same applies to MinGW/GCC and Clang.
+//
+// Building (Linux)
+// ----------------
+// The Linux build uses ALSA for its backend so you will need to install the relevant ALSA development packages
+// for your preferred distro. It also uses pthreads. Dependencies are dynamically linked at runtime so you do not
+// need to link to -lasound nor -lpthread. You will need to link to -ldl.
+//
+// Building (BSD)
+// --------------
+// The BSD build uses OSS and should Just Work without any linking nor include path configuration.
+//
+// Building (Emscripten)
+// ---------------------
+// The Emscripten build currently uses SDL 1.2 for its backend which means specifying "-s USE_SDL=2" is unnecessary
+// as of this version. However, if in the future there is legitimate benefit or enough demand for SDL 2 to be used
+// instead, you will need to specify this when compiling.
+//
+//
+// Playback Example
+// ----------------
+//   mal_uint32 on_send_frames_to_device(mal_device* pDevice, mal_uint32 frameCount, void* pSamples)
+//   {
+//       // This callback is set at initialization time and will be called when a playback device needs more
+//       // data. You need to write as many frames as you can to pSamples (but no more than frameCount) and
+//       // then return the number of frames you wrote.
+//       //
+//       // The user data (pDevice->pUserData) is set by mal_device_init().
+//       return (mal_uint32)drwav_read_f32((drwav*)pDevice->pUserData, frameCount * pDevice->channels, (float*)pSamples) / pDevice->channels;
+//   }
+//
+//   ...
+//
+//   mal_context context;
+//   if (mal_context_init(NULL, 0, NULL, &context) != MAL_SUCCESS) {
+//       printf("Failed to initialize context.");
+//       return -3;
+//   }
+//
+//   mal_device_config config = mal_device_config_init_playback(mal_format_s16, wav.channels, wav.sampleRate, on_send_frames_to_device);
+//
+//   mal_device device;
+//   mal_result result = mal_device_init(&context, mal_device_type_playback, NULL, &config, pMyData, &device);
+//   if (result != MAL_SUCCESS) {
+//       return -1;
+//   }
+//
+//   mal_device_start(&device);     // The device is sleeping by default so you'll need to start it manually.
+//
+//   ...
+//
+//   mal_device_uninit(&device);    // This will stop the device so no need to do that manually.
+//
+//
+//
+// NOTES
+// =====
+// - This library uses an asynchronous API for delivering and requesting audio data. Each device will have
+//   its own worker thread which is managed by the library.
+// - If mal_device_init() is called with a device that's not aligned to the platform's natural alignment
+//   boundary (4 bytes on 32-bit, 8 bytes on 64-bit), it will _not_ be thread-safe. The reason for this
+//   is that it depends on members of mal_device being correctly aligned for atomic assignments.
+// - Sample data is always little-endian and interleaved. For example, mal_format_s16 means signed 16-bit
+//   integer samples, interleaved. Let me know if you need non-interleaved and I'll look into it.
+//
+//
+//
+// BACKEND NUANCES
+// ===============
+// - The absolute best latency I am able to get on DirectSound is about 10 milliseconds. This seems very
+//   consistent so I'm suspecting there's some kind of hard coded limit there or something.
+// - DirectSound currently supports a maximum of 4 periods.
+// - To capture audio on Android, remember to add the RECORD_AUDIO permission to your manifest:
+//     <uses-permission android:name="android.permission.RECORD_AUDIO" />
+// - UWP is only supported when compiling as C++.
+// - UWP only supports default playback and capture devices.
+// - UWP requires the Microphone capability to be enabled in the application's manifest (Package.appxmanifest):
+//       <Package ...>
+//           ...
+//           <Capabilities>
+//               <DeviceCapability Name="microphone" />
+//           </Capabilities>
+//       </Package>
+//
+//
+// OPTIONS
+// =======
+// #define these options before including this file.
+//
+// #define MAL_NO_WASAPI
+//   Disables the WASAPI backend.
+//
+// #define MAL_NO_DSOUND
+//   Disables the DirectSound backend.
+//
+// #define MAL_NO_WINMM
+//   Disables the WinMM backend.
+//
+// #define MAL_NO_ALSA
+//   Disables the ALSA backend.
+//
+// #define MAL_NO_OSS
+//   Disables the OSS backend.
+//
+// #define MAL_NO_OPENSL
+//   Disables the OpenSL|ES backend.
+//
+// #define MAL_NO_OPENAL
+//   Disables the OpenAL backend.
+//
+// #define MAL_NO_SDL
+//   Disables the SDL backend.
+//
+// #define MAL_NO_NULL
+//   Disables the null backend.
+//
+// #define MAL_DEFAULT_BUFFER_SIZE_IN_MILLISECONDS
+//   When a buffer size of 0 is specified when a device is initialized, it will default to a size with
+//   this number of milliseconds worth of data. Note that some backends may adjust this setting if that
+//   particular backend has unusual latency characteristics.
+//
+// #define MAL_DEFAULT_PERIODS
+//   When a period count of 0 is specified when a device is initialized, it will default to this.
+
+#ifndef mini_al_h
+#define mini_al_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_MSC_VER)
+    #pragma warning(push)
+    #pragma warning(disable:4201)   // nonstandard extension used: nameless struct/union
+#endif
+
+// Platform/backend detection.
+#ifdef _WIN32
+    #define MAL_WIN32
+    #if (!defined(WINAPI_FAMILY) || WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)
+        #define MAL_WIN32_DESKTOP
+    #endif
+#else
+    #define MAL_POSIX
+    #include <pthread.h>    // Unfortunate #include, but needed for pthread_t, pthread_mutex_t and pthread_cond_t types.
+
+    #define MAL_UNIX
+    #ifdef __linux__
+        #define MAL_LINUX
+    #endif
+    #ifdef __APPLE__
+        #define MAL_APPLE
+    #endif
+    #ifdef __ANDROID__
+        #define MAL_ANDROID
+    #endif
+    #ifdef __EMSCRIPTEN__
+        #define MAL_EMSCRIPTEN
+    #endif
+#endif
+
+// Some backends are only supported on certain platforms.
+#if defined(MAL_WIN32)
+    #define MAL_SUPPORT_WASAPI
+    #if defined(MAL_WIN32_DESKTOP)  // DirectSound and WinMM backends are only supported on desktop builds.
+        #define MAL_SUPPORT_DSOUND
+        #define MAL_SUPPORT_WINMM
+    #endif
+
+    // Don't support WASAPI on older versions of MSVC for now.
+    #if defined(_MSC_VER)
+        #if _MSC_VER < 1600
+            #if !defined(__audioclient_h__)     // WASAPI stays enabled when this macro is already defined (presumably the audioclient.h include guard - TODO confirm).
+                #undef MAL_SUPPORT_WASAPI
+            #endif
+        #endif
+    #endif
+#endif
+#if defined(MAL_UNIX)
+    #if defined(MAL_LINUX)
+        #if !defined(MAL_ANDROID)   // ALSA is not supported on Android.
+            #define MAL_SUPPORT_ALSA
+        #endif
+    #endif
+    #if defined(MAL_APPLE)
+        #define MAL_SUPPORT_COREAUDIO
+    #endif
+    #if defined(MAL_ANDROID)
+        #define MAL_SUPPORT_OPENSL
+    #endif
+    #if !defined(MAL_LINUX) && !defined(MAL_APPLE) && !defined(MAL_ANDROID) && !defined(MAL_EMSCRIPTEN)
+        #define MAL_SUPPORT_OSS
+    #endif
+#endif
+
+#define MAL_SUPPORT_SDL     // All platforms support SDL.
+
+// Explicitly disable OpenAL and Null backends for Emscripten because they both use a background thread which is not properly supported right now.
+#if !defined(MAL_EMSCRIPTEN)
+#define MAL_SUPPORT_OPENAL
+#define MAL_SUPPORT_NULL    // All platforms support the null backend.
+#endif
+
+
+#if !defined(MAL_NO_WASAPI) && defined(MAL_SUPPORT_WASAPI)
+    #define MAL_ENABLE_WASAPI
+#endif
+#if !defined(MAL_NO_DSOUND) && defined(MAL_SUPPORT_DSOUND)
+    #define MAL_ENABLE_DSOUND
+#endif
+#if !defined(MAL_NO_WINMM) && defined(MAL_SUPPORT_WINMM)
+    #define MAL_ENABLE_WINMM
+#endif
+#if !defined(MAL_NO_ALSA) && defined(MAL_SUPPORT_ALSA)
+    #define MAL_ENABLE_ALSA
+#endif
+#if !defined(MAL_NO_COREAUDIO) && defined(MAL_SUPPORT_COREAUDIO)
+    #define MAL_ENABLE_COREAUDIO
+#endif
+#if !defined(MAL_NO_OSS) && defined(MAL_SUPPORT_OSS)
+    #define MAL_ENABLE_OSS
+#endif
+#if !defined(MAL_NO_OPENSL) && defined(MAL_SUPPORT_OPENSL)
+    #define MAL_ENABLE_OPENSL
+#endif
+#if !defined(MAL_NO_OPENAL) && defined(MAL_SUPPORT_OPENAL)
+    #define MAL_ENABLE_OPENAL
+#endif
+#if !defined(MAL_NO_SDL) && defined(MAL_SUPPORT_SDL)
+    #define MAL_ENABLE_SDL
+#endif
+#if !defined(MAL_NO_NULL) && defined(MAL_SUPPORT_NULL)
+    #define MAL_ENABLE_NULL
+#endif
+
+
+#if defined(_MSC_VER) && _MSC_VER < 1600
+typedef   signed char    mal_int8;
+typedef unsigned char    mal_uint8;
+typedef   signed short   mal_int16;
+typedef unsigned short   mal_uint16;
+typedef   signed int     mal_int32;
+typedef unsigned int     mal_uint32;
+typedef   signed __int64 mal_int64;
+typedef unsigned __int64 mal_uint64;
+#else
+#include <stdint.h>
+typedef int8_t           mal_int8;
+typedef uint8_t          mal_uint8;
+typedef int16_t          mal_int16;
+typedef uint16_t         mal_uint16;
+typedef int32_t          mal_int32;
+typedef uint32_t         mal_uint32;
+typedef int64_t          mal_int64;
+typedef uint64_t         mal_uint64;
+#endif
+typedef mal_uint8        mal_bool8;
+typedef mal_uint32       mal_bool32;
+#define MAL_TRUE         1
+#define MAL_FALSE        0
+
+typedef void* mal_handle;
+typedef void* mal_ptr;
+typedef void (* mal_proc)();
+
+typedef struct mal_context mal_context;
+typedef struct mal_device mal_device;
+
+typedef struct                      // Thread object: a context pointer plus a per-platform thread handle.
+{
+    mal_context* pContext;
+
+    union                           // Anonymous union; members are compiled in according to MAL_WIN32 / MAL_POSIX.
+    {
+#ifdef MAL_WIN32
+        struct
+        {
+            /*HANDLE*/ mal_handle hThread;
+        } win32;
+#endif
+#ifdef MAL_POSIX
+        struct
+        {
+            pthread_t thread;
+        } posix;
+#endif
+
+        int _unused;                // Placeholder member; presumably never read - TODO confirm.
+    };
+} mal_thread;
+
+typedef struct                      // Mutex object: a context pointer plus a per-platform mutex handle.
+{
+    mal_context* pContext;
+
+    union                           // Anonymous union; members are compiled in according to MAL_WIN32 / MAL_POSIX.
+    {
+#ifdef MAL_WIN32
+        struct
+        {
+            /*HANDLE*/ mal_handle hMutex;
+        } win32;
+#endif
+#ifdef MAL_POSIX
+        struct
+        {
+            pthread_mutex_t mutex;
+        } posix;
+#endif
+
+        int _unused;                // Placeholder member; presumably never read - TODO confirm.
+    };
+} mal_mutex;
+
+typedef struct                      // Event object: a context pointer plus per-platform signalling state.
+{
+    mal_context* pContext;
+
+    union                           // Anonymous union; members are compiled in according to MAL_WIN32 / MAL_POSIX.
+    {
+#ifdef MAL_WIN32
+        struct
+        {
+            /*HANDLE*/ mal_handle hEvent;
+        } win32;
+#endif
+#ifdef MAL_POSIX
+        struct                      // The POSIX variant is made of a mutex, a condition variable and a 32-bit value.
+        {
+            pthread_mutex_t mutex;
+            pthread_cond_t condition;
+            mal_uint32 value;
+        } posix;
+#endif
+
+        int _unused;                // Placeholder member; presumably never read - TODO confirm.
+    };
+} mal_event;
+
+#if defined(_MSC_VER) && !defined(_WCHAR_T_DEFINED)
+typedef mal_uint16 wchar_t;
+#endif
+
+// Define NULL for some compilers.
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define MAL_MAX_PERIODS_DSOUND                          4
+#define MAL_MAX_PERIODS_OPENAL                          4
+
+typedef mal_uint8 mal_channel;
+#define MAL_CHANNEL_NONE                                0
+#define MAL_CHANNEL_FRONT_LEFT                          1
+#define MAL_CHANNEL_FRONT_RIGHT                         2
+#define MAL_CHANNEL_FRONT_CENTER                        3
+#define MAL_CHANNEL_LFE                                 4
+#define MAL_CHANNEL_BACK_LEFT                           5
+#define MAL_CHANNEL_BACK_RIGHT                          6
+#define MAL_CHANNEL_FRONT_LEFT_CENTER                   7
+#define MAL_CHANNEL_FRONT_RIGHT_CENTER                  8
+#define MAL_CHANNEL_BACK_CENTER                         9
+#define MAL_CHANNEL_SIDE_LEFT                           10
+#define MAL_CHANNEL_SIDE_RIGHT                          11
+#define MAL_CHANNEL_TOP_CENTER                          12
+#define MAL_CHANNEL_TOP_FRONT_LEFT                      13
+#define MAL_CHANNEL_TOP_FRONT_CENTER                    14
+#define MAL_CHANNEL_TOP_FRONT_RIGHT                     15
+#define MAL_CHANNEL_TOP_BACK_LEFT                       16
+#define MAL_CHANNEL_TOP_BACK_CENTER                     17
+#define MAL_CHANNEL_TOP_BACK_RIGHT                      18
+#define MAL_CHANNEL_MONO                                MAL_CHANNEL_FRONT_CENTER
+#define MAL_MAX_CHANNELS                                18
+
+#define MAL_MAX_SAMPLE_SIZE_IN_BYTES                    8
+
+typedef int mal_result;
+#define MAL_SUCCESS                                      0
+#define MAL_ERROR                                       -1      // A generic error.
+#define MAL_INVALID_ARGS                                -2
+#define MAL_OUT_OF_MEMORY                               -3
+#define MAL_FORMAT_NOT_SUPPORTED                        -4
+#define MAL_NO_BACKEND                                  -5
+#define MAL_NO_DEVICE                                   -6
+#define MAL_API_NOT_FOUND                               -7
+#define MAL_DEVICE_BUSY                                 -8
+#define MAL_DEVICE_NOT_INITIALIZED                      -9
+#define MAL_DEVICE_ALREADY_STARTED                      -10
+#define MAL_DEVICE_ALREADY_STARTING                     -11
+#define MAL_DEVICE_ALREADY_STOPPED                      -12
+#define MAL_DEVICE_ALREADY_STOPPING                     -13
+#define MAL_FAILED_TO_MAP_DEVICE_BUFFER                 -14
+#define MAL_FAILED_TO_INIT_BACKEND                      -15
+#define MAL_FAILED_TO_READ_DATA_FROM_CLIENT             -16
+#define MAL_FAILED_TO_READ_DATA_FROM_DEVICE             -17
+#define MAL_FAILED_TO_SEND_DATA_TO_CLIENT               -18
+#define MAL_FAILED_TO_SEND_DATA_TO_DEVICE               -19
+#define MAL_FAILED_TO_OPEN_BACKEND_DEVICE               -20
+#define MAL_FAILED_TO_START_BACKEND_DEVICE              -21
+#define MAL_FAILED_TO_STOP_BACKEND_DEVICE               -22
+#define MAL_FAILED_TO_CREATE_MUTEX                      -23
+#define MAL_FAILED_TO_CREATE_EVENT                      -24
+#define MAL_FAILED_TO_CREATE_THREAD                     -25
+#define MAL_INVALID_DEVICE_CONFIG                       -26
+#define MAL_ACCESS_DENIED                               -27
+#define MAL_DSOUND_FAILED_TO_CREATE_DEVICE              -1024
+#define MAL_DSOUND_FAILED_TO_SET_COOP_LEVEL             -1025
+#define MAL_DSOUND_FAILED_TO_CREATE_BUFFER              -1026
+#define MAL_DSOUND_FAILED_TO_QUERY_INTERFACE            -1027
+#define MAL_DSOUND_FAILED_TO_SET_NOTIFICATIONS          -1028
+#define MAL_ALSA_FAILED_TO_OPEN_DEVICE                  -2048
+#define MAL_ALSA_FAILED_TO_SET_HW_PARAMS                -2049
+#define MAL_ALSA_FAILED_TO_SET_SW_PARAMS                -2050
+#define MAL_ALSA_FAILED_TO_PREPARE_DEVICE               -2051
+#define MAL_ALSA_FAILED_TO_RECOVER_DEVICE               -2052
+#define MAL_WASAPI_FAILED_TO_CREATE_DEVICE_ENUMERATOR   -3072
+#define MAL_WASAPI_FAILED_TO_CREATE_DEVICE              -3073
+#define MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE            -3074
+#define MAL_WASAPI_FAILED_TO_INITIALIZE_DEVICE          -3075
+#define MAL_WASAPI_FAILED_TO_FIND_BEST_FORMAT           -3076
+#define MAL_WASAPI_FAILED_TO_GET_INTERNAL_BUFFER        -3077
+#define MAL_WASAPI_FAILED_TO_RELEASE_INTERNAL_BUFFER    -3078
+#define MAL_WINMM_FAILED_TO_GET_DEVICE_CAPS             -4096
+#define MAL_WINMM_FAILED_TO_GET_SUPPORTED_FORMATS       -4097
+
+typedef void       (* mal_log_proc) (mal_context* pContext, mal_device* pDevice, const char* message);
+typedef void       (* mal_recv_proc)(mal_device* pDevice, mal_uint32 frameCount, const void* pSamples);
+typedef mal_uint32 (* mal_send_proc)(mal_device* pDevice, mal_uint32 frameCount, void* pSamples);
+typedef void       (* mal_stop_proc)(mal_device* pDevice);
+
+typedef enum                // Identifies an audio backend; this is the type of mal_context::backend.
+{
+    mal_backend_null,       // Null (Silence)
+    mal_backend_wasapi,     // WASAPI
+    mal_backend_dsound,     // DirectSound
+    mal_backend_winmm,      // WinMM
+    mal_backend_alsa,       // ALSA
+    mal_backend_oss,        // OSS
+    mal_backend_opensl,     // OpenSL|ES / Android
+    mal_backend_openal,     // OpenAL
+    mal_backend_sdl         // SDL
+} mal_backend;
+
+typedef enum                    // Whether a device is used for playback or for capture.
+{
+    mal_device_type_playback,   // Passed to mal_device_init() in the playback example at the top of this file.
+    mal_device_type_capture
+} mal_device_type;
+
+typedef enum
+{
+    // I like to keep these explicitly defined because they're used as a key into a lookup table. When items are
+    // added to this, make sure there are no gaps and that they're added to the lookup table in mal_get_sample_size_in_bytes().
+    mal_format_unknown = 0,     // Mainly used for indicating an error.
+    mal_format_u8      = 1,
+    mal_format_s16     = 2,     // Seems to be the most widely supported format.
+    mal_format_s24     = 3,     // Tightly packed. 3 bytes per sample.
+    mal_format_s32     = 4,
+    mal_format_f32     = 5,
+} mal_format;
+
+typedef enum
+{
+    mal_channel_mix_mode_basic,     // Drop excess channels; zeroed out extra channels.
+    mal_channel_mix_mode_blend,     // Blend channels based on locality.
+} mal_channel_mix_mode;
+
+typedef union
+{
+#ifdef MAL_SUPPORT_WASAPI
+    wchar_t wasapi[64];             // WASAPI uses a wchar_t string for identification.
+#endif
+#ifdef MAL_SUPPORT_DSOUND
+    mal_uint8 dsound[16];           // DirectSound uses a GUID for identification.
+#endif
+#ifdef MAL_SUPPORT_WINMM
+    /*UINT_PTR*/ mal_uint32 winmm;  // When creating a device, WinMM expects a Win32 UINT_PTR for device identification. In practice it's actually just a UINT.
+#endif
+#ifdef MAL_SUPPORT_ALSA
+    char alsa[256];                 // ALSA uses a name string for identification.
+#endif
+#ifdef MAL_SUPPORT_COREAUDIO
+    // TODO: Implement me.
+#endif
+#ifdef MAL_SUPPORT_OSS
+    char oss[64];                   // "dev/dsp0", etc. "dev/dsp" for the default device.
+#endif
+#ifdef MAL_SUPPORT_OPENSL
+    mal_uint32 opensl;              // OpenSL|ES uses a 32-bit unsigned integer for identification.
+#endif
+#ifdef MAL_SUPPORT_OPENAL
+    char openal[256];               // OpenAL seems to use human-readable device names as the ID.
+#endif
+#ifdef MAL_SUPPORT_SDL
+    int sdl;                        // SDL devices are identified with an index.
+#endif
+#ifdef MAL_SUPPORT_NULL
+    int nullbackend;                // Always 0.
+#endif
+} mal_device_id;
+
+typedef struct              // Pairs a backend-specific device identifier with a name string.
+{
+    mal_device_id id;       // Union whose available members depend on the MAL_SUPPORT_* macros.
+    char name[256];         // Presumably a null-terminated, human-readable name - TODO confirm.
+} mal_device_info;
+
+typedef struct
+{
+    mal_int64 counter;
+} mal_timer;
+
+
+typedef struct mal_src mal_src;
+typedef mal_uint32 (* mal_src_read_proc)(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, void* pUserData); // Returns the number of frames that were read.
+
+typedef enum
+{
+    mal_src_algorithm_none,
+    mal_src_algorithm_linear
+} mal_src_algorithm;
+
+#define MAL_SRC_CACHE_SIZE_IN_FRAMES    512
+typedef struct              // Frame cache embedded in mal_src (see mal_src::cache).
+{
+    mal_src* pSRC;
+    float pCachedFrames[MAL_MAX_CHANNELS * MAL_SRC_CACHE_SIZE_IN_FRAMES];     // Sized for MAL_SRC_CACHE_SIZE_IN_FRAMES frames of MAL_MAX_CHANNELS floats each.
+    mal_uint32 cachedFrameCount;
+    mal_uint32 iNextFrame;
+} mal_src_cache;
+
+typedef struct                     // Configuration stored in mal_src::config.
+{
+    mal_uint32 sampleRateIn;
+    mal_uint32 sampleRateOut;
+    mal_format formatIn;
+    mal_format formatOut;
+    mal_uint32 channels;           // One channel count only; this struct has no separate in/out counts.
+    mal_src_algorithm algorithm;   // mal_src_algorithm_none or mal_src_algorithm_linear.
+    mal_uint32 cacheSizeInFrames;  // The number of frames to read from the client at a time.
+} mal_src_config;
+
+struct mal_src              // Sample rate conversion state; mal_dsp embeds one of these for that purpose.
+{
+    mal_src_config config;
+    mal_src_read_proc onRead;   // Callback that returns the number of frames that were read (see mal_src_read_proc).
+    void* pUserData;
+    float bin[256];
+    mal_src_cache cache;    // <-- For simplifying and optimizing client -> memory reading.
+
+    union
+    {
+        struct              // Presumably the state used when config.algorithm is mal_src_algorithm_linear - TODO confirm.
+        {
+            float alpha;
+            mal_bool32 isPrevFramesLoaded : 1;
+            mal_bool32 isNextFramesLoaded : 1;
+        } linear;
+    };
+};
+
+typedef struct mal_dsp mal_dsp;
+typedef mal_uint32 (* mal_dsp_read_proc)(mal_dsp* pDSP, mal_uint32 frameCount, void* pSamplesOut, void* pUserData);
+
+typedef struct
+{
+    mal_format  formatIn;
+    mal_uint32  channelsIn;
+    mal_uint32  sampleRateIn;
+    mal_channel channelMapIn[MAL_MAX_CHANNELS];
+    mal_format  formatOut;
+    mal_uint32  channelsOut;
+    mal_uint32  sampleRateOut;
+    mal_channel channelMapOut[MAL_MAX_CHANNELS];
+    mal_uint32  cacheSizeInFrames;  // Applications should set this to 0 for now.
+} mal_dsp_config;
+
+struct mal_dsp
+{
+    mal_dsp_config config;
+    mal_dsp_read_proc onRead;
+    void* pUserDataForOnRead;
+    mal_src src;    // For sample rate conversion.
+    mal_channel channelMapInPostMix[MAL_MAX_CHANNELS];   // <-- When mixing, new channels may need to be created. This represents the channel map after mixing.
+    mal_channel channelShuffleTable[MAL_MAX_CHANNELS];
+    mal_bool32 isChannelMappingRequired : 1;
+    mal_bool32 isSRCRequired : 1;
+    mal_bool32 isPassthrough : 1;       // <-- Will be set to true when the DSP pipeline is an optimized passthrough.
+};
+
+
+typedef struct                      // Device configuration; the playback example passes a pointer to one to mal_device_init().
+{
+    mal_format format;
+    mal_uint32 channels;
+    mal_uint32 sampleRate;
+    mal_channel channelMap[MAL_MAX_CHANNELS];
+    mal_uint32 bufferSizeInFrames;  // 0 selects a default sized from MAL_DEFAULT_BUFFER_SIZE_IN_MILLISECONDS (see OPTIONS).
+    mal_uint32 periods;             // 0 selects MAL_DEFAULT_PERIODS (see OPTIONS).
+    mal_bool32 preferExclusiveMode;
+    mal_recv_proc onRecvCallback;
+    mal_send_proc onSendCallback;
+    mal_stop_proc onStopCallback;
+
+    struct
+    {
+        mal_bool32 noMMap;  // Disables MMap mode.
+    } alsa;
+} mal_device_config;
+
+typedef struct
+{
+    mal_log_proc onLog;
+
+    struct
+    {
+        mal_bool32 useVerboseDeviceEnumeration;
+        mal_bool32 excludeNullDevice;
+    } alsa;
+} mal_context_config;
+
+struct mal_context
+{
+    mal_backend backend;    // DirectSound, ALSA, etc.
+    mal_context_config config;
+
+    union
+    {
+#ifdef MAL_SUPPORT_WASAPI
+        struct
+        {
+            int _unused;
+        } wasapi;
+#endif
+#ifdef MAL_SUPPORT_DSOUND
+        struct
+        {
+            /*HMODULE*/ mal_handle hDSoundDLL;
+        } dsound;
+#endif
+#ifdef MAL_SUPPORT_WINMM
+        struct
+        {
+            /*HMODULE*/ mal_handle hWinMM;
+            mal_proc waveOutGetNumDevs;
+            mal_proc waveOutGetDevCapsA;
+            mal_proc waveOutOpen;
+            mal_proc waveOutClose;
+            mal_proc waveOutPrepareHeader;
+            mal_proc waveOutUnprepareHeader;
+            mal_proc waveOutWrite;
+            mal_proc waveOutReset;
+            mal_proc waveInGetNumDevs;
+            mal_proc waveInGetDevCapsA;
+            mal_proc waveInOpen;
+            mal_proc waveInClose;
+            mal_proc waveInPrepareHeader;
+            mal_proc waveInUnprepareHeader;
+            mal_proc waveInAddBuffer;
+            mal_proc waveInStart;
+            mal_proc waveInReset;
+        } winmm;
+#endif
+#ifdef MAL_SUPPORT_ALSA
+        struct
+        {
+            mal_handle asoundSO;
+            mal_proc snd_pcm_open;
+            mal_proc snd_pcm_close;
+            mal_proc snd_pcm_hw_params_sizeof;
+            mal_proc snd_pcm_hw_params_any;
+            mal_proc snd_pcm_hw_params_set_format;
+            mal_proc snd_pcm_hw_params_set_format_first;
+            mal_proc snd_pcm_hw_params_get_format_mask;
+            mal_proc snd_pcm_hw_params_set_channels_near;
+            mal_proc snd_pcm_hw_params_set_rate_resample;
+            mal_proc snd_pcm_hw_params_set_rate_near;
+            mal_proc snd_pcm_hw_params_set_buffer_size_near;
+            mal_proc snd_pcm_hw_params_set_periods_near;
+            mal_proc snd_pcm_hw_params_set_access;
+            mal_proc snd_pcm_hw_params_get_format;
+            mal_proc snd_pcm_hw_params_get_channels;
+            mal_proc snd_pcm_hw_params_get_rate;
+            mal_proc snd_pcm_hw_params_get_buffer_size;
+            mal_proc snd_pcm_hw_params_get_periods;
+            mal_proc snd_pcm_hw_params_get_access;
+            mal_proc snd_pcm_hw_params;
+            mal_proc snd_pcm_sw_params_sizeof;
+            mal_proc snd_pcm_sw_params_current;
+            mal_proc snd_pcm_sw_params_set_avail_min;
+            mal_proc snd_pcm_sw_params_set_start_threshold;
+            mal_proc snd_pcm_sw_params;
+            mal_proc snd_pcm_format_mask_sizeof;
+            mal_proc snd_pcm_format_mask_test;
+            mal_proc snd_pcm_get_chmap;
+            mal_proc snd_pcm_prepare;
+            mal_proc snd_pcm_start;
+            mal_proc snd_pcm_drop;
+            mal_proc snd_device_name_hint;
+            mal_proc snd_device_name_get_hint;
+            mal_proc snd_card_get_index;
+            mal_proc snd_device_name_free_hint;
+            mal_proc snd_pcm_mmap_begin;
+            mal_proc snd_pcm_mmap_commit;
+            mal_proc snd_pcm_recover;
+            mal_proc snd_pcm_readi;
+            mal_proc snd_pcm_writei;
+            mal_proc snd_pcm_avail;
+            mal_proc snd_pcm_avail_update;
+            mal_proc snd_pcm_wait;
+            mal_proc snd_pcm_info;
+            mal_proc snd_pcm_info_sizeof;
+            mal_proc snd_pcm_info_get_name;
+        } alsa;
+#endif
+#ifdef MAL_SUPPORT_COREAUDIO
+        struct
+        {
+            int _unused;
+        } coreaudio;
+#endif
+#ifdef MAL_SUPPORT_OSS
+        struct
+        {
+            int versionMajor;
+            int versionMinor;
+        } oss;
+#endif
+#ifdef MAL_SUPPORT_OPENSL
+        struct
+        {
+            int _unused;
+        } opensl;
+#endif
+#ifdef MAL_SUPPORT_OPENAL
+        struct
+        {
+            /*HMODULE*/ mal_handle hOpenAL;     // OpenAL32.dll, etc.
+            mal_proc alcCreateContext;
+            mal_proc alcMakeContextCurrent;
+            mal_proc alcProcessContext;
+            mal_proc alcSuspendContext;
+            mal_proc alcDestroyContext;
+            mal_proc alcGetCurrentContext;
+            mal_proc alcGetContextsDevice;
+            mal_proc alcOpenDevice;
+            mal_proc alcCloseDevice;
+            mal_proc alcGetError;
+            mal_proc alcIsExtensionPresent;
+            mal_proc alcGetProcAddress;
+            mal_proc alcGetEnumValue;
+            mal_proc alcGetString;
+            mal_proc alcGetIntegerv;
+            mal_proc alcCaptureOpenDevice;
+            mal_proc alcCaptureCloseDevice;
+            mal_proc alcCaptureStart;
+            mal_proc alcCaptureStop;
+            mal_proc alcCaptureSamples;
+
+            mal_proc alEnable;
+            mal_proc alDisable;
+            mal_proc alIsEnabled;
+            mal_proc alGetString;
+            mal_proc alGetBooleanv;
+            mal_proc alGetIntegerv;
+            mal_proc alGetFloatv;
+            mal_proc alGetDoublev;
+            mal_proc alGetBoolean;
+            mal_proc alGetInteger;
+            mal_proc alGetFloat;
+            mal_proc alGetDouble;
+            mal_proc alGetError;
+            mal_proc alIsExtensionPresent;
+            mal_proc alGetProcAddress;
+            mal_proc alGetEnumValue;
+            mal_proc alGenSources;
+            mal_proc alDeleteSources;
+            mal_proc alIsSource;
+            mal_proc alSourcef;
+            mal_proc alSource3f;
+            mal_proc alSourcefv;
+            mal_proc alSourcei;
+            mal_proc alSource3i;
+            mal_proc alSourceiv;
+            mal_proc alGetSourcef;
+            mal_proc alGetSource3f;
+            mal_proc alGetSourcefv;
+            mal_proc alGetSourcei;
+            mal_proc alGetSource3i;
+            mal_proc alGetSourceiv;
+            mal_proc alSourcePlayv;
+            mal_proc alSourceStopv;
+            mal_proc alSourceRewindv;
+            mal_proc alSourcePausev;
+            mal_proc alSourcePlay;
+            mal_proc alSourceStop;
+            mal_proc alSourceRewind;
+            mal_proc alSourcePause;
+            mal_proc alSourceQueueBuffers;
+            mal_proc alSourceUnqueueBuffers;
+            mal_proc alGenBuffers;
+            mal_proc alDeleteBuffers;
+            mal_proc alIsBuffer;
+            mal_proc alBufferData;
+            mal_proc alBufferf;
+            mal_proc alBuffer3f;
+            mal_proc alBufferfv;
+            mal_proc alBufferi;
+            mal_proc alBuffer3i;
+            mal_proc alBufferiv;
+            mal_proc alGetBufferf;
+            mal_proc alGetBuffer3f;
+            mal_proc alGetBufferfv;
+            mal_proc alGetBufferi;
+            mal_proc alGetBuffer3i;
+            mal_proc alGetBufferiv;
+
+            mal_bool32 isEnumerationSupported : 1;
+            mal_bool32 isFloat32Supported   : 1;
+            mal_bool32 isMCFormatsSupported : 1;
+        } openal;
+#endif
+#ifdef MAL_SUPPORT_SDL
+        struct
+        {
+            mal_handle hSDL;    // SDL
+            mal_proc SDL_InitSubSystem;
+            mal_proc SDL_QuitSubSystem;
+            mal_proc SDL_CloseAudio;
+            mal_proc SDL_OpenAudio;
+            mal_proc SDL_PauseAudio;
+            mal_proc SDL_GetNumAudioDevices;
+            mal_proc SDL_GetAudioDeviceName;
+            mal_proc SDL_CloseAudioDevice;
+            mal_proc SDL_OpenAudioDevice;
+            mal_proc SDL_PauseAudioDevice;
+
+            mal_bool32 usingSDL1;
+        } sdl;
+#endif
+#ifdef MAL_SUPPORT_NULL
+        struct
+        {
+            int _unused;
+        } null_backend;
+#endif
+    };
+
+    union
+    {
+#ifdef MAL_WIN32
+        struct
+        {
+            /*HMODULE*/ mal_handle hOle32DLL;
+            mal_proc CoInitializeEx;
+            mal_proc CoUninitialize;
+            mal_proc CoCreateInstance;
+            mal_proc CoTaskMemFree;
+            mal_proc PropVariantClear;
+
+            /*HMODULE*/ mal_handle hUser32DLL;
+            mal_proc GetForegroundWindow;
+            mal_proc GetDesktopWindow;
+        } win32;
+#endif
+#ifdef MAL_POSIX
+        struct
+        {
+            mal_handle pthreadSO;
+            mal_proc pthread_create;
+            mal_proc pthread_join;
+            mal_proc pthread_mutex_init;
+            mal_proc pthread_mutex_destroy;
+            mal_proc pthread_mutex_lock;
+            mal_proc pthread_mutex_unlock;
+            mal_proc pthread_cond_init;
+            mal_proc pthread_cond_destroy;
+            mal_proc pthread_cond_wait;
+            mal_proc pthread_cond_signal;
+        } posix;
+#endif
+        int _unused;
+    };
+};
+
+struct mal_device   // State for a single playback or capture device; filled in by mal_device_init().
+{
+    mal_context* pContext;  // Context given to mal_device_init(). Held by pointer, so the mal_context object must not move while the device exists.
+    mal_device_type type;
+    mal_format format;
+    mal_uint32 channels;
+    mal_uint32 sampleRate;
+    mal_uint8  channelMap[MAL_MAX_CHANNELS];
+    mal_uint32 bufferSizeInFrames;
+    mal_uint32 periods;
+    mal_uint32 state;       // NOTE(review): presumably one of the MAL_STATE_* values - confirm against the implementation.
+    mal_recv_proc onRecv;
+    mal_send_proc onSend;
+    mal_stop_proc onStop;
+    void* pUserData;        // Application defined data.
+    char name[256];
+    mal_mutex lock;
+    mal_event wakeupEvent;
+    mal_event startEvent;
+    mal_event stopEvent;
+    mal_thread thread;
+    mal_result workResult;  // This is set by the worker thread after it's finished doing a job.
+    mal_bool32 usingDefaultBufferSize : 1;
+    mal_bool32 usingDefaultPeriods    : 1;
+    mal_bool32 exclusiveMode          : 1;
+    mal_format internalFormat;      // internal*: the properties actually in use after initialization; these may differ from the requested ones (see mal_device_init()).
+    mal_uint32 internalChannels;
+    mal_uint32 internalSampleRate;
+    mal_uint8  internalChannelMap[MAL_MAX_CHANNELS];
+    mal_dsp dsp;                    // Samples run through this to convert samples to a format suitable for use by the backend.
+    mal_uint32 _dspFrameCount;      // Internal use only. Used when running the device -> DSP -> client pipeline. See mal_device__on_read_from_device().
+    const mal_uint8* _dspFrames;    // ^^^ AS ABOVE ^^^
+
+    union   // Backend-specific state. Each member is only compiled in when its MAL_SUPPORT_* macro is defined.
+    {
+#ifdef MAL_SUPPORT_WASAPI
+        struct
+        {
+            /*IAudioClient**/ mal_ptr pAudioClient;
+            /*IAudioRenderClient**/ mal_ptr pRenderClient;
+            /*IAudioCaptureClient**/ mal_ptr pCaptureClient;
+            /*HANDLE*/ mal_handle hEvent;
+            /*HANDLE*/ mal_handle hStopEvent;
+            mal_bool32 breakFromMainLoop;
+        } wasapi;
+#endif
+#ifdef MAL_SUPPORT_DSOUND
+        struct
+        {
+            /*HMODULE*/ mal_handle hDSoundDLL;
+            /*LPDIRECTSOUND*/ mal_ptr pPlayback;
+            /*LPDIRECTSOUNDBUFFER*/ mal_ptr pPlaybackPrimaryBuffer;
+            /*LPDIRECTSOUNDBUFFER*/ mal_ptr pPlaybackBuffer;
+            /*LPDIRECTSOUNDCAPTURE*/ mal_ptr pCapture;
+            /*LPDIRECTSOUNDCAPTUREBUFFER*/ mal_ptr pCaptureBuffer;
+            /*LPDIRECTSOUNDNOTIFY*/ mal_ptr pNotify;
+            /*HANDLE*/ mal_handle pNotifyEvents[MAL_MAX_PERIODS_DSOUND];  // One event handle for each period.
+            /*HANDLE*/ mal_handle hStopEvent;
+            mal_uint32 lastProcessedFrame;      // This is circular.
+            mal_bool32 breakFromMainLoop;
+        } dsound;
+#endif
+#ifdef MAL_SUPPORT_WINMM
+        struct
+        {
+            /*HWAVEOUT, HWAVEIN*/ mal_handle hDevice;
+            /*HANDLE*/ mal_handle hEvent;
+            mal_uint32 fragmentSizeInFrames;
+            mal_uint32 fragmentSizeInBytes;
+            mal_uint32 iNextHeader;             // [0,periods). Used as an index into pWAVEHDR.
+            /*WAVEHDR**/ mal_uint8* pWAVEHDR;   // One instantiation for each period.
+            mal_uint8* pIntermediaryBuffer;
+            mal_uint8* _pHeapData;              // Used internally and is used for the heap allocated data for the intermediary buffer and the WAVEHDR structures.
+            mal_bool32 breakFromMainLoop;
+        } winmm;
+#endif
+#ifdef MAL_SUPPORT_ALSA
+        struct
+        {
+            /*snd_pcm_t**/ mal_ptr pPCM;
+            mal_bool32 isUsingMMap       : 1;
+            mal_bool32 breakFromMainLoop : 1;
+            void* pIntermediaryBuffer;
+        } alsa;
+#endif
+#ifdef MAL_SUPPORT_COREAUDIO
+        struct
+        {
+            int _unused;    // Placeholder only; no Core Audio device state is declared here.
+        } coreaudio;
+#endif
+#ifdef MAL_SUPPORT_OSS
+        struct
+        {
+            int fd;
+            mal_uint32 fragmentSizeInFrames;
+            mal_bool32 breakFromMainLoop;
+            void* pIntermediaryBuffer;
+        } oss;
+#endif
+#ifdef MAL_SUPPORT_OPENSL
+        struct
+        {
+            /*SLObjectItf*/ mal_ptr pOutputMixObj;
+            /*SLOutputMixItf*/ mal_ptr pOutputMix;
+            /*SLObjectItf*/ mal_ptr pAudioPlayerObj;
+            /*SLPlayItf*/ mal_ptr pAudioPlayer;
+            /*SLObjectItf*/ mal_ptr pAudioRecorderObj;
+            /*SLRecordItf*/ mal_ptr pAudioRecorder;
+            /*SLAndroidSimpleBufferQueueItf*/ mal_ptr pBufferQueue;
+            mal_uint32 periodSizeInFrames;
+            mal_uint32 currentBufferIndex;
+            mal_uint8* pBuffer;                 // This is malloc()'d and is used for storing audio data. Typed as mal_uint8 for easy offsetting.
+        } opensl;
+#endif
+#ifdef MAL_SUPPORT_OPENAL
+        struct
+        {
+            /*ALCcontext**/ mal_ptr pContextALC;
+            /*ALCdevice**/ mal_ptr pDeviceALC;
+            /*ALuint*/ mal_uint32 sourceAL;
+            /*ALuint*/ mal_uint32 buffersAL[MAL_MAX_PERIODS_OPENAL];
+            /*ALenum*/ mal_uint32 formatAL;
+            mal_uint32 subBufferSizeInFrames;   // This is the size of each of the OpenAL buffers (buffersAL).
+            mal_uint8* pIntermediaryBuffer;     // This is malloc()'d and is used as the destination for reading from the client. Typed as mal_uint8 for easy offsetting.
+            mal_uint32 iNextBuffer;             // The next buffer to unenqueue and then re-enqueue as new data is read.
+            mal_bool32 breakFromMainLoop;
+        } openal;
+#endif
+#ifdef MAL_SUPPORT_SDL
+        struct
+        {
+            mal_uint32 deviceID;
+        } sdl;
+#endif
+#ifdef MAL_SUPPORT_NULL
+        struct
+        {
+            mal_timer timer;
+            mal_uint32 lastProcessedFrame;      // This is circular.
+            mal_bool32 breakFromMainLoop;
+            mal_uint8* pBuffer;                 // This is malloc()'d and is used as the destination for reading from the client. Typed as mal_uint8 for easy offsetting.
+        } null_device;
+#endif
+    };
+};
+#if defined(_MSC_VER)
+    #pragma warning(pop)
+#endif
+
+// Initializes a context.
+//
+// The context is used for selecting and initializing the relevant backends.
+//
+// Note that the location of the context cannot change throughout its lifetime. Consider allocating
+// the mal_context object with malloc() if this is an issue. The reason for this is that a pointer
+// to the context is stored in the mal_device structure.
+//
+// <backends> is used to allow the application to prioritize backends depending on its specific
+// requirements. This can be null in which case it uses the default priority, which is as follows:
+//   - WASAPI
+//   - DirectSound
+//   - WinMM
+//   - ALSA
+//   - OSS
+//   - OpenSL|ES
+//   - OpenAL
+//   - SDL
+//   - Null
+//
+// The onLog callback is used for posting log messages back to the client for diagnostics, debugging,
+// etc. You can pass NULL for this if you do not need it.
+//
+// Return Value:
+//   MAL_SUCCESS if successful; any other error code otherwise.
+//
+// Thread Safety: UNSAFE
+//
+// Efficiency: LOW
+//   This will dynamically load backend DLLs/SOs (such as dsound.dll).
+mal_result mal_context_init(mal_backend backends[], mal_uint32 backendCount, const mal_context_config* pConfig, mal_context* pContext);
+
+// Uninitializes a context.
+//
+// Results are undefined if you call this while any device created by this context is still active.
+//
+// Return Value:
+//   MAL_SUCCESS if successful; any other error code otherwise.
+//
+// Thread Safety: UNSAFE
+//
+// Efficiency: LOW
+//   This will unload the backend DLLs/SOs.
+mal_result mal_context_uninit(mal_context* pContext);
+
+// Enumerates over each device of the given type (playback or capture).
+//
+// It is _not_ safe to assume the first enumerated device is the default device.
+//
+// Some backends and platforms may only support default playback and capture devices.
+//
+// Return Value:
+//   MAL_SUCCESS if successful; any other error code otherwise.
+//
+// Thread Safety: SAFE, SEE NOTES.
+//   This API uses an application-defined buffer for output. This is thread-safe so long as the
+//   application ensures mutual exclusion to the output buffer at their level.
+//
+// Efficiency: LOW
+mal_result mal_enumerate_devices(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo);
+
+// Initializes a device.
+//
+// The device ID (pDeviceID) can be null, in which case the default device is used. Otherwise, you
+// can retrieve the ID by calling mal_enumerate_devices() and using the ID from the returned data.
+// Set pDeviceID to NULL to use the default device. Do _not_ rely on the first device ID returned
+// by mal_enumerate_devices() to be the default device.
+//
+// This will try its hardest to create a valid device, even if it means adjusting input arguments.
+// Look at pDevice->internalChannels, pDevice->internalSampleRate, etc. to determine the actual
+// properties after initialization.
+//
+// If <bufferSizeInFrames> is 0, it will default to MAL_DEFAULT_BUFFER_SIZE_IN_MILLISECONDS. If
+// <periods> is set to 0 it will default to MAL_DEFAULT_PERIODS.
+//
+// The <periods> property controls how frequently the background thread is woken to check for more
+// data. It's tied to the buffer size, so as an example, if your buffer size is equivalent to 10
+// milliseconds and you have 2 periods, the CPU will wake up approximately every 5 milliseconds.
+//
+// Use mal_device_config_init(), mal_device_config_init_playback(), etc. to initialize a
+// mal_device_config object.
+//
+// When compiling for UWP you must ensure you call this function on the main UI thread because the
+// operating system may need to present the user with a message asking for permissions. Please refer
+// to the official documentation for ActivateAudioInterfaceAsync() for more information.
+//
+// Return Value:
+//   MAL_SUCCESS if successful; any other error code otherwise.
+//
+// Thread Safety: UNSAFE
+//   It is not safe to call this function simultaneously for different devices because some backends
+//   depend on and mutate global state (such as OpenSL|ES). The same applies to calling this at the
+//   same time as mal_device_uninit().
+//
+//   Results are undefined if you try using a device before this function has returned.
+//
+// Efficiency: LOW
+//   This is just slow due to the nature of it being an initialization API.
+mal_result mal_device_init(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, void* pUserData, mal_device* pDevice);
+
+// Uninitializes a device.
+//
+// This will explicitly stop the device. You do not need to call mal_device_stop() beforehand, but it's
+// harmless if you do.
+//
+// Return Value:
+//   MAL_SUCCESS if successful; any other error code otherwise.
+//
+// Thread Safety: UNSAFE
+//   As soon as this API is called the device should be considered undefined. All bets are off if you
+//   try using the device at the same time as uninitializing it.
+//
+// Efficiency: LOW
+//   This will stop the device with mal_device_stop() which is a slow, synchronized call. It also needs
+//   to destroy internal objects like the backend-specific objects and the background thread.
+void mal_device_uninit(mal_device* pDevice);
+
+// Sets the callback to use when the application has received data from the device.
+//
+// Thread Safety: SAFE
+//   This API is implemented as a simple atomic assignment.
+//
+// Efficiency: HIGH
+//   This is just an atomic assignment.
+void mal_device_set_recv_callback(mal_device* pDevice, mal_recv_proc proc);
+
+// Sets the callback to use when the application needs to send data to the device for playback.
+//
+// Note that the implementation of this callback must copy over as many samples as is available. The
+// return value specifies how many samples were written to the output buffer. The backend will fill
+// any leftover samples with silence.
+//
+// Thread Safety: SAFE
+//   This API is implemented as a simple atomic assignment.
+//
+// Efficiency: HIGH
+//   This is just an atomic assignment.
+void mal_device_set_send_callback(mal_device* pDevice, mal_send_proc proc);
+
+// Sets the callback to use when the device has stopped, either explicitly or as a result of an error.
+//
+// Thread Safety: SAFE
+//   This API is implemented as a simple atomic assignment.
+//
+// Efficiency: HIGH
+//   This is just an atomic assignment.
+void mal_device_set_stop_callback(mal_device* pDevice, mal_stop_proc proc);
+
+// Activates the device. For playback devices this begins playback. For capture devices it begins
+// recording.
+//
+// For a playback device, this will retrieve an initial chunk of audio data from the client before
+// returning. The reason for this is to ensure there is valid audio data in the buffer, which needs
+// to be done _before_ the device begins playback.
+//
+// Return Value:
+//   - MAL_SUCCESS if successful; any other error code otherwise.
+//   - MAL_INVALID_ARGS
+//       One or more of the input arguments is invalid.
+//   - MAL_DEVICE_NOT_INITIALIZED
+//       The device is not currently or was never initialized.
+//   - MAL_DEVICE_BUSY
+//       The device is in the process of stopping. This will only happen if mal_device_start() and
+//       mal_device_stop() are called simultaneously on separate threads. This will never be returned in
+//       single-threaded applications.
+//   - MAL_DEVICE_ALREADY_STARTING
+//       The device is already in the process of starting. This will never be returned in single-threaded
+//       applications.
+//   - MAL_DEVICE_ALREADY_STARTED
+//       The device is already started.
+//   - MAL_FAILED_TO_READ_DATA_FROM_CLIENT
+//       Failed to read the initial chunk of audio data from the client. This initial chunk of data is
+//       required so that the device has valid audio data as soon as it starts playing. This will never
+//       be returned for capture devices.
+//   - MAL_FAILED_TO_START_BACKEND_DEVICE
+//       There was a backend-specific error starting the device.
+//
+// Thread Safety: SAFE
+//
+// Efficiency: LOW
+//   This API waits until the backend device has been started for real by the worker thread. It also
+//   waits on a mutex for thread-safety.
+mal_result mal_device_start(mal_device* pDevice);
+
+// Puts the device to sleep, but does not uninitialize it. Use mal_device_start() to start it up again.
+//
+// Return Value:
+//   - MAL_SUCCESS if successful; any other error code otherwise.
+//   - MAL_INVALID_ARGS
+//       One or more of the input arguments is invalid.
+//   - MAL_DEVICE_NOT_INITIALIZED
+//       The device is not currently or was never initialized.
+//   - MAL_DEVICE_BUSY
+//       The device is in the process of starting. This will only happen if mal_device_start() and
+//       mal_device_stop() are called simultaneously on separate threads. This will never be returned in
+//       single-threaded applications.
+//   - MAL_DEVICE_ALREADY_STOPPING
+//       The device is already in the process of stopping. This will never be returned in single-threaded
+//       applications.
+//   - MAL_DEVICE_ALREADY_STOPPED
+//       The device is already stopped.
+//   - MAL_FAILED_TO_STOP_BACKEND_DEVICE
+//       There was a backend-specific error stopping the device.
+//
+// Thread Safety: SAFE
+//
+// Efficiency: LOW
+//   This API needs to wait on the worker thread to stop the backend device properly before returning. It
+//   also waits on a mutex for thread-safety.
+//
+//   In addition, some backends need to wait for the device to finish playback/recording of the current
+//   fragment which can take some time (usually proportionate to the buffer size used when initializing
+//   the device).
+mal_result mal_device_stop(mal_device* pDevice);
+
+// Determines whether or not the device is started.
+//
+// Return Value:
+//   True if the device is started, false otherwise.
+//
+// Thread Safety: SAFE
+//   If another thread calls mal_device_start() or mal_device_stop() at the same time as this function
+//   is called, there's a very small chance the return value will be out of sync.
+//
+// Efficiency: HIGH
+//   This is implemented with a simple accessor.
+mal_bool32 mal_device_is_started(mal_device* pDevice);
+
+// Retrieves the size of the buffer in bytes for the given device.
+//
+// Thread Safety: SAFE
+//   This is calculated from constant values which are set at initialization time and never change.
+//
+// Efficiency: HIGH
+//   This is implemented with just a few 32-bit integer multiplications.
+mal_uint32 mal_device_get_buffer_size_in_bytes(mal_device* pDevice);
+
+// Retrieves the size of a sample in bytes for the given format.
+//
+// Thread Safety: SAFE
+//   This API is pure.
+//
+// Efficiency: HIGH
+//   This is implemented with a lookup table.
+mal_uint32 mal_get_sample_size_in_bytes(mal_format format);
+
+// Helper function for initializing a mal_context_config object.
+mal_context_config mal_context_config_init(mal_log_proc onLog);
+
+// Helper function for initializing a mal_device_config object.
+//
+// This is just a helper API, and as such the returned object can be safely modified as needed.
+//
+// The default channel mapping is based on the channel count, as per the table below. Note that these
+// can be freely changed after this function returns if you are needing something in particular.
+//
+// |---------------|------------------------------|
+// | Channel Count | Mapping                      |
+// |---------------|------------------------------|
+// | 1 (Mono)      | 0: MAL_CHANNEL_FRONT_CENTER  |
+// |---------------|------------------------------|
+// | 2 (Stereo)    | 0: MAL_CHANNEL_FRONT_LEFT    |
+// |               | 1: MAL_CHANNEL_FRONT_RIGHT   |
+// |---------------|------------------------------|
+// | 3 (2.1)       | 0: MAL_CHANNEL_FRONT_LEFT    |
+// |               | 1: MAL_CHANNEL_FRONT_RIGHT   |
+// |               | 2: MAL_CHANNEL_LFE           |
+// |---------------|------------------------------|
+// | 4 (Quad)      | 0: MAL_CHANNEL_FRONT_LEFT    |
+// |               | 1: MAL_CHANNEL_FRONT_RIGHT   |
+// |               | 2: MAL_CHANNEL_BACK_LEFT     |
+// |               | 3: MAL_CHANNEL_BACK_RIGHT    |
+// |---------------|------------------------------|
+// | 5 (4.1)       | 0: MAL_CHANNEL_FRONT_LEFT    |
+// |               | 1: MAL_CHANNEL_FRONT_RIGHT   |
+// |               | 2: MAL_CHANNEL_BACK_LEFT     |
+// |               | 3: MAL_CHANNEL_BACK_RIGHT    |
+// |               | 4: MAL_CHANNEL_LFE           |
+// |---------------|------------------------------|
+// | 6 (5.1)       | 0: MAL_CHANNEL_FRONT_LEFT    |
+// |               | 1: MAL_CHANNEL_FRONT_RIGHT   |
+// |               | 2: MAL_CHANNEL_FRONT_CENTER  |
+// |               | 3: MAL_CHANNEL_LFE           |
+// |               | 4: MAL_CHANNEL_BACK_LEFT     |
+// |               | 5: MAL_CHANNEL_BACK_RIGHT    |
+// |---------------|------------------------------|
+// | 8 (7.1)       | 0: MAL_CHANNEL_FRONT_LEFT    |
+// |               | 1: MAL_CHANNEL_FRONT_RIGHT   |
+// |               | 2: MAL_CHANNEL_FRONT_CENTER  |
+// |               | 3: MAL_CHANNEL_LFE           |
+// |               | 4: MAL_CHANNEL_BACK_LEFT     |
+// |               | 5: MAL_CHANNEL_BACK_RIGHT    |
+// |               | 6: MAL_CHANNEL_SIDE_LEFT     |
+// |               | 7: MAL_CHANNEL_SIDE_RIGHT    |
+// |---------------|------------------------------|
+// | Other         | All channels set to 0. This  |
+// |               | is equivalent to the same    |
+// |               | mapping as the device.       |
+// |---------------|------------------------------|
+//
+// Thread Safety: SAFE
+//
+// Efficiency: HIGH
+//   This just returns a stack allocated object and consists of just a few assignments.
+mal_device_config mal_device_config_init(mal_format format, mal_uint32 channels, mal_uint32 sampleRate, mal_recv_proc onRecvCallback, mal_send_proc onSendCallback);
+
+// A simplified version of mal_device_config_init() for capture devices: forwards to it with onRecvCallback and a NULL send callback.
+static inline mal_device_config mal_device_config_init_capture(mal_format format, mal_uint32 channels, mal_uint32 sampleRate, mal_recv_proc onRecvCallback) { return mal_device_config_init(format, channels, sampleRate, onRecvCallback, NULL); }
+
+// A simplified version of mal_device_config_init() for playback devices: forwards to it with a NULL receive callback and onSendCallback.
+static inline mal_device_config mal_device_config_init_playback(mal_format format, mal_uint32 channels, mal_uint32 sampleRate, mal_send_proc onSendCallback) { return mal_device_config_init(format, channels, sampleRate, NULL, onSendCallback); }
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// SRC
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Initializes a sample rate conversion object.
+mal_result mal_src_init(mal_src_config* pConfig, mal_src_read_proc onRead, void* pUserData, mal_src* pSRC);
+
+// Dynamically adjusts the output sample rate.
+//
+// This is useful for dynamically adjusting pitch. Keep in mind, however, that this will speed up or slow down the sound. If this
+// is not acceptable you will need to use your own algorithm.
+mal_result mal_src_set_output_sample_rate(mal_src* pSRC, mal_uint32 sampleRateOut);
+
+// Reads a number of frames.
+//
+// Returns the number of frames actually read.
+mal_uint32 mal_src_read_frames(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut);
+
+// The same as mal_src_read_frames(), but with extra control over whether or not the internal buffers should be flushed at the end.
+//
+// Internally there exists a buffer that keeps track of the previous and next samples for sample rate conversion. The simple
+// version of this function does _not_ flush this buffer because otherwise it causes glitches for streaming based conversion
+// pipelines. The problem, however, is that sometimes you need those last few samples (such as if you're doing a bulk conversion
+// of a static file). Enabling flushing will fix this for you.
+mal_uint32 mal_src_read_frames_ex(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// DSP
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Initializes a DSP object.
+mal_result mal_dsp_init(mal_dsp_config* pConfig, mal_dsp_read_proc onRead, void* pUserData, mal_dsp* pDSP);
+
+// Dynamically adjusts the output sample rate.
+//
+// This is useful for dynamically adjusting pitch. Keep in mind, however, that this will speed up or slow down the sound. If this
+// is not acceptable you will need to use your own algorithm.
+mal_result mal_dsp_set_output_sample_rate(mal_dsp* pDSP, mal_uint32 sampleRateOut);
+
+// Reads a number of frames and runs them through the DSP processor.
+//
+// This does _not_ flush the internal buffers which means you may end up with a few fewer frames than you may expect. Look at
+// mal_dsp_read_frames_ex() if you want to flush the buffers at the end of the read.
+mal_uint32 mal_dsp_read_frames(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut);
+
+// The same as mal_dsp_read_frames(), but with extra control over whether or not the internal buffers should be flushed at the end.
+//
+// See documentation for mal_src_read_frames_ex() for an explanation on flushing.
+mal_uint32 mal_dsp_read_frames_ex(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush);
+
+// High-level helper for doing a full format conversion in one go. Returns the number of output frames. Call this with pOut set to NULL to
+// determine the required size of the output buffer.
+//
+// A return value of 0 indicates an error.
+//
+// This function is useful for one-off bulk conversions, but if you're streaming data you should use the DSP APIs instead.
+mal_uint32 mal_convert_frames(void* pOut, mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut, const void* pIn, mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_uint32 frameCountIn);
+
+// Helper for initializing a mal_dsp_config object.
+mal_dsp_config mal_dsp_config_init(mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Utilities
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Creates a mutex.
+//
+// A mutex must be created from a valid context. A mutex is initially unlocked.
+mal_result mal_mutex_init(mal_context* pContext, mal_mutex* pMutex);
+
+// Deletes a mutex.
+void mal_mutex_uninit(mal_mutex* pMutex);
+
+// Locks a mutex with an infinite timeout.
+void mal_mutex_lock(mal_mutex* pMutex);
+
+// Unlocks a mutex.
+void mal_mutex_unlock(mal_mutex* pMutex);
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Miscellaneous Helpers
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Retrieves a friendly name for a backend.
+const char* mal_get_backend_name(mal_backend backend);
+
+// Retrieves a friendly name for a format.
+const char* mal_get_format_name(mal_format format);
+
+// Blends two frames in floating point format.
+void mal_blend_f32(float* pOut, float* pInA, float* pInB, float factor, mal_uint32 channels);
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Format Conversion
+//
+///////////////////////////////////////////////////////////////////////////////
+void mal_pcm_u8_to_s16(short* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_u8_to_s24(void* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_u8_to_s32(int* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_u8_to_f32(float* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_s16_to_u8(unsigned char* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s16_to_s24(void* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s16_to_s32(int* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s16_to_f32(float* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s24_to_u8(unsigned char* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s24_to_s16(short* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s24_to_s32(int* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s24_to_f32(float* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s32_to_u8(unsigned char* pOut, const int* pIn, unsigned int count);
+void mal_pcm_s32_to_s16(short* pOut, const int* pIn, unsigned int count);
+void mal_pcm_s32_to_s24(void* pOut, const int* pIn, unsigned int count);
+void mal_pcm_s32_to_f32(float* pOut, const int* pIn, unsigned int count);
+void mal_pcm_f32_to_u8(unsigned char* pOut, const float* pIn, unsigned int count);
+void mal_pcm_f32_to_s16(short* pOut, const float* pIn, unsigned int count);
+void mal_pcm_f32_to_s24(void* pOut, const float* pIn, unsigned int count);
+void mal_pcm_f32_to_s32(int* pOut, const float* pIn, unsigned int count);
+void mal_pcm_convert(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, unsigned int sampleCount);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  //mini_al_h
+
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+//
+// IMPLEMENTATION
+//
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_IMPLEMENTATION
+#include <assert.h>
+
+#ifdef MAL_WIN32
+#include <windows.h>
+#else
+#include <stdlib.h> // For malloc()/free()
+#include <string.h> // For memset()
+#endif
+
+#ifdef MAL_POSIX
+#include <unistd.h>
+#include <dlfcn.h>
+#endif
+
+#if !defined(MAL_64BIT) && !defined(MAL_32BIT)
+#ifdef _WIN32
+#ifdef _WIN64
+#define MAL_64BIT
+#else
+#define MAL_32BIT
+#endif
+#endif
+#endif
+
+#if !defined(MAL_64BIT) && !defined(MAL_32BIT)
+#ifdef __GNUC__
+#ifdef __LP64__
+#define MAL_64BIT
+#else
+#define MAL_32BIT
+#endif
+#endif
+#endif
+
+#if !defined(MAL_64BIT) && !defined(MAL_32BIT)
+#include <stdint.h>
+#if INTPTR_MAX == INT64_MAX
+#define MAL_64BIT
+#else
+#define MAL_32BIT
+#endif
+#endif
+
+
+// Disable run-time linking on certain backends.
+#ifndef MAL_NO_RUNTIME_LINKING
+    #if defined(MAL_ANDROID) || defined(MAL_EMSCRIPTEN)
+        #define MAL_NO_RUNTIME_LINKING
+    #endif
+#endif
+
+// Check if we have the necessary development packages for each backend at the top so we can use this to determine whether or not
+// certain unused functions and variables can be excluded from the build to avoid warnings.
+#ifdef MAL_ENABLE_WASAPI
+    #define MAL_HAS_WASAPI
+    #ifdef __has_include
+        #if !__has_include(<audioclient.h>)
+            #undef MAL_HAS_WASAPI
+        #endif
+    #endif
+#endif
+#ifdef MAL_ENABLE_DSOUND
+    #define MAL_HAS_DSOUND
+    #ifdef __has_include
+        #if !__has_include(<dsound.h>)
+            #undef MAL_HAS_DSOUND
+        #endif
+    #endif
+#endif
+#ifdef MAL_ENABLE_WINMM
+    #define MAL_HAS_WINMM   // Every compiler I'm aware of supports WinMM.
+#endif
+#ifdef MAL_ENABLE_ALSA
+    #define MAL_HAS_ALSA
+    #ifdef __has_include
+        #if !__has_include(<alsa/asoundlib.h>)
+            #undef MAL_HAS_ALSA
+        #endif
+    #endif
+#endif
+#ifdef MAL_ENABLE_COREAUDIO
+    #define MAL_HAS_COREAUDIO
+#endif
+#ifdef MAL_ENABLE_OSS
+    #define MAL_HAS_OSS     // OSS is the only supported backend for Unix and BSD, so it must be present else this library is useless.
+#endif
+#ifdef MAL_ENABLE_OPENSL
+    #define MAL_HAS_OPENSL  // Like OSS, OpenSL is the only supported backend for Android. It must be present.
+#endif
+#ifdef MAL_ENABLE_OPENAL
+    #define MAL_HAS_OPENAL  // mini_al inlines the necessary OpenAL stuff.
+#endif
+#ifdef MAL_ENABLE_SDL
+    #define MAL_HAS_SDL
+
+    // SDL headers are necessary if using compile-time linking.
+    #ifdef MAL_NO_RUNTIME_LINKING
+        #ifdef __has_include
+            #ifdef MAL_EMSCRIPTEN
+                #if !__has_include(<SDL/SDL_audio.h>)
+                    #undef MAL_HAS_SDL
+                #endif
+            #else
+                #if !__has_include(<SDL2/SDL_audio.h>)
+                    #undef MAL_HAS_SDL
+                #endif
+            #endif
+        #endif
+    #endif
+#endif
+#ifdef MAL_ENABLE_NULL
+    #define MAL_HAS_NULL    // Everything supports the null backend.
+#endif
+
+
+#ifdef MAL_WIN32
+    #define MAL_THREADCALL WINAPI
+    typedef unsigned long mal_thread_result;
+#else
+    #define MAL_THREADCALL
+    typedef void* mal_thread_result;
+#endif
+typedef mal_thread_result (MAL_THREADCALL * mal_thread_entry_proc)(void* pData);
+
+#ifdef MAL_WIN32
+typedef HRESULT (WINAPI * MAL_PFN_CoInitializeEx)(LPVOID pvReserved, DWORD  dwCoInit);
+typedef void    (WINAPI * MAL_PFN_CoUninitialize)();
+typedef HRESULT (WINAPI * MAL_PFN_CoCreateInstance)(REFCLSID rclsid, LPUNKNOWN pUnkOuter, DWORD dwClsContext, REFIID riid, LPVOID *ppv);
+typedef void    (WINAPI * MAL_PFN_CoTaskMemFree)(LPVOID pv);
+typedef HRESULT (WINAPI * MAL_PFN_PropVariantClear)(PROPVARIANT *pvar);
+
+typedef HWND (WINAPI * MAL_PFN_GetForegroundWindow)();
+typedef HWND (WINAPI * MAL_PFN_GetDesktopWindow)();
+#endif
+
+
+#define MAL_STATE_UNINITIALIZED     0
+#define MAL_STATE_STOPPED           1   // The device's default state after initialization.
+#define MAL_STATE_STARTED           2   // The worker thread is in its main loop waiting for the driver to request or deliver audio data.
+#define MAL_STATE_STARTING          3   // Transitioning from a stopped state to started.
+#define MAL_STATE_STOPPING          4   // Transitioning from a started state to stopped.
+
+
+// The default size of the device's buffer in milliseconds.
+//
+// If this is too small you may get underruns and overruns in which case you'll need to either increase
+// this value or use an explicit buffer size.
+#ifndef MAL_DEFAULT_BUFFER_SIZE_IN_MILLISECONDS
+#define MAL_DEFAULT_BUFFER_SIZE_IN_MILLISECONDS     25
+#endif
+
+// Default periods when none is specified in mal_device_init(). More periods means more work on the CPU.
+#ifndef MAL_DEFAULT_PERIODS
+#define MAL_DEFAULT_PERIODS                         2
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Standard Library Stuff
+//
+///////////////////////////////////////////////////////////////////////////////
+// Each of the macros below is only defined when the name is not already defined, so a build can
+// supply its own implementation by defining the macro before this point.
+
+// Zeroes sz bytes starting at p.
+#ifndef mal_zero_memory
+#ifdef MAL_WIN32
+#define mal_zero_memory(p, sz) ZeroMemory((p), (sz))
+#else
+#define mal_zero_memory(p, sz) memset((p), 0, (sz))
+#endif
+#endif
+
+// Zeroes the object p points to; the size is taken from sizeof(*(p)).
+#define mal_zero_object(p) mal_zero_memory((p), sizeof(*(p)))
+
+// Copies sz bytes from src to dst.
+#ifndef mal_copy_memory
+#ifdef MAL_WIN32
+#define mal_copy_memory(dst, src, sz) CopyMemory((dst), (src), (sz))
+#else
+#define mal_copy_memory(dst, src, sz) memcpy((dst), (src), (sz))
+#endif
+#endif
+
+// Allocates sz bytes. On Win32 the block comes from the process heap.
+#ifndef mal_malloc
+#ifdef MAL_WIN32
+#define mal_malloc(sz) HeapAlloc(GetProcessHeap(), 0, (sz))
+#else
+#define mal_malloc(sz) malloc((sz))
+#endif
+#endif
+
+// Resizes an allocation. The Win32 form allocates when p is null, frees and yields a null pointer
+// when sz is zero, and otherwise reallocates. Note that the Win32 form evaluates p and sz more than once.
+#ifndef mal_realloc
+#ifdef MAL_WIN32
+#define mal_realloc(p, sz) (((sz) > 0) ? ((p) ? HeapReAlloc(GetProcessHeap(), 0, (p), (sz)) : HeapAlloc(GetProcessHeap(), 0, (sz))) : ((VOID*)(SIZE_T)(HeapFree(GetProcessHeap(), 0, (p)) & 0)))
+#else
+#define mal_realloc(p, sz) realloc((p), (sz))
+#endif
+#endif
+
+// Releases memory obtained from mal_malloc() or mal_realloc().
+#ifndef mal_free
+#ifdef MAL_WIN32
+#define mal_free(p) HeapFree(GetProcessHeap(), 0, (p))
+#else
+#define mal_free(p) free((p))
+#endif
+#endif
+
+// Assertion hook. Both platform branches currently forward to the standard assert().
+#ifndef mal_assert
+#ifdef MAL_WIN32
+#define mal_assert(condition) assert(condition)
+#else
+#define mal_assert(condition) assert(condition)
+#endif
+#endif
+
+// Element count of an array, and min/max helpers. The arguments of mal_max/mal_min are evaluated twice.
+#define mal_countof(x)  (sizeof(x) / sizeof(x[0]))
+#define mal_max(x, y)   (((x) > (y)) ? (x) : (y))
+#define mal_min(x, y)   (((x) < (y)) ? (x) : (y))
+
+// Number of whole frames that fit in an array. Uses sizeof(buffer), so buffer must be an actual array, not a pointer.
+#define mal_buffer_frame_capacity(buffer, channels, format) (sizeof(buffer) / mal_get_sample_size_in_bytes(format) / (channels))
+
+// Some of these string utility functions are unused on some platforms.
+#if defined(__GNUC__)
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+// Copies the null-terminated string src into dst, which holds dstSizeInBytes bytes.
+//
+// Return Values:
+//   0:  Success
+//   22: EINVAL (dst or src is null)
+//   34: ERANGE (dstSizeInBytes is zero, or src plus its terminator does not fit)
+//
+// Whenever dst is usable and the call fails, dst is set to the empty string. The error codes are
+// written as literals so that errno.h does not need to be #included.
+static int mal_strcpy_s(char* dst, size_t dstSizeInBytes, const char* src)
+{
+    if (dst == 0) return 22;
+    if (dstSizeInBytes == 0) return 34;
+    if (src == 0) {
+        dst[0] = '\0';
+        return 22;
+    }
+
+    size_t copied = 0;
+    while (copied < dstSizeInBytes && src[copied] != '\0') {
+        dst[copied] = src[copied];
+        copied += 1;
+    }
+
+    // Filling the whole buffer means there was no room left for the terminator.
+    if (copied == dstSizeInBytes) {
+        dst[0] = '\0';
+        return 34;
+    }
+
+    dst[copied] = '\0';
+    return 0;
+}
+
+// Copies at most count characters of src into dst (dstSizeInBytes bytes) and null-terminates the result.
+// Passing (size_t)-1 as count requests truncation: as much of src as fits is copied and the call succeeds.
+//
+// Returns 0 on success, 22 when dst or src is null, and 34 when dstSizeInBytes is zero or the requested
+// characters do not fit. On failure with a usable dst, dst is set to the empty string.
+static int mal_strncpy_s(char* dst, size_t dstSizeInBytes, const char* src, size_t count)
+{
+    const size_t truncate = (size_t)-1;
+
+    if (dst == 0) return 22;
+    if (dstSizeInBytes == 0) return 34;
+    if (src == 0) {
+        dst[0] = '\0';
+        return 22;
+    }
+
+    // Never copy more than leaves room for the terminator.
+    size_t limit = (count == truncate || count >= dstSizeInBytes) ? (dstSizeInBytes - 1) : count;
+
+    size_t n = 0;
+    while (n < limit && src[n] != '\0') {
+        dst[n] = src[n];
+        n += 1;
+    }
+
+    // Failure means the copy stopped early for lack of space without truncation having been requested.
+    if (src[n] != '\0' && n != count && count != truncate) {
+        dst[0] = '\0';
+        return 34;
+    }
+
+    dst[n] = '\0';
+    return 0;
+}
+
+// Appends the null-terminated string src to the string already in dst (dstSizeInBytes bytes in total).
+//
+// Returns 0 on success, 22 when dst or src is null or dst holds no terminator within dstSizeInBytes,
+// and 34 when dstSizeInBytes is zero or the combined string does not fit. When the result does not fit,
+// or src is null, dst is set to the empty string.
+static int mal_strcat_s(char* dst, size_t dstSizeInBytes, const char* src)
+{
+    if (dst == 0) return 22;
+    if (dstSizeInBytes == 0) return 34;
+    if (src == 0) {
+        dst[0] = '\0';
+        return 22;
+    }
+
+    // Find the end of the existing string.
+    size_t pos = 0;
+    while (pos < dstSizeInBytes && dst[pos] != '\0') {
+        pos += 1;
+    }
+    if (pos == dstSizeInBytes) {
+        return 22;  // Unterminated.
+    }
+
+    // Append until src is exhausted or the buffer is full.
+    while (pos < dstSizeInBytes && *src != '\0') {
+        dst[pos] = *src;
+        pos += 1;
+        src += 1;
+    }
+
+    // A full buffer leaves no room for the terminator.
+    if (pos == dstSizeInBytes) {
+        dst[0] = '\0';
+        return 34;
+    }
+
+    dst[pos] = '\0';
+    return 0;
+}
+
+// Converts value to a null-terminated string in the given radix (2..36) and writes it to dst.
+// Digits above 9 are written as lower case letters. A leading '-' is only written for negative
+// values when radix is 10; for other radixes the magnitude alone is written.
+//
+// Returns 0 on success and 22 when dst is null, dstSizeInBytes is zero, radix is out of range or
+// the output does not fit. On failure with a usable dst, dst is set to the empty string.
+static int mal_itoa_s(int value, char* dst, size_t dstSizeInBytes, int radix)
+{
+    if (dst == NULL || dstSizeInBytes == 0) {
+        return 22;
+    }
+    if (radix < 2 || radix > 36) {
+        dst[0] = '\0';
+        return 22;
+    }
+
+    int sign = (value < 0 && radix == 10) ? -1 : 1;     // The negative sign is only used when the base is 10.
+
+    // Compute the magnitude in unsigned arithmetic. Negating the signed value directly would overflow
+    // (undefined behaviour) for INT_MIN, whereas unsigned negation is well defined.
+    unsigned int valueU = (unsigned int)value;
+    if (value < 0) {
+        valueU = 0u - valueU;
+    }
+
+    // Digits are produced least significant first, so the string is built reversed.
+    char* dstEnd = dst;
+    do
+    {
+        int remainder = valueU % radix;
+        if (remainder > 9) {
+            *dstEnd = (char)((remainder - 10) + 'a');
+        } else {
+            *dstEnd = (char)(remainder + '0');
+        }
+
+        dstEnd += 1;
+        dstSizeInBytes -= 1;
+        valueU /= radix;
+    } while (dstSizeInBytes > 0 && valueU > 0);
+
+    if (dstSizeInBytes == 0) {
+        dst[0] = '\0';
+        return 22;  // Ran out of room in the output buffer.
+    }
+
+    if (sign < 0) {
+        *dstEnd++ = '-';
+        dstSizeInBytes -= 1;
+    }
+
+    if (dstSizeInBytes == 0) {
+        dst[0] = '\0';
+        return 22;  // Ran out of room in the output buffer.
+    }
+
+    *dstEnd = '\0';
+
+
+    // At this point the string will be reversed.
+    dstEnd -= 1;
+    while (dst < dstEnd) {
+        char temp = *dst;
+        *dst = *dstEnd;
+        *dstEnd = temp;
+
+        dst += 1;
+        dstEnd -= 1;
+    }
+
+    return 0;
+}
+
+// Compares two strings. Returns zero when they are equal, otherwise the difference between the first
+// pair of differing bytes, compared as unsigned char.
+//
+// Unlike the standard function, null pointers are accepted: two identical pointers compare equal,
+// a null str1 yields -1 and a null str2 yields 1.
+static int mal_strcmp(const char* str1, const char* str2)
+{
+    if (str1 == str2) {
+        return 0;
+    }
+    if (str1 == NULL) {
+        return -1;
+    }
+    if (str2 == NULL) {
+        return 1;
+    }
+
+    // Walk both strings while they agree and str1 has not ended.
+    while (*str1 != '\0' && *str1 == *str2) {
+        ++str1;
+        ++str2;
+    }
+
+    return ((unsigned char*)str1)[0] - ((unsigned char*)str2)[0];
+}
+#if defined(__GNUC__)
+    #pragma GCC diagnostic pop
+#endif
+
+
+// Rounds x up to the nearest power of two; a value that is already a power of two is returned unchanged.
+// An input of 0 yields 0. Technique from Bit Twiddling Hacks:
+// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+static inline unsigned int mal_next_power_of_2(unsigned int x)
+{
+    unsigned int v = x - 1;
+
+    // Smear the highest set bit into every lower position (shifts of 1, 2, 4, 8 and 16).
+    for (unsigned int shift = 1; shift <= 16; shift <<= 1) {
+        v |= v >> shift;
+    }
+
+    return v + 1;
+}
+
+// Returns half of mal_next_power_of_2(x). For an x that is itself a power of two this is x / 2, not x.
+static inline unsigned int mal_prev_power_of_2(unsigned int x)
+{
+    unsigned int roundedUp = mal_next_power_of_2(x);
+    return roundedUp >> 1;
+}
+
+// Rounds x to whichever of mal_prev_power_of_2(x) and mal_next_power_of_2(x) is closer.
+// When both are equally far away the larger one is returned.
+static inline unsigned int mal_round_to_power_of_2(unsigned int x)
+{
+    unsigned int lower = mal_prev_power_of_2(x);
+    unsigned int upper = mal_next_power_of_2(x);
+
+    return ((upper - x) > (x - lower)) ? lower : upper;
+}
+
+
+
+// Clamps an f32 sample to the range -1..1. Values already inside the range are returned unchanged.
+static inline float mal_clip_f32(float x)
+{
+    if (x < -1) {
+        return -1;
+    } else if (x > +1) {
+        return +1;
+    } else {
+        return x;
+    }
+}
+
+// Linear interpolation between x and y: returns x when a is 0 and y when a is 1.
+static inline float mal_mix_f32(float x, float y, float a)
+{
+    return x*(1-a) + y*a;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Atomics
+//
+///////////////////////////////////////////////////////////////////////////////
+// Memory barrier and atomic helpers. The MSVC build maps onto the Interlocked* family; every other
+// compiler uses the __sync builtins.
+//
+// Every macro expands to a single expression so it behaves like a function call in any statement
+// context (for example as the body of an unbraced if/else), and every argument is parenthesized.
+// The exchange macros are used for their side effect only; do not rely on their value.
+#if defined(_WIN32) && defined(_MSC_VER)
+#define mal_memory_barrier()            MemoryBarrier()
+#define mal_atomic_exchange_32(a, b)    InterlockedExchange((LONG*)(a), (LONG)(b))
+#define mal_atomic_exchange_64(a, b)    InterlockedExchange64((LONGLONG*)(a), (LONGLONG)(b))
+#define mal_atomic_increment_32(a)      InterlockedIncrement((LONG*)(a))
+#define mal_atomic_decrement_32(a)      InterlockedDecrement((LONG*)(a))
+#else
+#define mal_memory_barrier()            __sync_synchronize()
+// __sync_lock_test_and_set() is only an acquire barrier, so it is followed by a full barrier. The
+// two operations are joined with the comma operator to keep the expansion a single expression.
+#define mal_atomic_exchange_32(a, b)    ((void)__sync_lock_test_and_set((a), (b)), __sync_synchronize())
+#define mal_atomic_exchange_64(a, b)    ((void)__sync_lock_test_and_set((a), (b)), __sync_synchronize())
+#define mal_atomic_increment_32(a)      __sync_add_and_fetch((a), 1)
+#define mal_atomic_decrement_32(a)      __sync_sub_and_fetch((a), 1)
+#endif
+
+// Pointer-sized exchange, selected by the pointer width of the target.
+#ifdef MAL_64BIT
+#define mal_atomic_exchange_ptr mal_atomic_exchange_64
+#endif
+#ifdef MAL_32BIT
+#define mal_atomic_exchange_ptr mal_atomic_exchange_32
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Timing
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_WIN32
+// Performance counter frequency shared by all timers. It starts at zero and is filled in by
+// mal_timer_init() while it is still zero.
+static LARGE_INTEGER g_mal_TimerFrequency = {{0}};
+
+// Starts a timer by recording the current performance counter value in pTimer->counter.
+void mal_timer_init(mal_timer* pTimer)
+{
+    // Only query the frequency while it has not been fetched yet.
+    if (g_mal_TimerFrequency.QuadPart == 0) {
+        QueryPerformanceFrequency(&g_mal_TimerFrequency);
+    }
+
+    LARGE_INTEGER counter;
+    QueryPerformanceCounter(&counter);
+    pTimer->counter = (mal_uint64)counter.QuadPart;
+}
+
+// Returns the number of seconds elapsed since mal_timer_init() was called on pTimer, or 0 when the
+// performance counter cannot be read.
+double mal_timer_get_time_in_seconds(mal_timer* pTimer)
+{
+    LARGE_INTEGER now;
+    if (QueryPerformanceCounter(&now)) {
+        // Counter ticks elapsed, divided by ticks per second.
+        return (now.QuadPart - pTimer->counter) / (double)g_mal_TimerFrequency.QuadPart;
+    }
+
+    return 0;
+}
+#else
+// Starts a timer by recording the current CLOCK_MONOTONIC time, in nanoseconds, in pTimer->counter.
+void mal_timer_init(mal_timer* pTimer)
+{
+    struct timespec newTime;
+    clock_gettime(CLOCK_MONOTONIC, &newTime);
+
+    // Widen to 64 bits before multiplying; on targets where tv_sec is 32 bits wide the product
+    // would otherwise overflow after roughly two seconds' worth of nanoseconds.
+    pTimer->counter = ((uint64_t)newTime.tv_sec * 1000000000) + (uint64_t)newTime.tv_nsec;
+}
+
+// Returns the number of seconds elapsed since mal_timer_init() was called on pTimer, measured with
+// CLOCK_MONOTONIC.
+double mal_timer_get_time_in_seconds(mal_timer* pTimer)
+{
+    struct timespec newTime;
+    clock_gettime(CLOCK_MONOTONIC, &newTime);
+
+    // Widen to 64 bits before multiplying so the nanosecond count cannot overflow where tv_sec is 32 bits wide.
+    uint64_t newTimeCounter = ((uint64_t)newTime.tv_sec * 1000000000) + (uint64_t)newTime.tv_nsec;
+    uint64_t oldTimeCounter = pTimer->counter;
+
+    return (newTimeCounter - oldTimeCounter) / 1000000000.0;
+}
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Dynamic Linking
+//
+///////////////////////////////////////////////////////////////////////////////
+// Loads the shared library named by filename and returns a handle to it, or NULL on failure.
+//
+// Windows desktop builds use LoadLibraryA(). Other Windows builds use LoadPackagedLibrary(), which
+// only has a wide-character form, so filename is converted from UTF-8 first. All other platforms
+// use dlopen() with RTLD_NOW.
+mal_handle mal_dlopen(const char* filename)
+{
+#ifdef _WIN32
+#ifdef MAL_WIN32_DESKTOP
+    return (mal_handle)LoadLibraryA(filename);
+#else
+    // *sigh* It appears there is no ANSI version of LoadPackagedLibrary()...
+    WCHAR filenameW[4096];
+
+    // The last argument is the capacity in characters, not bytes. Passing sizeof(filenameW) would
+    // overstate the capacity and let a long name write past the end of the buffer.
+    if (MultiByteToWideChar(CP_UTF8, 0, filename, -1, filenameW, (int)(sizeof(filenameW) / sizeof(filenameW[0]))) == 0) {
+        return NULL;
+    }
+
+    return (mal_handle)LoadPackagedLibrary(filenameW, 0);
+#endif
+#else
+    return (mal_handle)dlopen(filename, RTLD_NOW);
+#endif
+}
+
+// Unloads a library previously opened with mal_dlopen().
+void mal_dlclose(mal_handle handle)
+{
+#if defined(_WIN32)
+    HMODULE hModule = (HMODULE)handle;
+    FreeLibrary(hModule);
+#else
+    void* pLibrary = (void*)handle;
+    dlclose(pLibrary);
+#endif
+}
+
+// Looks up the address of symbol in a library opened with mal_dlopen(). The result of the platform
+// lookup (GetProcAddress() or dlsym()) is returned as is, cast to mal_proc.
+mal_proc mal_dlsym(mal_handle handle, const char* symbol)
+{
+    mal_proc proc;
+
+#if defined(_WIN32)
+    proc = (mal_proc)GetProcAddress((HMODULE)handle, symbol);
+#else
+    proc = (mal_proc)dlsym((void*)handle, symbol);
+#endif
+
+    return proc;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Threading
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_WIN32
+// Win32: starts a thread running entryProc(pData) and stores its handle in pThread.
+// Returns MAL_SUCCESS, or MAL_FAILED_TO_CREATE_THREAD when CreateThread() fails. pContext is unused.
+mal_result mal_thread_create__win32(mal_context* pContext, mal_thread* pThread, mal_thread_entry_proc entryProc, void* pData)
+{
+    (void)pContext;
+
+    pThread->win32.hThread = CreateThread(NULL, 0, entryProc, pData, 0, NULL);
+
+    return (pThread->win32.hThread == NULL) ? MAL_FAILED_TO_CREATE_THREAD : MAL_SUCCESS;
+}
+
+// Win32: blocks, without a timeout, until the thread behind pThread has finished.
+void mal_thread_wait__win32(mal_thread* pThread)
+{
+    HANDLE hThread = (HANDLE)pThread->win32.hThread;
+    WaitForSingleObject(hThread, INFINITE);
+}
+
+// Win32: suspends the calling thread for the given number of milliseconds.
+void mal_sleep__win32(mal_uint32 milliseconds)
+{
+    DWORD durationInMilliseconds = (DWORD)milliseconds;
+    Sleep(durationInMilliseconds);
+}
+
+
+// Win32: creates the mutex. The mutex is implemented as an auto-reset event that starts out signalled,
+// so the first waiter gets through and the event stays unsignalled until it is set again.
+// Returns MAL_SUCCESS, or MAL_FAILED_TO_CREATE_MUTEX when the event cannot be created. pContext is unused.
+mal_result mal_mutex_init__win32(mal_context* pContext, mal_mutex* pMutex)
+{
+    (void)pContext;
+
+    pMutex->win32.hMutex = CreateEventA(NULL, FALSE, TRUE, NULL);
+
+    return (pMutex->win32.hMutex == NULL) ? MAL_FAILED_TO_CREATE_MUTEX : MAL_SUCCESS;
+}
+
+// Win32: closes the handle backing the mutex.
+void mal_mutex_uninit__win32(mal_mutex* pMutex)
+{
+    HANDLE hMutex = (HANDLE)pMutex->win32.hMutex;
+    CloseHandle(hMutex);
+}
+
+// Win32: acquires the mutex by waiting, without a timeout, on its handle.
+void mal_mutex_lock__win32(mal_mutex* pMutex)
+{
+    HANDLE hMutex = (HANDLE)pMutex->win32.hMutex;
+    WaitForSingleObject(hMutex, INFINITE);
+}
+
+// Win32: releases the mutex by signalling its event handle.
+void mal_mutex_unlock__win32(mal_mutex* pMutex)
+{
+    HANDLE hMutex = (HANDLE)pMutex->win32.hMutex;
+    SetEvent(hMutex);
+}
+
+
+// Win32: creates the event as an auto-reset event that starts out unsignalled.
+// Returns MAL_SUCCESS, or MAL_FAILED_TO_CREATE_EVENT when the event cannot be created. pContext is unused.
+mal_result mal_event_init__win32(mal_context* pContext, mal_event* pEvent)
+{
+    (void)pContext;
+
+    pEvent->win32.hEvent = CreateEventW(NULL, FALSE, FALSE, NULL);
+
+    return (pEvent->win32.hEvent == NULL) ? MAL_FAILED_TO_CREATE_EVENT : MAL_SUCCESS;
+}
+
+// Win32: closes the handle backing the event.
+void mal_event_uninit__win32(mal_event* pEvent)
+{
+    HANDLE hEvent = (HANDLE)pEvent->win32.hEvent;
+    CloseHandle(hEvent);
+}
+
+// Win32: waits, without a timeout, for the event to be signalled. Returns true only when the wait
+// completed with WAIT_OBJECT_0.
+mal_bool32 mal_event_wait__win32(mal_event* pEvent)
+{
+    DWORD waitResult = WaitForSingleObject(pEvent->win32.hEvent, INFINITE);
+    return waitResult == WAIT_OBJECT_0;
+}
+
+// Win32: signals the event. Returns the result of SetEvent(), which is non-zero on success.
+mal_bool32 mal_event_signal__win32(mal_event* pEvent)
+{
+    BOOL wasSignalled = SetEvent(pEvent->win32.hEvent);
+    return wasSignalled;
+}
+#endif
+
+
+#ifdef MAL_POSIX
+// Function pointer types mirroring the pthread functions named in each typedef. The pthread entry
+// points are stored in pContext->posix and cast to these types at each call site.
+typedef int (* mal_pthread_create_proc)(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
+typedef int (* mal_pthread_join_proc)(pthread_t thread, void **retval);
+typedef int (* mal_pthread_mutex_init_proc)(pthread_mutex_t *__mutex, const pthread_mutexattr_t *__mutexattr);
+typedef int (* mal_pthread_mutex_destroy_proc)(pthread_mutex_t *__mutex);
+typedef int (* mal_pthread_mutex_lock_proc)(pthread_mutex_t *__mutex);
+typedef int (* mal_pthread_mutex_unlock_proc)(pthread_mutex_t *__mutex);
+typedef int (* mal_pthread_cond_init_proc)(pthread_cond_t *__restrict __cond, const pthread_condattr_t *__restrict __cond_attr);
+typedef int (* mal_pthread_cond_destroy_proc)(pthread_cond_t *__cond);
+typedef int (* mal_pthread_cond_signal_proc)(pthread_cond_t *__cond);
+typedef int (* mal_pthread_cond_wait_proc)(pthread_cond_t *__restrict __cond, pthread_mutex_t *__restrict __mutex);
+
+// POSIX: starts a thread running entryProc(pData) with default attributes and stores its ID in pThread.
+// Returns MAL_SUCCESS, or MAL_FAILED_TO_CREATE_THREAD when pthread_create reports an error.
+//
+// NOTE(review): the declared return type is mal_bool32 although the values returned are mal_result
+// codes and the Win32 counterpart returns mal_result - confirm whether the type should be changed.
+mal_bool32 mal_thread_create__posix(mal_context* pContext, mal_thread* pThread, mal_thread_entry_proc entryProc, void* pData)
+{
+    mal_pthread_create_proc createThread = (mal_pthread_create_proc)pContext->posix.pthread_create;
+
+    if (createThread(&pThread->posix.thread, NULL, entryProc, pData) != 0) {
+        return MAL_FAILED_TO_CREATE_THREAD;
+    }
+
+    return MAL_SUCCESS;
+}
+
+// POSIX: joins the thread behind pThread, discarding its return value.
+void mal_thread_wait__posix(mal_thread* pThread)
+{
+    mal_pthread_join_proc joinThread = (mal_pthread_join_proc)pThread->pContext->posix.pthread_join;
+    joinThread(pThread->posix.thread, NULL);
+}
+
+// POSIX: suspends the calling thread for the given number of milliseconds.
+//
+// nanosleep() is used rather than usleep(): POSIX allows usleep() to fail for arguments of one
+// million microseconds or more, and the conversion to microseconds could wrap for very large values.
+// As with usleep(), the sleep may end early if a signal is delivered.
+void mal_sleep__posix(mal_uint32 milliseconds)
+{
+    struct timespec duration;
+    duration.tv_sec  = (time_t)(milliseconds / 1000);
+    duration.tv_nsec = (long)(milliseconds % 1000) * 1000000L;   // Remainder, converted to nanoseconds.
+
+    nanosleep(&duration, NULL);
+}
+
+
+// POSIX: initializes the mutex with default attributes.
+// Returns MAL_SUCCESS, or MAL_FAILED_TO_CREATE_MUTEX when pthread_mutex_init reports an error.
+mal_result mal_mutex_init__posix(mal_context* pContext, mal_mutex* pMutex)
+{
+    mal_pthread_mutex_init_proc initMutex = (mal_pthread_mutex_init_proc)pContext->posix.pthread_mutex_init;
+
+    if (initMutex(&pMutex->posix.mutex, NULL) != 0) {
+        return MAL_FAILED_TO_CREATE_MUTEX;
+    }
+
+    return MAL_SUCCESS;
+}
+
+// POSIX: destroys the mutex.
+void mal_mutex_uninit__posix(mal_mutex* pMutex)
+{
+    mal_pthread_mutex_destroy_proc destroyMutex = (mal_pthread_mutex_destroy_proc)pMutex->pContext->posix.pthread_mutex_destroy;
+    destroyMutex(&pMutex->posix.mutex);
+}
+
+// POSIX: locks the mutex.
+void mal_mutex_lock__posix(mal_mutex* pMutex)
+{
+    mal_pthread_mutex_lock_proc lockMutex = (mal_pthread_mutex_lock_proc)pMutex->pContext->posix.pthread_mutex_lock;
+    lockMutex(&pMutex->posix.mutex);
+}
+
+// POSIX: unlocks the mutex.
+void mal_mutex_unlock__posix(mal_mutex* pMutex)
+{
+    mal_pthread_mutex_unlock_proc unlockMutex = (mal_pthread_mutex_unlock_proc)pMutex->pContext->posix.pthread_mutex_unlock;
+    unlockMutex(&pMutex->posix.mutex);
+}
+
+
+// POSIX: initializes the event, which is built from a mutex, a condition variable and a flag that
+// starts out cleared (unsignalled).
+//
+// Returns MAL_SUCCESS, MAL_FAILED_TO_CREATE_MUTEX when the mutex cannot be initialized, or
+// MAL_FAILED_TO_CREATE_EVENT when the condition variable cannot be initialized. Nothing is left
+// initialized when an error is returned.
+mal_result mal_event_init__posix(mal_context* pContext, mal_event* pEvent)
+{
+    if (((mal_pthread_mutex_init_proc)pContext->posix.pthread_mutex_init)(&pEvent->posix.mutex, NULL) != 0) {
+        return MAL_FAILED_TO_CREATE_MUTEX;
+    }
+
+    if (((mal_pthread_cond_init_proc)pContext->posix.pthread_cond_init)(&pEvent->posix.condition, NULL) != 0) {
+        // Destroy the mutex initialized above so that it is not leaked.
+        ((mal_pthread_mutex_destroy_proc)pContext->posix.pthread_mutex_destroy)(&pEvent->posix.mutex);
+        return MAL_FAILED_TO_CREATE_EVENT;
+    }
+
+    pEvent->posix.value = 0;
+    return MAL_SUCCESS;
+}
+
+// POSIX: destroys the event's condition variable, then its mutex.
+void mal_event_uninit__posix(mal_event* pEvent)
+{
+    mal_context* pContext = pEvent->pContext;
+
+    ((mal_pthread_cond_destroy_proc)pContext->posix.pthread_cond_destroy)(&pEvent->posix.condition);
+    ((mal_pthread_mutex_destroy_proc)pContext->posix.pthread_mutex_destroy)(&pEvent->posix.mutex);
+}
+
+// POSIX: blocks until the event's flag is set, then clears the flag again (auto-reset).
+// Always returns MAL_TRUE.
+mal_bool32 mal_event_wait__posix(mal_event* pEvent)
+{
+    mal_context* pContext = pEvent->pContext;
+
+    ((mal_pthread_mutex_lock_proc)pContext->posix.pthread_mutex_lock)(&pEvent->posix.mutex);
+
+    // Re-check the flag after every wake-up; waiting on the condition releases the mutex meanwhile.
+    while (pEvent->posix.value == 0) {
+        ((mal_pthread_cond_wait_proc)pContext->posix.pthread_cond_wait)(&pEvent->posix.condition, &pEvent->posix.mutex);
+    }
+    pEvent->posix.value = 0;  // Auto-reset.
+
+    ((mal_pthread_mutex_unlock_proc)pContext->posix.pthread_mutex_unlock)(&pEvent->posix.mutex);
+
+    return MAL_TRUE;
+}
+
+// POSIX: sets the event's flag and signals its condition variable, both while holding the event's mutex.
+// Always returns MAL_TRUE.
+mal_bool32 mal_event_signal__posix(mal_event* pEvent)
+{
+    mal_context* pContext = pEvent->pContext;
+
+    ((mal_pthread_mutex_lock_proc)pContext->posix.pthread_mutex_lock)(&pEvent->posix.mutex);
+
+    pEvent->posix.value = 1;
+    ((mal_pthread_cond_signal_proc)pContext->posix.pthread_cond_signal)(&pEvent->posix.condition);
+
+    ((mal_pthread_mutex_unlock_proc)pContext->posix.pthread_mutex_unlock)(&pEvent->posix.mutex);
+
+    return MAL_TRUE;
+}
+#endif
+
+// Creates a thread that runs entryProc(pData) and records pContext in pThread.
+//
+// Returns MAL_INVALID_ARGS when pContext, pThread or entryProc is null; otherwise the result of the
+// platform-specific implementation. pData may be null.
+mal_result mal_thread_create(mal_context* pContext, mal_thread* pThread, mal_thread_entry_proc entryProc, void* pData)
+{
+    // This function returns a mal_result, so bad arguments are reported with a result code (as
+    // mal_mutex_init() does) rather than with the boolean MAL_FALSE.
+    if (pContext == NULL || pThread == NULL || entryProc == NULL) return MAL_INVALID_ARGS;
+
+    pThread->pContext = pContext;
+
+#ifdef MAL_WIN32
+    return mal_thread_create__win32(pContext, pThread, entryProc, pData);
+#endif
+#ifdef MAL_POSIX
+    return mal_thread_create__posix(pContext, pThread, entryProc, pData);
+#endif
+}
+
+// Blocks until the given thread has finished. Does nothing when pThread is null.
+void mal_thread_wait(mal_thread* pThread)
+{
+    if (pThread != NULL) {
+#if defined(MAL_WIN32)
+        mal_thread_wait__win32(pThread);
+#endif
+#if defined(MAL_POSIX)
+        mal_thread_wait__posix(pThread);
+#endif
+    }
+}
+
+// Suspends the calling thread for the given number of milliseconds using the platform implementation.
+void mal_sleep(mal_uint32 milliseconds)
+{
+#if defined(MAL_WIN32)
+    mal_sleep__win32(milliseconds);
+#endif
+#if defined(MAL_POSIX)
+    mal_sleep__posix(milliseconds);
+#endif
+}
+
+
+// Initializes a mutex and records pContext in it.
+// Returns MAL_INVALID_ARGS when either argument is null; otherwise the result of the platform implementation.
+mal_result mal_mutex_init(mal_context* pContext, mal_mutex* pMutex)
+{
+    if (pContext == NULL || pMutex == NULL) {
+        return MAL_INVALID_ARGS;
+    }
+
+    pMutex->pContext = pContext;
+
+#if defined(MAL_WIN32)
+    return mal_mutex_init__win32(pContext, pMutex);
+#endif
+#if defined(MAL_POSIX)
+    return mal_mutex_init__posix(pContext, pMutex);
+#endif
+}
+
+// Destroys a mutex. Does nothing when pMutex is null or has no context recorded.
+void mal_mutex_uninit(mal_mutex* pMutex)
+{
+    if (pMutex != NULL && pMutex->pContext != NULL) {
+#if defined(MAL_WIN32)
+        mal_mutex_uninit__win32(pMutex);
+#endif
+#if defined(MAL_POSIX)
+        mal_mutex_uninit__posix(pMutex);
+#endif
+    }
+}
+
+// Locks a mutex. Does nothing when pMutex is null or has no context recorded.
+void mal_mutex_lock(mal_mutex* pMutex)
+{
+    if (pMutex != NULL && pMutex->pContext != NULL) {
+#if defined(MAL_WIN32)
+        mal_mutex_lock__win32(pMutex);
+#endif
+#if defined(MAL_POSIX)
+        mal_mutex_lock__posix(pMutex);
+#endif
+    }
+}
+
+// Unlocks a mutex. Does nothing when pMutex is null or has no context recorded.
+void mal_mutex_unlock(mal_mutex* pMutex)
+{
+    if (pMutex != NULL && pMutex->pContext != NULL) {
+#if defined(MAL_WIN32)
+        mal_mutex_unlock__win32(pMutex);
+#endif
+#if defined(MAL_POSIX)
+        mal_mutex_unlock__posix(pMutex);
+#endif
+    }
+}
+
+
+// Initializes an event and records pContext in it.
+//
+// Returns MAL_INVALID_ARGS when either argument is null; otherwise the result of the
+// platform-specific implementation.
+mal_result mal_event_init(mal_context* pContext, mal_event* pEvent)
+{
+    // This function returns a mal_result, so bad arguments are reported with a result code (as
+    // mal_mutex_init() does) rather than with the boolean MAL_FALSE.
+    if (pContext == NULL || pEvent == NULL) return MAL_INVALID_ARGS;
+
+    pEvent->pContext = pContext;
+
+#ifdef MAL_WIN32
+    return mal_event_init__win32(pContext, pEvent);
+#endif
+#ifdef MAL_POSIX
+    return mal_event_init__posix(pContext, pEvent);
+#endif
+}
+
+// Destroys an event. Does nothing when pEvent is null or has no context recorded.
+void mal_event_uninit(mal_event* pEvent)
+{
+    if (pEvent != NULL && pEvent->pContext != NULL) {
+#if defined(MAL_WIN32)
+        mal_event_uninit__win32(pEvent);
+#endif
+#if defined(MAL_POSIX)
+        mal_event_uninit__posix(pEvent);
+#endif
+    }
+}
+
+// Waits for an event to be signalled. Returns MAL_FALSE when pEvent is null or has no context
+// recorded; otherwise the result of the platform implementation.
+mal_bool32 mal_event_wait(mal_event* pEvent)
+{
+    if (pEvent == NULL || pEvent->pContext == NULL) {
+        return MAL_FALSE;
+    }
+
+#if defined(MAL_WIN32)
+    return mal_event_wait__win32(pEvent);
+#endif
+#if defined(MAL_POSIX)
+    return mal_event_wait__posix(pEvent);
+#endif
+}
+
+// Signals an event. Returns MAL_FALSE when pEvent is null or has no context recorded; otherwise the
+// result of the platform implementation.
+mal_bool32 mal_event_signal(mal_event* pEvent)
+{
+    if (pEvent == NULL || pEvent->pContext == NULL) {
+        return MAL_FALSE;
+    }
+
+#if defined(MAL_WIN32)
+    return mal_event_signal__win32(pEvent);
+#endif
+#if defined(MAL_POSIX)
+    return mal_event_signal__posix(pEvent);
+#endif
+}
+
+
+// Posts a log message to the context's onLog callback. Nothing happens when pContext is null or no
+// callback has been configured. pDevice is passed to the callback unchanged.
+static void mal_log(mal_context* pContext, mal_device* pDevice, const char* message)
+{
+    if (pContext == NULL) {
+        return;
+    }
+
+    // Read the callback once so that the pointer tested is the pointer called.
+    mal_log_proc callback = pContext->config.onLog;
+    if (callback != NULL) {
+        callback(pContext, pDevice, message);
+    }
+}
+
+// Posts an error. Throw a breakpoint in here if you're needing to debug. The return value is always "resultCode".
+static mal_result mal_context_post_error(mal_context* pContext, mal_device* pDevice, const char* message, mal_result resultCode)
+{
+    // Derive the context from the device if necessary.
+    if (pContext == NULL) {
+        if (pDevice != NULL) {
+            pContext = pDevice->pContext;
+        }
+    }
+
+    mal_log(pContext, pDevice, message);
+    return resultCode;
+}
+
+static mal_result mal_post_error(mal_device* pDevice, const char* message, mal_result resultCode)
+{
+    return mal_context_post_error(NULL, pDevice, message, resultCode);
+}
+
+
+#if !defined(MAL_ANDROID)
+// Fills channelMap with a default channel layout for the given channel count.
+//
+// backend only matters for six or more channels, where the WASAPI, DirectSound, WinMM and OSS
+// backends use a different ordering of the first six channels. A channel count of zero leaves
+// channelMap untouched, and no entry at or beyond MAL_MAX_CHANNELS is written by the final loop.
+static void mal_get_default_channel_mapping(mal_backend backend, mal_uint32 channels, mal_channel channelMap[MAL_MAX_CHANNELS])
+{
+    if (channels == 1) {           // Mono
+        channelMap[0] = MAL_CHANNEL_FRONT_CENTER;
+    } else if (channels == 2) {    // Stereo
+        channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+    } else if (channels == 3) {    // 2.1
+        channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+        channelMap[2] = MAL_CHANNEL_LFE;
+    } else if (channels == 4) {    // 4.0
+        channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+        channelMap[2] = MAL_CHANNEL_SIDE_LEFT;
+        channelMap[3] = MAL_CHANNEL_SIDE_RIGHT;
+    } else if (channels == 5) {    // Not sure about this one. 4.1?
+        channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+        channelMap[2] = MAL_CHANNEL_SIDE_LEFT;
+        channelMap[3] = MAL_CHANNEL_SIDE_RIGHT;
+        channelMap[4] = MAL_CHANNEL_LFE;
+    } else if (channels >= 6) {    // 5.1
+        // Some backends use different default layouts.
+        if (backend == mal_backend_wasapi || backend == mal_backend_dsound || backend == mal_backend_winmm || backend == mal_backend_oss) {
+            channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+            channelMap[2] = MAL_CHANNEL_FRONT_CENTER;
+            channelMap[3] = MAL_CHANNEL_LFE;
+            channelMap[4] = MAL_CHANNEL_SIDE_LEFT;
+            channelMap[5] = MAL_CHANNEL_SIDE_RIGHT;
+        } else {
+            channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+            channelMap[2] = MAL_CHANNEL_SIDE_LEFT;
+            channelMap[3] = MAL_CHANNEL_SIDE_RIGHT;
+            channelMap[4] = MAL_CHANNEL_FRONT_CENTER;
+            channelMap[5] = MAL_CHANNEL_LFE;
+        }
+
+        // Exactly six channels needs nothing beyond the 5.1 layout above. (Previously six channels
+        // fell into the "eight or more" branch below and tripped its assertion.)
+        if (channels == 7) {    // Not sure about this one.
+            channelMap[6] = MAL_CHANNEL_BACK_CENTER;
+        } else if (channels >= 8) {
+            // I don't know what mapping to use in this case, but I'm making it upwards compatible with 7.1. Good luck!
+            channelMap[6] = MAL_CHANNEL_BACK_LEFT;
+            channelMap[7] = MAL_CHANNEL_BACK_RIGHT;
+
+            // Beyond 7.1 I'm just guessing...
+            if (channels == 8) {
+                // Plain 7.1: nothing further to assign. (Previously this case reached the
+                // "twelve or more" assertion at the end of the chain.)
+            } else if (channels == 9) {
+                channelMap[8] = MAL_CHANNEL_BACK_CENTER;
+            } else if (channels == 10) {
+                channelMap[8] = MAL_CHANNEL_FRONT_LEFT_CENTER;
+                channelMap[9] = MAL_CHANNEL_FRONT_RIGHT_CENTER;
+            } else if (channels == 11) {
+                channelMap[ 8] = MAL_CHANNEL_FRONT_LEFT_CENTER;
+                channelMap[ 9] = MAL_CHANNEL_FRONT_RIGHT_CENTER;
+                channelMap[10] = MAL_CHANNEL_BACK_CENTER;
+            } else {
+                mal_assert(channels >= 12);
+
+                // Reuse the 11 channel layout for entries 8..10 so that no entry is left unassigned.
+                channelMap[ 8] = MAL_CHANNEL_FRONT_LEFT_CENTER;
+                channelMap[ 9] = MAL_CHANNEL_FRONT_RIGHT_CENTER;
+                channelMap[10] = MAL_CHANNEL_BACK_CENTER;
+
+                // Every remaining channel is given the value of its index plus one.
+                for (mal_uint8 iChannel = 11; iChannel < channels && iChannel < MAL_MAX_CHANNELS; ++iChannel) {
+                    channelMap[iChannel] = iChannel + 1;
+                }
+            }
+        }
+    }
+}
+#endif
+
+
+// DSP read callback for the client -> DSP -> device direction. pUserData is the device. The request
+// is forwarded to the device's onSend callback and its return value is passed back; 0 is returned
+// when no onSend callback is set. pDSP is unused.
+static inline mal_uint32 mal_device__on_read_from_client(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, void* pUserData)
+{
+    (void)pDSP;
+
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+
+    // Read the callback once so that the pointer tested is the pointer called.
+    mal_send_proc onSend = pDevice->onSend;
+    if (!onSend) {
+        return 0;
+    }
+
+    return onSend(pDevice, frameCount, pFramesOut);
+}
+
+// DSP read callback for the device -> DSP -> client direction. pUserData is the device. Copies up to
+// frameCount frames from the device's pending buffer (_dspFrames / _dspFrameCount) into pFramesOut,
+// advances the pending buffer past them and returns the number of frames copied. Returns 0 once the
+// pending buffer is empty. pDSP is unused.
+static inline mal_uint32 mal_device__on_read_from_device(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, void* pUserData)
+{
+    (void)pDSP;
+
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+
+    if (pDevice->_dspFrameCount == 0) {
+        return 0;   // Nothing left.
+    }
+
+    // Never hand out more frames than are pending.
+    mal_uint32 framesToRead = (frameCount > pDevice->_dspFrameCount) ? pDevice->_dspFrameCount : frameCount;
+
+    mal_uint32 bytesToRead = framesToRead * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    mal_copy_memory(pFramesOut, pDevice->_dspFrames, bytesToRead);
+
+    // Consume what was just copied.
+    pDevice->_dspFrames += bytesToRead;
+    pDevice->_dspFrameCount -= framesToRead;
+
+    return framesToRead;
+}
+
+// Reads up to frameCount frames from the client, through the device's DSP, into pSamples. Whatever
+// part of the buffer the client did not fill is zeroed (silence).
+//
+// Note that the return value is the number of samples read (frames read multiplied by the internal
+// channel count), not the number of frames.
+static inline mal_uint32 mal_device__read_frames_from_client(mal_device* pDevice, mal_uint32 frameCount, void* pSamples)
+{
+    mal_assert(pDevice != NULL);
+    mal_assert(frameCount > 0);
+    mal_assert(pSamples != NULL);
+
+    mal_uint32 framesRead = mal_dsp_read_frames(&pDevice->dsp, frameCount, pSamples);
+
+    mal_uint32 bytesPerSample  = mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    mal_uint32 samplesRead     = framesRead * pDevice->internalChannels;
+    mal_uint32 samplesWanted   = frameCount * pDevice->internalChannels;
+    mal_uint32 bytesFilled     = samplesRead*bytesPerSample;
+    mal_uint32 bytesToSilence  = (samplesWanted - samplesRead)*bytesPerSample;
+
+    // Silence the tail of the buffer that the client left unfilled.
+    mal_zero_memory((mal_uint8*)pSamples + bytesFilled, bytesToSilence);
+
+    return samplesRead;
+}
+
+// Delivers frameCount frames of device data to the client's onRecv callback. The data is published
+// as the device's pending buffer, pulled back out through the DSP in chunks that fit a 4096 byte
+// stack buffer, and each chunk is handed to onRecv. Does nothing when no onRecv callback is set.
+static inline void mal_device__send_frames_to_client(mal_device* pDevice, mal_uint32 frameCount, const void* pSamples)
+{
+    mal_assert(pDevice != NULL);
+    mal_assert(frameCount > 0);
+    mal_assert(pSamples != NULL);
+
+    mal_recv_proc onRecv = pDevice->onRecv;
+    if (!onRecv) {
+        return;
+    }
+
+    // Publish the incoming data so the DSP read callback can consume it.
+    pDevice->_dspFrameCount = frameCount;
+    pDevice->_dspFrames = (const mal_uint8*)pSamples;
+
+    mal_uint8 chunkBuffer[4096];
+    mal_uint32 chunkFrameCount = sizeof(chunkBuffer) / mal_get_sample_size_in_bytes(pDevice->format) / pDevice->channels;
+
+    // Keep going while the DSP returns full chunks; an empty or short read ends the loop.
+    mal_uint32 framesJustRead;
+    do {
+        framesJustRead = mal_dsp_read_frames(&pDevice->dsp, chunkFrameCount, chunkBuffer);
+        if (framesJustRead == 0) {
+            break;
+        }
+
+        onRecv(pDevice, framesJustRead, chunkBuffer);
+    } while (framesJustRead >= chunkFrameCount);
+}
+
+// A helper for changing the state of the device. The new state is written with an atomic exchange.
+static inline void mal_device__set_state(mal_device* pDevice, mal_uint32 newState)
+{
+    mal_atomic_exchange_32(&pDevice->state, newState);
+}
+
+// A helper for getting the state of the device. This is a plain read of pDevice->state.
+static inline mal_uint32 mal_device__get_state(mal_device* pDevice)
+{
+    return pDevice->state;
+}
+
+
+// Format subtype GUIDs for PCM and IEEE float data, only defined when the WASAPI or DirectSound
+// backend is compiled in. The A-law and mu-law GUIDs are kept commented out.
+#ifdef MAL_WIN32
+    #if defined(MAL_HAS_WASAPI) || defined(MAL_HAS_DSOUND)
+    static GUID MAL_GUID_KSDATAFORMAT_SUBTYPE_PCM        = {0x00000001, 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}};
+    static GUID MAL_GUID_KSDATAFORMAT_SUBTYPE_IEEE_FLOAT = {0x00000003, 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}};
+    //static GUID MAL_GUID_KSDATAFORMAT_SUBTYPE_ALAW       = {0x00000006, 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}};
+    //static GUID MAL_GUID_KSDATAFORMAT_SUBTYPE_MULAW      = {0x00000007, 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}};
+    #endif
+#endif
+
+
+// Generic function for retrieving the name of a device by its ID.
+//
+// This function simply enumerates every device of the given type and copies the name of the first
+// device that has the same ID into pName (truncated to fit nameBufferSize).
+//
+// Returns MAL_SUCCESS when a matching device was found, MAL_NO_DEVICE when pDeviceID is null or no
+// device matches, MAL_OUT_OF_MEMORY when the device list cannot be allocated, or the error reported
+// by mal_enumerate_devices(). pName is only written on success.
+static mal_result mal_context__try_get_device_name_by_id(mal_context* pContext, mal_device_type type, const mal_device_id* pDeviceID, char* pName, size_t nameBufferSize)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pName != NULL);
+
+    if (pDeviceID == NULL) {
+        return MAL_NO_DEVICE;
+    }
+
+    // First pass: retrieve the device count only. The count is initialized because some backends
+    // read it as the capacity of the (here absent) output array.
+    mal_uint32 deviceCount = 0;
+    mal_result result = mal_enumerate_devices(pContext, type, &deviceCount, NULL);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+
+    // With no devices there is nothing to match against, and no reason to make a zero-sized allocation.
+    if (deviceCount == 0) {
+        return MAL_NO_DEVICE;
+    }
+
+    mal_device_info* pInfos = (mal_device_info*)mal_malloc(sizeof(*pInfos) * deviceCount);
+    if (pInfos == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+
+    // Second pass: retrieve the actual device information.
+    result = mal_enumerate_devices(pContext, type, &deviceCount, pInfos);
+    if (result != MAL_SUCCESS) {
+        mal_free(pInfos);
+        return result;
+    }
+
+    // Assume failure until a match is found, so that a successful enumeration without a match is
+    // not reported as success.
+    result = MAL_NO_DEVICE;
+
+    mal_bool32 found = MAL_FALSE;
+    for (mal_uint32 iDevice = 0; iDevice < deviceCount; ++iDevice) {
+        // Prefer backend specific comparisons for efficiency and accuracy, but fall back to a generic method if a backend-specific comparison
+        // is not implemented.
+        switch (pContext->backend)
+        {
+        #ifdef MAL_HAS_WASAPI
+            case mal_backend_wasapi:
+            {
+                if (memcmp(pDeviceID->wasapi, &pInfos[iDevice].id.wasapi, sizeof(pDeviceID->wasapi)) == 0) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_DSOUND
+            case mal_backend_dsound:
+            {
+                if (memcmp(pDeviceID->dsound, &pInfos[iDevice].id.dsound, sizeof(pDeviceID->dsound)) == 0) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_WINMM
+            case mal_backend_winmm:
+            {
+                if (pInfos[iDevice].id.winmm == pDeviceID->winmm) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_ALSA
+            case mal_backend_alsa:
+            {
+                if (mal_strcmp(pInfos[iDevice].id.alsa, pDeviceID->alsa) == 0) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_COREAUDIO
+            case mal_backend_coreaudio:
+            {
+                // TODO: Implement me.
+            } break;
+        #endif
+        #ifdef MAL_HAS_OSS
+            case mal_backend_oss:
+            {
+                if (mal_strcmp(pInfos[iDevice].id.oss, pDeviceID->oss) == 0) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_OPENSL
+            case mal_backend_opensl:
+            {
+                if (pInfos[iDevice].id.opensl == pDeviceID->opensl) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_OPENAL
+            case mal_backend_openal:
+            {
+                if (mal_strcmp(pInfos[iDevice].id.openal, pDeviceID->openal) == 0) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_SDL
+            case mal_backend_sdl:
+            {
+                if (pInfos[iDevice].id.sdl == pDeviceID->sdl) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+        #ifdef MAL_HAS_NULL
+            case mal_backend_null:
+            {
+                if (pInfos[iDevice].id.nullbackend == pDeviceID->nullbackend) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        #endif
+
+            // Fall back to a generic memory comparison.
+            default:
+            {
+                if (memcmp(pDeviceID, &pInfos[iDevice].id, sizeof(*pDeviceID)) == 0) {
+                    found = MAL_TRUE;
+                }
+            } break;
+        }
+
+        if (found) {
+            mal_strncpy_s(pName, nameBufferSize, pInfos[iDevice].name, (size_t)-1);
+            result = MAL_SUCCESS;
+            break;
+        }
+    }
+
+    mal_free(pInfos);
+    return result;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Null Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_NULL
+// Context initialization for the null backend. Nothing is set up here, so the call always reports success.
+mal_result mal_context_init__null(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    (void)pContext;     // Only referenced by the assert above.
+
+    return MAL_SUCCESS; // The null backend always works.
+}
+
+// Context teardown for the null backend. The context must have been initialized with the null backend; there is
+// nothing to release, so the call always reports success.
+mal_result mal_context_uninit__null(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_null);
+    (void)pContext;     // Only referenced by the asserts above.
+
+    return MAL_SUCCESS;
+}
+
+// Reports the single pseudo-device exposed by the null backend.
+//
+// On entry *pCount is read as the number of elements available in pInfo; on exit it is always 1. When pInfo is
+// non-NULL and has room for at least one element, that element is zeroed and given a name chosen by device type.
+// Always returns MAL_SUCCESS.
+static mal_result mal_enumerate_devices__null(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    (void)pContext;
+
+    mal_uint32 capacity = *pCount;
+    *pCount = 1;    // There's only one "device" each for playback and recording for the null backend.
+
+    // Count-only query, or no room in the output array: nothing more to do.
+    if (pInfo == NULL || capacity == 0) {
+        return MAL_SUCCESS;
+    }
+
+    mal_zero_object(pInfo);
+
+    const char* pDeviceName = (type == mal_device_type_playback) ? "NULL Playback Device" : "NULL Capture Device";
+    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), pDeviceName, (size_t)-1);
+
+    return MAL_SUCCESS;
+}
+
+// Releases the buffer that mal_device_init__null() allocated for the device.
+static void mal_device_uninit__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    void* pBuffer = pDevice->null_device.pBuffer;
+    mal_free(pBuffer);
+}
+
+// Initializes a null device.
+//
+// The buffer size and period count are taken from pConfig, and a zero-filled buffer large enough for
+// bufferSizeInFrames frames in the device's channel count and format is allocated. pContext, type and pDeviceID
+// are not used. Returns MAL_OUT_OF_MEMORY if the buffer cannot be allocated, MAL_SUCCESS otherwise.
+static mal_result mal_device_init__null(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // None of these influence the null backend.
+    (void)pDeviceID;
+    (void)type;
+    (void)pContext;
+
+    mal_zero_object(&pDevice->null_device);
+
+    pDevice->periods            = pConfig->periods;
+    pDevice->bufferSizeInFrames = pConfig->bufferSizeInFrames;
+
+    mal_uint8* pBuffer = (mal_uint8*)mal_malloc(pDevice->bufferSizeInFrames * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format));
+    pDevice->null_device.pBuffer = pBuffer;
+    if (pBuffer == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+
+    // Start from silence.
+    mal_zero_memory(pBuffer, mal_device_get_buffer_size_in_bytes(pDevice));
+
+    return MAL_SUCCESS;
+}
+
+// Starts the null device: rewinds the processed-frame cursor to the start of the buffer and (re)initializes the
+// timer that the playback/capture position is derived from. Always returns MAL_SUCCESS.
+static mal_result mal_device__start_backend__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->null_device.lastProcessedFrame = 0;
+    mal_timer_init(&pDevice->null_device.timer);
+
+    return MAL_SUCCESS;
+}
+
+// Stops the null device. No work is performed here, so this always returns MAL_SUCCESS.
+static mal_result mal_device__stop_backend__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    (void)pDevice;  // Only referenced by the assert above.
+
+    return MAL_SUCCESS;
+}
+
+// Requests that the null backend's main loop exit by raising the flag that both the main loop and the
+// frame-waiting loop poll. Always returns MAL_SUCCESS.
+static mal_result mal_device__break_main_loop__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->null_device.breakFromMainLoop = MAL_TRUE;
+
+    return MAL_SUCCESS;
+}
+
+// Derives the null device's current position inside its buffer from the time elapsed on the device timer.
+//
+// pCurrentPos receives the position in frames, in the range [0, bufferSizeInFrames). Returns MAL_TRUE on
+// success, or MAL_FALSE (with *pCurrentPos left at 0) when the device has a zero-sized buffer.
+static mal_bool32 mal_device__get_current_frame__null(mal_device* pDevice, mal_uint32* pCurrentPos)
+{
+    mal_assert(pDevice != NULL);
+    mal_assert(pCurrentPos != NULL);
+    *pCurrentPos = 0;
+
+    // Guard the modulo below against a zero-sized buffer.
+    if (pDevice->bufferSizeInFrames == 0) {
+        return MAL_FALSE;
+    }
+
+    // Elapsed seconds multiplied by the sample rate is already a frame count (one frame holds one sample per
+    // channel), so it must not be divided by the channel count as well.
+    mal_uint64 currentFrameAbs = (mal_uint64)(mal_timer_get_time_in_seconds(&pDevice->null_device.timer) * pDevice->sampleRate);
+
+    *pCurrentPos = (mal_uint32)(currentFrameAbs % pDevice->bufferSizeInFrames);
+    return MAL_TRUE;
+}
+
+// Returns how many frames of the null device's circular buffer can be processed right now.
+//
+// Playback: the last processed frame leads the timer-driven cursor, and the region from the cursor up to the last
+// processed frame is still committed. Whatever remains of the buffer is free to be written.
+//
+// Capture: the last processed frame trails the cursor, and the region between the two is what can be read.
+//
+// Returns 0 if the current position cannot be retrieved.
+static mal_uint32 mal_device__get_available_frames__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_uint32 cursor;
+    if (!mal_device__get_current_frame__null(pDevice, &cursor)) {
+        return 0;
+    }
+
+    mal_uint32 bufferFrames  = pDevice->bufferSizeInFrames;
+    mal_uint32 lastProcessed = pDevice->null_device.lastProcessedFrame;
+
+    if (pDevice->type != mal_device_type_playback) {
+        // Capture: [lastProcessed, cursor), unwrapped when the cursor has already wrapped around.
+        mal_uint32 readableEnd  = (cursor < lastProcessed) ? (cursor + bufferFrames) : cursor;
+        mal_uint32 readableSize = readableEnd - lastProcessed;
+        mal_assert(readableSize <= bufferFrames);
+
+        return readableSize;
+    }
+
+    // Playback: [cursor, lastProcessed) is still committed. Equal positions count as a completely full buffer.
+    mal_uint32 queuedEnd  = (lastProcessed <= cursor) ? (lastProcessed + bufferFrames) : lastProcessed;
+    mal_uint32 queuedSize = queuedEnd - cursor;
+    mal_assert(queuedSize <= bufferFrames);
+
+    return bufferFrames - queuedSize;
+}
+
+// Blocks until at least one frame is available or the main loop has been asked to terminate, polling at 16
+// millisecond intervals. Returns the number of available frames, which may be 0 when the wait was interrupted.
+static mal_uint32 mal_device__wait_for_frames__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    for (;;) {
+        if (pDevice->null_device.breakFromMainLoop) {
+            break;
+        }
+
+        mal_uint32 availableNow = mal_device__get_available_frames__null(pDevice);
+        if (availableNow != 0) {
+            return availableNow;
+        }
+
+        mal_sleep(16);
+    }
+
+    // The wait was interrupted. Report whatever happens to be available at this point.
+    return mal_device__get_available_frames__null(pDevice);
+}
+
+// Worker loop of the null backend.
+//
+// Clears the break flag and then, until the flag is raised again, waits for frames to become available and
+// processes them: a playback device pulls frames from the client into the device buffer, while any other device
+// zeroes the region and hands it to the client. lastProcessedFrame is advanced (with wrap-around) after every
+// iteration.
+static mal_result mal_device__main_loop__null(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->null_device.breakFromMainLoop = MAL_FALSE;
+    while (!pDevice->null_device.breakFromMainLoop) {
+        mal_uint32 framesAvailable = mal_device__wait_for_frames__null(pDevice);
+        if (framesAvailable == 0) {
+            continue;
+        }
+
+        // If it's a playback device, don't bother grabbing more data if the device is being stopped.
+        // NOTE(review): MAL_FALSE is returned from a function typed mal_result - presumably it has the same value
+        // as MAL_SUCCESS; confirm.
+        if (pDevice->null_device.breakFromMainLoop && pDevice->type == mal_device_type_playback) {
+            return MAL_FALSE;
+        }
+
+        // Clamp so the region processed in this iteration never runs past the end of the buffer.
+        if (framesAvailable + pDevice->null_device.lastProcessedFrame > pDevice->bufferSizeInFrames) {
+            framesAvailable = pDevice->bufferSizeInFrames - pDevice->null_device.lastProcessedFrame;
+        }
+
+        // Byte offset and byte length of the region inside pBuffer.
+        mal_uint32 sampleCount = framesAvailable * pDevice->channels;
+        mal_uint32 lockOffset  = pDevice->null_device.lastProcessedFrame * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format);
+        mal_uint32 lockSize    = sampleCount * mal_get_sample_size_in_bytes(pDevice->format);
+
+        if (pDevice->type == mal_device_type_playback) {
+            // Re-check the flag right before asking the client for data.
+            if (pDevice->null_device.breakFromMainLoop) {
+                return MAL_FALSE;
+            }
+
+            mal_device__read_frames_from_client(pDevice, framesAvailable, pDevice->null_device.pBuffer + lockOffset);
+        } else {
+            // The data delivered to the client for a non-playback device is always zeroed memory.
+            mal_zero_memory(pDevice->null_device.pBuffer + lockOffset, lockSize);
+            mal_device__send_frames_to_client(pDevice, framesAvailable, pDevice->null_device.pBuffer + lockOffset);
+        }
+
+        // Advance the processed cursor, wrapping at the end of the buffer.
+        pDevice->null_device.lastProcessedFrame = (pDevice->null_device.lastProcessedFrame + framesAvailable) % pDevice->bufferSizeInFrames;
+    }
+
+    return MAL_SUCCESS;
+}
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// WIN32 COMMON
+//
+///////////////////////////////////////////////////////////////////////////////
+// COM helper wrappers shared by the Win32 backends. Each wrapper takes the context as its first argument:
+//   - When MAL_WIN32_DESKTOP is defined, the call goes through the function pointer stored in pContext->win32,
+//     cast to the matching MAL_PFN_* type.
+//   - Otherwise the API is called directly and pContext is ignored.
+#if defined(MAL_WIN32)
+#include "objbase.h"
+#if defined(MAL_WIN32_DESKTOP)
+    #define mal_CoInitializeEx(pContext, pvReserved, dwCoInit)                          ((MAL_PFN_CoInitializeEx)pContext->win32.CoInitializeEx)(pvReserved, dwCoInit)
+    #define mal_CoUninitialize(pContext)                                                ((MAL_PFN_CoUninitialize)pContext->win32.CoUninitialize)()
+    #define mal_CoCreateInstance(pContext, rclsid, pUnkOuter, dwClsContext, riid, ppv)  ((MAL_PFN_CoCreateInstance)pContext->win32.CoCreateInstance)(rclsid, pUnkOuter, dwClsContext, riid, ppv)
+    #define mal_CoTaskMemFree(pContext, pv)                                             ((MAL_PFN_CoTaskMemFree)pContext->win32.CoTaskMemFree)(pv)
+    #define mal_PropVariantClear(pContext, pvar)                                        ((MAL_PFN_PropVariantClear)pContext->win32.PropVariantClear)(pvar)
+#else
+    #define mal_CoInitializeEx(pContext, pvReserved, dwCoInit)                          CoInitializeEx(pvReserved, dwCoInit)
+    #define mal_CoUninitialize(pContext)                                                CoUninitialize()
+    #define mal_CoCreateInstance(pContext, rclsid, pUnkOuter, dwClsContext, riid, ppv)  CoCreateInstance(rclsid, pUnkOuter, dwClsContext, riid, ppv)
+    #define mal_CoTaskMemFree(pContext, pv)                                             CoTaskMemFree(pv)
+    #define mal_PropVariantClear(pContext, pvar)                                        PropVariantClear(pvar)
+#endif
+#endif
+
+#if defined(MAL_HAS_WASAPI) || defined(MAL_HAS_DSOUND)
+#include <mmreg.h>
+
+// Fallback definitions of the Win32 speaker position flags, used only when the headers included so far have not
+// already defined SPEAKER_FRONT_LEFT. Every flag occupies a distinct single bit so that flags can be OR'd together
+// into a channel mask.
+#ifndef SPEAKER_FRONT_LEFT
+#define SPEAKER_FRONT_LEFT            0x1
+#define SPEAKER_FRONT_RIGHT           0x2
+#define SPEAKER_FRONT_CENTER          0x4
+#define SPEAKER_LOW_FREQUENCY         0x8
+#define SPEAKER_BACK_LEFT             0x10
+#define SPEAKER_BACK_RIGHT            0x20
+#define SPEAKER_FRONT_LEFT_OF_CENTER  0x40
+#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
+#define SPEAKER_BACK_CENTER           0x100
+#define SPEAKER_SIDE_LEFT             0x200
+#define SPEAKER_SIDE_RIGHT            0x400
+#define SPEAKER_TOP_CENTER            0x800
+#define SPEAKER_TOP_FRONT_LEFT        0x1000
+#define SPEAKER_TOP_FRONT_CENTER      0x2000
+#define SPEAKER_TOP_FRONT_RIGHT       0x4000
+#define SPEAKER_TOP_BACK_LEFT         0x8000
+#define SPEAKER_TOP_BACK_CENTER       0x10000
+#define SPEAKER_TOP_BACK_RIGHT        0x20000
+#endif
+
+// The SDK that comes with old versions of MSVC (VC6, for example) does not appear to define WAVEFORMATEXTENSIBLE. We
+// define our own implementation in this case.
+//
+// The fallback is only emitted for MSVC builds in which _WAVEFORMATEXTENSIBLE_ has not been defined.
+#if defined(_MSC_VER) && !defined(_WAVEFORMATEXTENSIBLE_)
+typedef struct
+{
+    WAVEFORMATEX Format;
+    union
+    {
+        WORD wValidBitsPerSample;
+        WORD wSamplesPerBlock;
+        WORD wReserved;
+    } Samples;
+    DWORD dwChannelMask;    // Presumably a combination of the SPEAKER_* flags above - confirm against the SDK headers.
+    GUID SubFormat;
+} WAVEFORMATEXTENSIBLE;
+#endif
+
+// Fallback for headers that do not define the WAVE_FORMAT_EXTENSIBLE format tag.
+#ifndef WAVE_FORMAT_EXTENSIBLE
+#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
+#endif
+
+// Converts an individual Win32-style channel identifier (SPEAKER_FRONT_LEFT, etc.) to mini_al.
+//
+// Returns 0 when the identifier is not one of the recognized single-speaker flags.
+static mal_uint8 mal_channel_id_to_mal__win32(DWORD id)
+{
+    static const struct
+    {
+        DWORD     speaker;
+        mal_uint8 channel;
+    } speakerToChannel[] = {
+        {SPEAKER_FRONT_LEFT,            MAL_CHANNEL_FRONT_LEFT},
+        {SPEAKER_FRONT_RIGHT,           MAL_CHANNEL_FRONT_RIGHT},
+        {SPEAKER_FRONT_CENTER,          MAL_CHANNEL_FRONT_CENTER},
+        {SPEAKER_LOW_FREQUENCY,         MAL_CHANNEL_LFE},
+        {SPEAKER_BACK_LEFT,             MAL_CHANNEL_BACK_LEFT},
+        {SPEAKER_BACK_RIGHT,            MAL_CHANNEL_BACK_RIGHT},
+        {SPEAKER_FRONT_LEFT_OF_CENTER,  MAL_CHANNEL_FRONT_LEFT_CENTER},
+        {SPEAKER_FRONT_RIGHT_OF_CENTER, MAL_CHANNEL_FRONT_RIGHT_CENTER},
+        {SPEAKER_BACK_CENTER,           MAL_CHANNEL_BACK_CENTER},
+        {SPEAKER_SIDE_LEFT,             MAL_CHANNEL_SIDE_LEFT},
+        {SPEAKER_SIDE_RIGHT,            MAL_CHANNEL_SIDE_RIGHT},
+        {SPEAKER_TOP_CENTER,            MAL_CHANNEL_TOP_CENTER},
+        {SPEAKER_TOP_FRONT_LEFT,        MAL_CHANNEL_TOP_FRONT_LEFT},
+        {SPEAKER_TOP_FRONT_CENTER,      MAL_CHANNEL_TOP_FRONT_CENTER},
+        {SPEAKER_TOP_FRONT_RIGHT,       MAL_CHANNEL_TOP_FRONT_RIGHT},
+        {SPEAKER_TOP_BACK_LEFT,         MAL_CHANNEL_TOP_BACK_LEFT},
+        {SPEAKER_TOP_BACK_CENTER,       MAL_CHANNEL_TOP_BACK_CENTER},
+        {SPEAKER_TOP_BACK_RIGHT,        MAL_CHANNEL_TOP_BACK_RIGHT}
+    };
+
+    for (mal_uint32 iEntry = 0; iEntry < sizeof(speakerToChannel) / sizeof(speakerToChannel[0]); ++iEntry) {
+        if (speakerToChannel[iEntry].speaker == id) {
+            return speakerToChannel[iEntry].channel;
+        }
+    }
+
+    return 0;
+}
+
+// Converts an individual mini_al channel identifier (MAL_CHANNEL_FRONT_LEFT, etc.) to Win32-style.
+//
+// Returns 0 when the identifier has no Win32 speaker flag associated with it.
+static DWORD mal_channel_id_to_win32(DWORD id)
+{
+    static const struct
+    {
+        DWORD channel;
+        DWORD speaker;
+    } channelToSpeaker[] = {
+        {MAL_CHANNEL_FRONT_LEFT,         SPEAKER_FRONT_LEFT},
+        {MAL_CHANNEL_FRONT_RIGHT,        SPEAKER_FRONT_RIGHT},
+        {MAL_CHANNEL_FRONT_CENTER,       SPEAKER_FRONT_CENTER},
+        {MAL_CHANNEL_LFE,                SPEAKER_LOW_FREQUENCY},
+        {MAL_CHANNEL_BACK_LEFT,          SPEAKER_BACK_LEFT},
+        {MAL_CHANNEL_BACK_RIGHT,         SPEAKER_BACK_RIGHT},
+        {MAL_CHANNEL_FRONT_LEFT_CENTER,  SPEAKER_FRONT_LEFT_OF_CENTER},
+        {MAL_CHANNEL_FRONT_RIGHT_CENTER, SPEAKER_FRONT_RIGHT_OF_CENTER},
+        {MAL_CHANNEL_BACK_CENTER,        SPEAKER_BACK_CENTER},
+        {MAL_CHANNEL_SIDE_LEFT,          SPEAKER_SIDE_LEFT},
+        {MAL_CHANNEL_SIDE_RIGHT,         SPEAKER_SIDE_RIGHT},
+        {MAL_CHANNEL_TOP_CENTER,         SPEAKER_TOP_CENTER},
+        {MAL_CHANNEL_TOP_FRONT_LEFT,     SPEAKER_TOP_FRONT_LEFT},
+        {MAL_CHANNEL_TOP_FRONT_CENTER,   SPEAKER_TOP_FRONT_CENTER},
+        {MAL_CHANNEL_TOP_FRONT_RIGHT,    SPEAKER_TOP_FRONT_RIGHT},
+        {MAL_CHANNEL_TOP_BACK_LEFT,      SPEAKER_TOP_BACK_LEFT},
+        {MAL_CHANNEL_TOP_BACK_CENTER,    SPEAKER_TOP_BACK_CENTER},
+        {MAL_CHANNEL_TOP_BACK_RIGHT,     SPEAKER_TOP_BACK_RIGHT}
+    };
+
+    for (mal_uint32 iEntry = 0; iEntry < sizeof(channelToSpeaker) / sizeof(channelToSpeaker[0]); ++iEntry) {
+        if (channelToSpeaker[iEntry].channel == id) {
+            return channelToSpeaker[iEntry].speaker;
+        }
+    }
+
+    return 0;
+}
+
+// Converts a channel mapping to a Win32-style channel mask.
+//
+// The first `channels` entries of channelMap are each translated to their Win32 speaker flag and OR'd together.
+// Entries without a Win32 equivalent contribute nothing to the mask.
+static DWORD mal_channel_map_to_channel_mask__win32(const mal_uint8 channelMap[MAL_MAX_CHANNELS], mal_uint32 channels)
+{
+    DWORD mask = 0;
+
+    mal_uint32 iSlot = 0;
+    while (iSlot < channels) {
+        mask |= mal_channel_id_to_win32(channelMap[iSlot]);
+        iSlot += 1;
+    }
+
+    return mask;
+}
+
+// Converts a Win32-style channel mask to a mini_al channel map.
+//
+// An empty mask with 1 or 2 channels is treated as mono (front-center) or stereo (front-left, front-right)
+// respectively. Otherwise every set bit of the mask, lowest bit first, fills the next slot of channelMap. At most
+// `channels` slots, and never more than MAL_MAX_CHANNELS, are written, so a mask carrying more bits than the
+// stream has channels cannot overrun the output array.
+static void mal_channel_mask_to_channel_map__win32(DWORD dwChannelMask, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS])
+{
+    if (channels == 1 && dwChannelMask == 0) {
+        channelMap[0] = MAL_CHANNEL_FRONT_CENTER;
+    } else if (channels == 2 && dwChannelMask == 0) {
+        channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+    } else {
+        // Just iterate over each bit, stopping as soon as the channel map is full.
+        mal_uint32 iChannel = 0;
+        for (mal_uint32 iBit = 0; iBit < 32 && iChannel < channels && iChannel < MAL_MAX_CHANNELS; ++iBit) {
+            // The shift is done on an unsigned value; shifting a signed 1 into bit 31 is undefined behaviour.
+            DWORD bitValue = (dwChannelMask & ((DWORD)1 << iBit));
+            if (bitValue != 0) {
+                // The bit is set.
+                channelMap[iChannel] = mal_channel_id_to_mal__win32(bitValue);
+                iChannel += 1;
+            }
+        }
+    }
+}
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// WASAPI Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_WASAPI
+#if defined(_MSC_VER)
+    #pragma warning(push)
+    #pragma warning(disable:4091)   // 'typedef ': ignored on left of '' when no variable is declared
+#endif
+#include <audioclient.h>
+#include <audiopolicy.h>
+#include <mmdeviceapi.h>
+#if defined(_MSC_VER)
+    #pragma warning(pop)
+#endif
+
+// Property keys queried through IPropertyStore. Each is spelled out as a {GUID, property id} pair.
+const PROPERTYKEY g_malPKEY_Device_FriendlyName      = {{0xa45c254e, 0xdf1c, 0x4efd, {0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0}}, 14};
+const PROPERTYKEY g_malPKEY_AudioEngine_DeviceFormat = {{0xf19f064d, 0x82c,  0x4e27, {0xbc, 0x73, 0x68, 0x82, 0xa1, 0xbb, 0x8e, 0x4c}},  0};
+
+// Class and interface identifiers for the COM objects used by the WASAPI backend. These are local copies of the
+// GUIDs noted at the end of each line.
+const IID g_malCLSID_MMDeviceEnumerator_Instance = {0xBCDE0395, 0xE52F, 0x467C, {0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E}}; // BCDE0395-E52F-467C-8E3D-C4579291692E = __uuidof(MMDeviceEnumerator)
+const IID g_malIID_IMMDeviceEnumerator_Instance  = {0xA95664D2, 0x9614, 0x4F35, {0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6}}; // A95664D2-9614-4F35-A746-DE8DB63617E6 = __uuidof(IMMDeviceEnumerator)
+const IID g_malIID_IAudioClient_Instance         = {0x1CB9AD4C, 0xDBFA, 0x4C32, {0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2}}; // 1CB9AD4C-DBFA-4C32-B178-C2F568A703B2 = __uuidof(IAudioClient)
+const IID g_malIID_IAudioRenderClient_Instance   = {0xF294ACFC, 0x3146, 0x4483, {0xA7, 0xBF, 0xAD, 0xDC, 0xA7, 0xC2, 0x60, 0xE2}}; // F294ACFC-3146-4483-A7BF-ADDCA7C260E2 = __uuidof(IAudioRenderClient)
+const IID g_malIID_IAudioCaptureClient_Instance  = {0xC8ADBD64, 0xE71E, 0x48A0, {0xA4, 0xDE, 0x18, 0x5C, 0x39, 0x5C, 0xD3, 0x17}}; // C8ADBD64-E71E-48A0-A4DE-185C395CD317 = __uuidof(IAudioCaptureClient)
+
+// Device interface identifiers, only compiled for non-desktop builds. The non-desktop enumeration path copies them
+// into the device ID of the default playback and capture devices.
+#ifndef MAL_WIN32_DESKTOP
+const IID g_malIID_DEVINTERFACE_AUDIO_RENDER  = {0xE6327CAD, 0xDCEC, 0x4949, {0xAE, 0x8A, 0x99, 0x1E, 0x97, 0x6A, 0x79, 0xD2}}; // E6327CAD-DCEC-4949-AE8A-991E976A79D2
+const IID g_malIID_DEVINTERFACE_AUDIO_CAPTURE = {0x2EEF81BE, 0x33FA, 0x4800, {0x96, 0x70, 0x1C, 0xD4, 0x74, 0x97, 0x2C, 0x3F}}; // 2EEF81BE-33FA-4800-9670-1CD474972C3F
+#endif
+
+// Language-neutral spellings of the GUIDs above, for use as COM call arguments. C++ builds pass the object itself
+// and C builds pass its address - presumably because the Windows headers declare REFIID/REFCLSID as references in
+// C++ and as pointers in C (confirm against the SDK headers).
+#ifdef __cplusplus
+#define g_malCLSID_MMDeviceEnumerator g_malCLSID_MMDeviceEnumerator_Instance
+#define g_malIID_IMMDeviceEnumerator  g_malIID_IMMDeviceEnumerator_Instance
+#define g_malIID_IAudioClient         g_malIID_IAudioClient_Instance
+#define g_malIID_IAudioRenderClient   g_malIID_IAudioRenderClient_Instance
+#define g_malIID_IAudioCaptureClient  g_malIID_IAudioCaptureClient_Instance
+#else
+#define g_malCLSID_MMDeviceEnumerator &g_malCLSID_MMDeviceEnumerator_Instance
+#define g_malIID_IMMDeviceEnumerator  &g_malIID_IMMDeviceEnumerator_Instance
+#define g_malIID_IAudioClient         &g_malIID_IAudioClient_Instance
+#define g_malIID_IAudioRenderClient   &g_malIID_IAudioRenderClient_Instance
+#define g_malIID_IAudioCaptureClient  &g_malIID_IAudioCaptureClient_Instance
+#endif
+
+// GUID comparison taking two GUID objects (not pointers) in both languages. The C variant takes the address of
+// each argument before forwarding to IsEqualGUID, so both arguments must be addressable there.
+#ifdef __cplusplus
+#define mal_is_guid_equal(a, b) IsEqualGUID(a, b)
+#else
+#define mal_is_guid_equal(a, b) IsEqualGUID(&a, &b)
+#endif
+
+#ifdef MAL_WIN32_DESKTOP
+    // IMMDeviceEnumerator
+    //
+    // Wrappers that let the same call site compile as C and as C++: C++ calls the method on the object, C goes
+    // through lpVtbl and passes the object as the explicit first argument. In the C variants the object is cast
+    // to the interface type for the `this` argument too, matching the wrappers of every other interface here.
+    #ifdef __cplusplus
+        #define IMMDeviceEnumerator_Release(p) ((IMMDeviceEnumerator*)p)->Release()
+        #define IMMDeviceEnumerator_EnumAudioEndpoints(p, a, b, c) ((IMMDeviceEnumerator*)p)->EnumAudioEndpoints(a, b, c)
+        #define IMMDeviceEnumerator_GetDefaultAudioEndpoint(p, a, b, c) ((IMMDeviceEnumerator*)p)->GetDefaultAudioEndpoint(a, b, c)
+        #define IMMDeviceEnumerator_GetDevice(p, a, b) ((IMMDeviceEnumerator*)p)->GetDevice(a, b)
+    #else
+        #define IMMDeviceEnumerator_Release(p) ((IMMDeviceEnumerator*)p)->lpVtbl->Release((IMMDeviceEnumerator*)p)
+        #define IMMDeviceEnumerator_EnumAudioEndpoints(p, a, b, c) ((IMMDeviceEnumerator*)p)->lpVtbl->EnumAudioEndpoints((IMMDeviceEnumerator*)p, a, b, c)
+        #define IMMDeviceEnumerator_GetDefaultAudioEndpoint(p, a, b, c) ((IMMDeviceEnumerator*)p)->lpVtbl->GetDefaultAudioEndpoint((IMMDeviceEnumerator*)p, a, b, c)
+        #define IMMDeviceEnumerator_GetDevice(p, a, b) ((IMMDeviceEnumerator*)p)->lpVtbl->GetDevice((IMMDeviceEnumerator*)p, a, b)
+    #endif
+
+    // IMMDeviceCollection
+    //
+    // C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument.
+    #ifdef __cplusplus
+        #define IMMDeviceCollection_Release(p) ((IMMDeviceCollection*)p)->Release()
+        #define IMMDeviceCollection_GetCount(p, a) ((IMMDeviceCollection*)p)->GetCount(a)
+        #define IMMDeviceCollection_Item(p, a, b) ((IMMDeviceCollection*)p)->Item(a, b)
+    #else
+        #define IMMDeviceCollection_Release(p) ((IMMDeviceCollection*)p)->lpVtbl->Release((IMMDeviceCollection*)p)
+        #define IMMDeviceCollection_GetCount(p, a) ((IMMDeviceCollection*)p)->lpVtbl->GetCount((IMMDeviceCollection*)p, a)
+        #define IMMDeviceCollection_Item(p, a, b) ((IMMDeviceCollection*)p)->lpVtbl->Item((IMMDeviceCollection*)p, a, b)
+    #endif
+
+    // IMMDevice
+    //
+    // C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument.
+    #ifdef __cplusplus
+        #define IMMDevice_Release(p) ((IMMDevice*)p)->Release()
+        #define IMMDevice_GetId(p, a) ((IMMDevice*)p)->GetId(a)
+        #define IMMDevice_OpenPropertyStore(p, a, b) ((IMMDevice*)p)->OpenPropertyStore(a, b)
+        #define IMMDevice_Activate(p, a, b, c, d) ((IMMDevice*)p)->Activate(a, b, c, d)
+    #else
+        #define IMMDevice_Release(p) ((IMMDevice*)p)->lpVtbl->Release((IMMDevice*)p)
+        #define IMMDevice_GetId(p, a) ((IMMDevice*)p)->lpVtbl->GetId((IMMDevice*)p, a)
+        #define IMMDevice_OpenPropertyStore(p, a, b) ((IMMDevice*)p)->lpVtbl->OpenPropertyStore((IMMDevice*)p, a, b)
+        #define IMMDevice_Activate(p, a, b, c, d) ((IMMDevice*)p)->lpVtbl->Activate((IMMDevice*)p, a, b, c, d)
+    #endif
+#else
+    // IActivateAudioInterfaceAsyncOperation
+    //
+    // C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument.
+    #ifdef __cplusplus
+        #define IActivateAudioInterfaceAsyncOperation_Release(p) ((IActivateAudioInterfaceAsyncOperation*)p)->Release()
+        #define IActivateAudioInterfaceAsyncOperation_GetActivateResult(p, a, b) ((IActivateAudioInterfaceAsyncOperation*)p)->GetActivateResult(a, b)
+    #else
+        #define IActivateAudioInterfaceAsyncOperation_Release(p) ((IActivateAudioInterfaceAsyncOperation*)p)->lpVtbl->Release((IActivateAudioInterfaceAsyncOperation*)p)
+        #define IActivateAudioInterfaceAsyncOperation_GetActivateResult(p, a, b) ((IActivateAudioInterfaceAsyncOperation*)p)->lpVtbl->GetActivateResult((IActivateAudioInterfaceAsyncOperation*)p, a, b)
+    #endif
+#endif
+
+// IPropertyStore
+//
+// C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument. Note
+// that GetValue takes the property key object itself in both languages - the C variant takes its address.
+#ifdef __cplusplus
+    #define IPropertyStore_Release(p) ((IPropertyStore*)p)->Release()
+    #define IPropertyStore_GetValue(p, a, b) ((IPropertyStore*)p)->GetValue(a, b)
+#else
+    #define IPropertyStore_Release(p) ((IPropertyStore*)p)->lpVtbl->Release((IPropertyStore*)p)
+    #define IPropertyStore_GetValue(p, a, b) ((IPropertyStore*)p)->lpVtbl->GetValue((IPropertyStore*)p, &a, b)
+#endif
+
+// IAudioClient
+//
+// C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument.
+#ifdef __cplusplus
+    #define IAudioClient_Release(p) ((IAudioClient*)p)->Release()
+    #define IAudioClient_IsFormatSupported(p, a, b, c) ((IAudioClient*)p)->IsFormatSupported(a, b, c)
+    #define IAudioClient_GetMixFormat(p, a) ((IAudioClient*)p)->GetMixFormat(a)
+    #define IAudioClient_Initialize(p, a, b, c, d, e, f) ((IAudioClient*)p)->Initialize(a, b, c, d, e, f)
+    #define IAudioClient_GetBufferSize(p, a) ((IAudioClient*)p)->GetBufferSize(a)
+    #define IAudioClient_GetService(p, a, b) ((IAudioClient*)p)->GetService(a, b)
+    #define IAudioClient_Start(p) ((IAudioClient*)p)->Start()
+    #define IAudioClient_Stop(p) ((IAudioClient*)p)->Stop()
+    #define IAudioClient_GetCurrentPadding(p, a) ((IAudioClient*)p)->GetCurrentPadding(a)
+    #define IAudioClient_SetEventHandle(p, a) ((IAudioClient*)p)->SetEventHandle(a)
+#else
+    #define IAudioClient_Release(p) ((IAudioClient*)p)->lpVtbl->Release((IAudioClient*)p)
+    #define IAudioClient_IsFormatSupported(p, a, b, c) ((IAudioClient*)p)->lpVtbl->IsFormatSupported((IAudioClient*)p, a, b, c)
+    #define IAudioClient_GetMixFormat(p, a) ((IAudioClient*)p)->lpVtbl->GetMixFormat((IAudioClient*)p, a)
+    #define IAudioClient_Initialize(p, a, b, c, d, e, f) ((IAudioClient*)p)->lpVtbl->Initialize((IAudioClient*)p, a, b, c, d, e, f)
+    #define IAudioClient_GetBufferSize(p, a) ((IAudioClient*)p)->lpVtbl->GetBufferSize((IAudioClient*)p, a)
+    #define IAudioClient_GetService(p, a, b) ((IAudioClient*)p)->lpVtbl->GetService((IAudioClient*)p, a, b)
+    #define IAudioClient_Start(p) ((IAudioClient*)p)->lpVtbl->Start((IAudioClient*)p)
+    #define IAudioClient_Stop(p) ((IAudioClient*)p)->lpVtbl->Stop((IAudioClient*)p)
+    #define IAudioClient_GetCurrentPadding(p, a) ((IAudioClient*)p)->lpVtbl->GetCurrentPadding((IAudioClient*)p, a)
+    #define IAudioClient_SetEventHandle(p, a) ((IAudioClient*)p)->lpVtbl->SetEventHandle((IAudioClient*)p, a)
+#endif
+
+// IAudioRenderClient
+//
+// C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument.
+#ifdef __cplusplus
+    #define IAudioRenderClient_Release(p) ((IAudioRenderClient*)p)->Release()
+    #define IAudioRenderClient_GetBuffer(p, a, b) ((IAudioRenderClient*)p)->GetBuffer(a, b)
+    #define IAudioRenderClient_ReleaseBuffer(p, a, b) ((IAudioRenderClient*)p)->ReleaseBuffer(a, b)
+#else
+    #define IAudioRenderClient_Release(p) ((IAudioRenderClient*)p)->lpVtbl->Release((IAudioRenderClient*)p)
+    #define IAudioRenderClient_GetBuffer(p, a, b) ((IAudioRenderClient*)p)->lpVtbl->GetBuffer((IAudioRenderClient*)p, a, b)
+    #define IAudioRenderClient_ReleaseBuffer(p, a, b) ((IAudioRenderClient*)p)->lpVtbl->ReleaseBuffer((IAudioRenderClient*)p, a, b)
+#endif
+
+// IAudioCaptureClient
+//
+// C++ calls the method on the object; C dispatches through lpVtbl with the object as the first argument.
+#ifdef __cplusplus
+    #define IAudioCaptureClient_Release(p) ((IAudioCaptureClient*)p)->Release()
+    #define IAudioCaptureClient_GetNextPacketSize(p, a) ((IAudioCaptureClient*)p)->GetNextPacketSize(a)
+    #define IAudioCaptureClient_GetBuffer(p, a, b, c, d, e) ((IAudioCaptureClient*)p)->GetBuffer(a, b, c, d, e)
+    #define IAudioCaptureClient_ReleaseBuffer(p, a) ((IAudioCaptureClient*)p)->ReleaseBuffer(a)
+#else
+    #define IAudioCaptureClient_Release(p) ((IAudioCaptureClient*)p)->lpVtbl->Release((IAudioCaptureClient*)p)
+    #define IAudioCaptureClient_GetNextPacketSize(p, a) ((IAudioCaptureClient*)p)->lpVtbl->GetNextPacketSize((IAudioCaptureClient*)p, a)
+    #define IAudioCaptureClient_GetBuffer(p, a, b, c, d, e) ((IAudioCaptureClient*)p)->lpVtbl->GetBuffer((IAudioCaptureClient*)p, a, b, c, d, e)
+    #define IAudioCaptureClient_ReleaseBuffer(p, a) ((IAudioCaptureClient*)p)->lpVtbl->ReleaseBuffer((IAudioCaptureClient*)p, a)
+#endif
+
+// Context initialization for the WASAPI backend.
+//
+// On desktop builds this verifies that the operating system is at least Windows Vista SP1 and returns
+// MAL_NO_BACKEND if it is not. Non-desktop builds always succeed.
+mal_result mal_context_init__wasapi(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    (void)pContext;
+
+#ifdef MAL_WIN32_DESKTOP
+    // WASAPI is only supported in Vista SP1 and newer. The reason for SP1 and not the base version of Vista is that event-driven
+    // exclusive mode does not work until SP1.
+    OSVERSIONINFOEXW minimumVersion;
+    mal_zero_object(&minimumVersion);
+    minimumVersion.dwOSVersionInfoSize = sizeof(minimumVersion);
+    minimumVersion.dwMajorVersion      = HIBYTE(_WIN32_WINNT_VISTA);
+    minimumVersion.dwMinorVersion      = LOBYTE(_WIN32_WINNT_VISTA);
+    minimumVersion.wServicePackMajor   = 1;
+
+    // Each of the three fields is compared with "greater than or equal".
+    DWORDLONG conditionMask = 0;
+    conditionMask = VerSetConditionMask(conditionMask, VER_MAJORVERSION,     VER_GREATER_EQUAL);
+    conditionMask = VerSetConditionMask(conditionMask, VER_MINORVERSION,     VER_GREATER_EQUAL);
+    conditionMask = VerSetConditionMask(conditionMask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
+
+    if (!VerifyVersionInfoW(&minimumVersion, VER_MAJORVERSION | VER_MINORVERSION | VER_SERVICEPACKMAJOR, conditionMask)) {
+        return MAL_NO_BACKEND;
+    }
+
+    return MAL_SUCCESS;
+#else
+    return MAL_SUCCESS;
+#endif
+}
+
+// Context teardown for the WASAPI backend. The context must have been initialized with the WASAPI backend; there
+// is nothing to release here, so the call always reports success.
+mal_result mal_context_uninit__wasapi(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_wasapi);
+    (void)pContext;     // Only referenced by the asserts above.
+
+    return MAL_SUCCESS;
+}
+
+// Enumerates the playback or capture devices visible to WASAPI.
+//
+// On entry *pCount is read as the number of elements available in pInfo. On exit it holds the number of entries
+// written to pInfo or, when pInfo is NULL, the number of devices that exist.
+//
+// Desktop builds walk the active endpoints of the MMDevice API and record each endpoint's ID and friendly name
+// (converted to UTF-8). Other builds report a single default device per device type.
+//
+// Returns MAL_SUCCESS, or the result of mal_context_post_error() when the enumerator cannot be created or queried.
+static mal_result mal_enumerate_devices__wasapi(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    mal_uint32 infoSize = *pCount;
+    *pCount = 0;
+
+#ifdef MAL_WIN32_DESKTOP
+    IMMDeviceEnumerator* pDeviceEnumerator;
+    HRESULT hr = mal_CoCreateInstance(pContext, g_malCLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, g_malIID_IMMDeviceEnumerator, (void**)&pDeviceEnumerator);
+    if (FAILED(hr)) {
+        return mal_context_post_error(pContext, NULL, "[WASAPI] Failed to create device enumerator.", MAL_WASAPI_FAILED_TO_CREATE_DEVICE_ENUMERATOR);
+    }
+
+    IMMDeviceCollection* pDeviceCollection;
+    hr = IMMDeviceEnumerator_EnumAudioEndpoints(pDeviceEnumerator, (type == mal_device_type_playback) ? eRender : eCapture, DEVICE_STATE_ACTIVE, &pDeviceCollection);
+    if (FAILED(hr)) {
+        IMMDeviceEnumerator_Release(pDeviceEnumerator);
+        return mal_context_post_error(pContext, NULL, "[WASAPI] Failed to enumerate audio endpoints.", MAL_NO_DEVICE);
+    }
+
+    // The enumerator is not needed once the collection has been retrieved.
+    IMMDeviceEnumerator_Release(pDeviceEnumerator);
+
+    UINT count;
+    hr = IMMDeviceCollection_GetCount(pDeviceCollection, &count);
+    if (FAILED(hr)) {
+        IMMDeviceCollection_Release(pDeviceCollection);
+        return mal_context_post_error(pContext, NULL, "[WASAPI] Failed to get device count.", MAL_NO_DEVICE);
+    }
+
+    for (mal_uint32 iDevice = 0; iDevice < count; ++iDevice) {
+        if (pInfo != NULL) {
+            if (infoSize > 0) {
+                mal_zero_object(pInfo);
+
+                IMMDevice* pDevice;
+                hr = IMMDeviceCollection_Item(pDeviceCollection, iDevice, &pDevice);
+                if (SUCCEEDED(hr)) {
+                    // ID.
+                    LPWSTR id;
+                    hr = IMMDevice_GetId(pDevice, &id);
+                    if (SUCCEEDED(hr)) {
+                        // idlen counts characters whereas sizeof() counts bytes, so the comparison has to be done in
+                        // bytes and must leave room for the null terminator.
+                        size_t idlen = wcslen(id);
+                        if ((idlen+1) * sizeof(wchar_t) > sizeof(pInfo->id.wasapi)) {
+                            mal_CoTaskMemFree(pContext, id);
+                            IMMDevice_Release(pDevice);
+                            mal_assert(MAL_FALSE);  // NOTE: If this is triggered, please report it. It means the format of the ID must have changed and is too long to fit in our fixed sized buffer.
+                            continue;
+                        }
+
+                        memcpy(pInfo->id.wasapi, id, idlen * sizeof(wchar_t));
+                        pInfo->id.wasapi[idlen] = '\0';
+
+                        mal_CoTaskMemFree(pContext, id);
+                    }
+
+                    // Description / Friendly Name.
+                    IPropertyStore *pProperties;
+                    hr = IMMDevice_OpenPropertyStore(pDevice, STGM_READ, &pProperties);
+                    if (SUCCEEDED(hr)) {
+                        PROPVARIANT varName;
+                        PropVariantInit(&varName);
+                        hr = IPropertyStore_GetValue(pProperties, g_malPKEY_Device_FriendlyName, &varName);
+                        if (SUCCEEDED(hr)) {
+                            WideCharToMultiByte(CP_UTF8, 0, varName.pwszVal, -1, pInfo->name, sizeof(pInfo->name), 0, FALSE);
+                            mal_PropVariantClear(pContext, &varName);
+                        }
+
+                        IPropertyStore_Release(pProperties);
+                    }
+
+                    // Item() handed us a reference to the device which must be released once we're done with it.
+                    IMMDevice_Release(pDevice);
+                }
+
+                pInfo += 1;
+                infoSize -= 1;
+                *pCount += 1;
+            }
+        } else {
+            *pCount += 1;
+        }
+    }
+
+    IMMDeviceCollection_Release(pDeviceCollection);
+#else
+    // The MMDevice API is only supported on desktop applications. For now, while I'm still figuring out how to properly enumerate
+    // over devices without using MMDevice, I'm restricting devices to defaults.
+    if (pInfo != NULL) {
+        if (infoSize > 0) {
+            // Start from a zeroed entry, like the desktop path does, so no part of the ID or name is left uninitialized.
+            mal_zero_object(pInfo);
+
+            if (type == mal_device_type_playback) {
+                mal_copy_memory(pInfo->id.wasapi, &g_malIID_DEVINTERFACE_AUDIO_RENDER, sizeof(g_malIID_DEVINTERFACE_AUDIO_RENDER));
+                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Playback Device", (size_t)-1);
+            } else {
+                mal_copy_memory(pInfo->id.wasapi, &g_malIID_DEVINTERFACE_AUDIO_CAPTURE, sizeof(g_malIID_DEVINTERFACE_AUDIO_CAPTURE));
+                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Capture Device", (size_t)-1);
+            }
+
+            pInfo += 1;
+            *pCount += 1;
+        }
+    } else {
+        *pCount += 1;
+    }
+#endif
+
+    return MAL_SUCCESS;
+}
+
+// Releases whatever WASAPI resources are currently stored in pDevice->wasapi: the render and capture
+// service clients, the audio client, and the two Win32 event handles. Members that are still NULL are
+// skipped, so this can be called on a device whose initialization only got part of the way through.
+static void mal_device_uninit__wasapi(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // COM interfaces: the service clients first, then the audio client itself.
+    if (pDevice->wasapi.pRenderClient != NULL) {
+        IAudioRenderClient_Release(pDevice->wasapi.pRenderClient);
+    }
+    if (pDevice->wasapi.pCaptureClient != NULL) {
+        IAudioCaptureClient_Release(pDevice->wasapi.pCaptureClient);
+    }
+    if (pDevice->wasapi.pAudioClient != NULL) {
+        IAudioClient_Release(pDevice->wasapi.pAudioClient);
+    }
+
+    // Event handles: the buffer notification event and the main-loop stop event.
+    if (pDevice->wasapi.hEvent != NULL) {
+        CloseHandle(pDevice->wasapi.hEvent);
+    }
+    if (pDevice->wasapi.hStopEvent != NULL) {
+        CloseHandle(pDevice->wasapi.hStopEvent);
+    }
+}
+
+#ifndef MAL_WIN32_DESKTOP
+    #ifdef __cplusplus
+    #include <wrl\implements.h>
+    // Completion handler passed to ActivateAudioInterfaceAsync() on non-desktop (UWP) builds. It turns the
+    // asynchronous activation into a blocking one: ActivateCompleted() signals an event that Wait() blocks on.
+    //
+    // Usage: Init() -> pass to ActivateAudioInterfaceAsync() -> Wait() -> Uninit().
+    class malCompletionHandler : public Microsoft::WRL::RuntimeClass< Microsoft::WRL::RuntimeClassFlags< Microsoft::WRL::ClassicCom >, Microsoft::WRL::FtmBase, IActivateAudioInterfaceCompletionHandler >
+    {
+    public:
+
+        malCompletionHandler()
+            : m_hEvent(NULL)
+        {
+        }
+
+        // Creates the auto-reset, initially non-signaled event. Returns MAL_ERROR if the event could not be created.
+        mal_result Init()
+        {
+            m_hEvent = CreateEventA(NULL, FALSE, FALSE, NULL);
+            if (m_hEvent == NULL) {
+                return MAL_ERROR;
+            }
+
+            return MAL_SUCCESS;
+        }
+
+        // Closes the event created by Init(). The handle is reset to NULL afterwards so that calling Uninit()
+        // more than once does not close the same handle twice.
+        void Uninit()
+        {
+            if (m_hEvent != NULL) {
+                CloseHandle(m_hEvent);
+                m_hEvent = NULL;
+            }
+        }
+
+        // Blocks, with no timeout, until ActivateCompleted() has signaled the event.
+        void Wait()
+        {
+            WaitForSingleObject(m_hEvent, INFINITE);
+        }
+
+        // IActivateAudioInterfaceCompletionHandler callback. The operation object is not inspected here; the
+        // caller retrieves the activation result from it after Wait() returns.
+        HRESULT STDMETHODCALLTYPE ActivateCompleted(IActivateAudioInterfaceAsyncOperation *activateOperation)
+        {
+            (void)activateOperation;
+            SetEvent(m_hEvent);
+            return S_OK;
+        }
+
+    private:
+        HANDLE m_hEvent;  // This is created in Init(), deleted in Uninit(), waited on in Wait() and signaled in ActivateCompleted().
+    };
+    #else
+    #error "The UWP build is currently only supported in C++."
+    #endif
+#endif  // !MAL_WIN32_DESKTOP
+
+// Initializes the WASAPI backend state of pDevice for playback or capture.
+//
+// pContext  - context handed to the COM helper calls (mal_CoCreateInstance() / mal_CoTaskMemFree()).
+// type      - mal_device_type_playback selects the render endpoint and render service; any other value
+//             selects the capture endpoint and capture service.
+// pDeviceID - device to open, or NULL to open the default device for the given type.
+// pConfig   - supplies preferExclusiveMode and the sample rate used to compute the buffer duration.
+// pDevice   - device being initialized. pDevice->wasapi is zeroed first. On success it holds the audio
+//             client, the render or capture client and the two event handles, and the internal format,
+//             channel count, sample rate, channel map, buffer size and exclusiveMode flag are filled in.
+//
+// Returns MAL_SUCCESS on success. On failure, everything acquired so far is released through
+// mal_device_uninit__wasapi() and the error is reported through mal_post_error(), whose return value is returned.
+static mal_result mal_device_init__wasapi(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    (void)pContext;
+
+    mal_assert(pDevice != NULL);
+    mal_zero_object(&pDevice->wasapi);
+
+    HRESULT hr;
+    mal_result result = MAL_SUCCESS;
+    const char* errorMsg = "";
+    AUDCLNT_SHAREMODE shareMode = AUDCLNT_SHAREMODE_SHARED;
+
+    // Describe the requested format, built from the device's channels, sampleRate, format and channelMap fields.
+    WAVEFORMATEXTENSIBLE wf;
+    mal_zero_object(&wf);
+    wf.Format.cbSize               = sizeof(wf);
+    wf.Format.wFormatTag           = WAVE_FORMAT_EXTENSIBLE;
+    wf.Format.nChannels            = (WORD)pDevice->channels;
+    wf.Format.nSamplesPerSec       = (DWORD)pDevice->sampleRate;
+    wf.Format.wBitsPerSample       = (WORD)mal_get_sample_size_in_bytes(pDevice->format)*8;
+    wf.Format.nBlockAlign          = (wf.Format.nChannels * wf.Format.wBitsPerSample) / 8;
+    wf.Format.nAvgBytesPerSec      = wf.Format.nBlockAlign * wf.Format.nSamplesPerSec;
+    wf.Samples.wValidBitsPerSample = wf.Format.wBitsPerSample;
+    wf.dwChannelMask               = mal_channel_map_to_channel_mask__win32(pDevice->channelMap, pDevice->channels);
+    if (pDevice->format == mal_format_f32) {
+        wf.SubFormat = MAL_GUID_KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
+    } else {
+        wf.SubFormat = MAL_GUID_KSDATAFORMAT_SUBTYPE_PCM;
+    }
+
+#ifdef MAL_WIN32_DESKTOP
+    // Desktop: locate the endpoint through the MMDevice API and activate an IAudioClient on it.
+    IMMDevice* pMMDevice = NULL;
+
+    IMMDeviceEnumerator* pDeviceEnumerator;
+    hr = mal_CoCreateInstance(pContext, g_malCLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, g_malIID_IMMDeviceEnumerator, (void**)&pDeviceEnumerator);
+    if (FAILED(hr)) {
+        errorMsg = "[WASAPI] Failed to create IMMDeviceEnumerator.", result = MAL_WASAPI_FAILED_TO_CREATE_DEVICE_ENUMERATOR;
+        goto done;
+    }
+
+    if (pDeviceID == NULL) {
+        hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(pDeviceEnumerator, (type == mal_device_type_playback) ? eRender : eCapture, eConsole, &pMMDevice);
+        if (FAILED(hr)) {
+            IMMDeviceEnumerator_Release(pDeviceEnumerator);
+            errorMsg = "[WASAPI] Failed to create default backend device.", result = MAL_WASAPI_FAILED_TO_CREATE_DEVICE;
+            goto done;
+        }
+    } else {
+        hr = IMMDeviceEnumerator_GetDevice(pDeviceEnumerator, pDeviceID->wasapi, &pMMDevice);
+        if (FAILED(hr)) {
+            IMMDeviceEnumerator_Release(pDeviceEnumerator);
+            errorMsg = "[WASAPI] Failed to create backend device.", result = MAL_WASAPI_FAILED_TO_CREATE_DEVICE;
+            goto done;
+        }
+    }
+
+    // The enumerator is only needed to obtain the device.
+    IMMDeviceEnumerator_Release(pDeviceEnumerator);
+
+    hr = IMMDevice_Activate(pMMDevice, g_malIID_IAudioClient, CLSCTX_ALL, NULL, &pDevice->wasapi.pAudioClient);
+    if (FAILED(hr)) {
+        errorMsg = "[WASAPI] Failed to activate device.", result = MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE;
+        goto done;
+    }
+#else
+    // Non-desktop: activate the audio interface asynchronously and block until the completion handler fires.
+    IActivateAudioInterfaceAsyncOperation *pAsyncOp = NULL;
+    IUnknown* pActivatedInterface = NULL;   // Reference returned by GetActivateResult(). Released at "done".
+    malCompletionHandler completionHandler;
+
+    IID iid;
+    if (pDeviceID != NULL) {
+        mal_copy_memory(&iid, pDeviceID->wasapi, sizeof(iid));
+    } else {
+        if (type == mal_device_type_playback) {
+            iid = g_malIID_DEVINTERFACE_AUDIO_RENDER;
+        } else {
+            iid = g_malIID_DEVINTERFACE_AUDIO_CAPTURE;
+        }
+    }
+
+    LPOLESTR iidStr;
+    hr = StringFromIID(iid, &iidStr);
+    if (FAILED(hr)) {
+        errorMsg = "[WASAPI] Failed to convert device IID to string for ActivateAudioInterfaceAsync(). Out of memory.", result = MAL_OUT_OF_MEMORY;
+        goto done;
+    }
+
+    result = completionHandler.Init();
+    if (result != MAL_SUCCESS) {
+        mal_CoTaskMemFree(pContext, iidStr);
+
+        errorMsg = "[WASAPI] Failed to create event for waiting for ActivateAudioInterfaceAsync().", result = MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE;
+        goto done;
+    }
+
+    hr = ActivateAudioInterfaceAsync(iidStr, g_malIID_IAudioClient, NULL, &completionHandler, &pAsyncOp);
+    if (FAILED(hr)) {
+        completionHandler.Uninit();
+        mal_CoTaskMemFree(pContext, iidStr);
+
+        errorMsg = "[WASAPI] ActivateAudioInterfaceAsync() failed.", result = MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE;
+        goto done;
+    }
+
+    mal_CoTaskMemFree(pContext, iidStr);
+
+    // Wait for the async operation to finish.
+    completionHandler.Wait();
+    completionHandler.Uninit();
+
+    HRESULT activateResult;
+    hr = IActivateAudioInterfaceAsyncOperation_GetActivateResult(pAsyncOp, &activateResult, &pActivatedInterface);
+    if (FAILED(hr) || FAILED(activateResult)) {
+        errorMsg = "[WASAPI] Failed to activate device.", result = MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE;
+        goto done;
+    }
+
+    // Here is where we grab the IAudioClient interface.
+    hr = pActivatedInterface->QueryInterface(g_malIID_IAudioClient, &pDevice->wasapi.pAudioClient);
+    if (FAILED(hr)) {
+        errorMsg = "[WASAPI] Failed to query IAudioClient interface.", result = MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE;
+        goto done;
+    }
+#endif
+
+    // Here is where we try to determine the best format to use with the device. If the client is wanting exclusive mode, first try finding the best format for that. If this fails, fall back to shared mode.
+    WAVEFORMATEXTENSIBLE* pBestFormatTemp = NULL;
+    result = MAL_FORMAT_NOT_SUPPORTED;
+    if (pConfig->preferExclusiveMode) {
+        hr = IAudioClient_IsFormatSupported(pDevice->wasapi.pAudioClient, AUDCLNT_SHAREMODE_EXCLUSIVE, (WAVEFORMATEX*)&wf, NULL);
+    #ifdef MAL_WIN32_DESKTOP
+        if (hr == AUDCLNT_E_UNSUPPORTED_FORMAT) {
+            // The format isn't supported, so retrieve the actual format from the property store and try that.
+            IPropertyStore* pStore = NULL;
+            hr = IMMDevice_OpenPropertyStore(pMMDevice, STGM_READ, &pStore);
+            if (SUCCEEDED(hr)) {
+                PROPVARIANT prop;
+                PropVariantInit(&prop);
+                hr = IPropertyStore_GetValue(pStore, g_malPKEY_AudioEngine_DeviceFormat, &prop);
+                if (SUCCEEDED(hr)) {
+                    WAVEFORMATEX* pActualFormat = (WAVEFORMATEX*)prop.blob.pBlobData;
+                    hr = IAudioClient_IsFormatSupported(pDevice->wasapi.pAudioClient, AUDCLNT_SHAREMODE_EXCLUSIVE, pActualFormat, NULL);
+                    if (SUCCEEDED(hr)) {
+                        mal_copy_memory(&wf, pActualFormat, sizeof(WAVEFORMATEXTENSIBLE));
+                    }
+
+                    mal_PropVariantClear(pDevice->pContext, &prop);
+                }
+
+                IPropertyStore_Release(pStore);
+            }
+        }
+    #endif
+
+        // Only an exact S_OK selects exclusive mode; anything else falls through to the shared-mode path below.
+        if (hr == S_OK) {
+            shareMode = AUDCLNT_SHAREMODE_EXCLUSIVE;
+            result = MAL_SUCCESS;
+        }
+    }
+
+    // Fall back to shared mode if necessary.
+    if (result != MAL_SUCCESS) {
+        hr = IAudioClient_IsFormatSupported(pDevice->wasapi.pAudioClient, AUDCLNT_SHAREMODE_SHARED, (WAVEFORMATEX*)&wf, (WAVEFORMATEX**)&pBestFormatTemp);
+        if (hr != S_OK && hr != S_FALSE) {
+            // Neither the requested format nor a closest match is available, so use the device's mix format instead.
+            hr = IAudioClient_GetMixFormat(pDevice->wasapi.pAudioClient, (WAVEFORMATEX**)&pBestFormatTemp);
+            if (hr != S_OK) {
+                result = MAL_WASAPI_FAILED_TO_FIND_BEST_FORMAT;
+            } else {
+                result = MAL_SUCCESS;
+            }
+        } else {
+            result = MAL_SUCCESS;
+        }
+
+        shareMode = AUDCLNT_SHAREMODE_SHARED;
+    }
+
+    // Return an error if we still haven't found a format.
+    if (result != MAL_SUCCESS) {
+        errorMsg = "[WASAPI] Failed to find best device mix format.", result = MAL_WASAPI_FAILED_TO_ACTIVATE_DEVICE;
+        goto done;
+    }
+
+    // If WASAPI suggested a format, adopt it and free the suggestion.
+    if (pBestFormatTemp != NULL) {
+        mal_copy_memory(&wf, pBestFormatTemp, sizeof(wf));
+        mal_CoTaskMemFree(pDevice->pContext, pBestFormatTemp);
+    }
+
+
+    // Requested buffer length converted from frames to microseconds using the configured sample rate.
+    REFERENCE_TIME bufferDurationInMicroseconds = ((mal_uint64)pDevice->bufferSizeInFrames * 1000 * 1000) / pConfig->sampleRate;
+
+    // Translate the chosen wave format back into the library's internal format enum.
+    if (mal_is_guid_equal(wf.SubFormat, MAL_GUID_KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)) {
+        pDevice->internalFormat = mal_format_f32;
+    } else {
+        if (wf.Format.wBitsPerSample == 32) {
+            pDevice->internalFormat = mal_format_s32;
+        } else if (wf.Format.wBitsPerSample == 24) {
+            pDevice->internalFormat = mal_format_s24;
+        } else if (wf.Format.wBitsPerSample == 16) {
+            pDevice->internalFormat = mal_format_s16;
+        } else if (wf.Format.wBitsPerSample == 8) {
+            pDevice->internalFormat = mal_format_u8;
+        } else {
+            errorMsg = "[WASAPI] Device's native format is not supported.", result = MAL_FORMAT_NOT_SUPPORTED;
+            goto done;
+        }
+    }
+
+    pDevice->internalChannels = wf.Format.nChannels;
+    pDevice->internalSampleRate = wf.Format.nSamplesPerSec;
+
+    // Get the internal channel map based on the channel mask.
+    mal_channel_mask_to_channel_map__win32(wf.dwChannelMask, pDevice->internalChannels, pDevice->internalChannelMap);
+
+    // Slightly different initialization for shared and exclusive modes.
+    if (shareMode == AUDCLNT_SHAREMODE_SHARED) {
+        // Shared. REFERENCE_TIME is in 100-nanosecond units, hence the multiplication by 10.
+        REFERENCE_TIME bufferDuration = bufferDurationInMicroseconds*10;
+        hr = IAudioClient_Initialize(pDevice->wasapi.pAudioClient, shareMode, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, bufferDuration, 0, (WAVEFORMATEX*)&wf, NULL);
+        if (FAILED(hr)) {
+            if (hr == E_ACCESSDENIED) {
+                errorMsg = "[WASAPI] Failed to initialize device. Access denied.", result = MAL_ACCESS_DENIED;
+            } else {
+                errorMsg = "[WASAPI] Failed to initialize device.", result = MAL_WASAPI_FAILED_TO_INITIALIZE_DEVICE;
+            }
+
+            goto done;
+        }
+    } else {
+        // Exclusive. The same duration is passed for both the buffer duration and the periodicity.
+        REFERENCE_TIME bufferDuration = bufferDurationInMicroseconds*10;
+        hr = IAudioClient_Initialize(pDevice->wasapi.pAudioClient, shareMode, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, bufferDuration, bufferDuration, (WAVEFORMATEX*)&wf, NULL);
+        if (hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED) {
+            // Ask the client for the aligned buffer size, recompute the duration from it, and try again on a fresh client.
+            UINT bufferSizeInFrames;
+            hr = IAudioClient_GetBufferSize(pDevice->wasapi.pAudioClient, &bufferSizeInFrames);
+            if (SUCCEEDED(hr)) {
+                bufferDuration = (REFERENCE_TIME)((10000.0 * 1000 / wf.Format.nSamplesPerSec * bufferSizeInFrames) + 0.5);
+
+                // Unfortunately we need to release and re-acquire the audio client according to MSDN. Seems silly - why not just call IAudioClient_Initialize() again?!
+                IAudioClient_Release(pDevice->wasapi.pAudioClient);
+
+            #ifdef MAL_WIN32_DESKTOP
+                hr = IMMDevice_Activate(pMMDevice, g_malIID_IAudioClient, CLSCTX_ALL, NULL, &pDevice->wasapi.pAudioClient);
+            #else
+                hr = pActivatedInterface->QueryInterface(g_malIID_IAudioClient, &pDevice->wasapi.pAudioClient);
+            #endif
+
+                if (SUCCEEDED(hr)) {
+                    hr = IAudioClient_Initialize(pDevice->wasapi.pAudioClient, shareMode, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, bufferDuration, bufferDuration, (WAVEFORMATEX*)&wf, NULL);
+                }
+            }
+        }
+
+        if (FAILED(hr)) {
+            errorMsg = "[WASAPI] Failed to initialize device.", result = MAL_WASAPI_FAILED_TO_INITIALIZE_DEVICE;
+            goto done;
+        }
+    }
+
+    // Record the buffer size the audio client actually ended up with.
+    hr = IAudioClient_GetBufferSize(pDevice->wasapi.pAudioClient, &pDevice->bufferSizeInFrames);
+    if (FAILED(hr)) {
+        errorMsg = "[WASAPI] Failed to get audio client's actual buffer size.", result = MAL_WASAPI_FAILED_TO_INITIALIZE_DEVICE;
+        goto done;
+    }
+
+    // Grab the service interface used to move audio data: render for playback, capture otherwise.
+    if (type == mal_device_type_playback) {
+        hr = IAudioClient_GetService((IAudioClient*)pDevice->wasapi.pAudioClient, g_malIID_IAudioRenderClient, &pDevice->wasapi.pRenderClient);
+    } else {
+        hr = IAudioClient_GetService((IAudioClient*)pDevice->wasapi.pAudioClient, g_malIID_IAudioCaptureClient, &pDevice->wasapi.pCaptureClient);
+    }
+
+    if (FAILED(hr)) {
+        errorMsg = "[WASAPI] Failed to get audio client service.", result = MAL_WASAPI_FAILED_TO_INITIALIZE_DEVICE;
+        goto done;
+    }
+
+
+    if (shareMode == AUDCLNT_SHAREMODE_SHARED) {
+        pDevice->exclusiveMode = MAL_FALSE;
+    } else /*if (shareMode == AUDCLNT_SHAREMODE_EXCLUSIVE)*/ {
+        pDevice->exclusiveMode = MAL_TRUE;
+    }
+
+
+    // We need to create and set the event for event-driven mode. This event is signalled whenever a new chunk of audio
+    // data needs to be written or read from the device.
+    pDevice->wasapi.hEvent = CreateEventA(NULL, FALSE, FALSE, NULL);
+    if (pDevice->wasapi.hEvent == NULL) {
+        errorMsg = "[WASAPI] Failed to create main event for main loop.", result = MAL_FAILED_TO_CREATE_EVENT;
+        goto done;
+    }
+
+    IAudioClient_SetEventHandle(pDevice->wasapi.pAudioClient, pDevice->wasapi.hEvent);
+
+
+    // When the device is playing the worker thread will be waiting on a bunch of notification events. To return from
+    // this wait state we need to signal a special event.
+    pDevice->wasapi.hStopEvent = CreateEventA(NULL, FALSE, FALSE, NULL);
+    if (pDevice->wasapi.hStopEvent == NULL) {
+        errorMsg = "[WASAPI] Failed to create stop event for main loop break notification.", result = MAL_FAILED_TO_CREATE_EVENT;
+        goto done;
+    }
+
+    result = MAL_SUCCESS;
+
+done:
+    // Clean up. These temporaries are released on both the success and the failure path.
+#ifdef MAL_WIN32_DESKTOP
+    if (pMMDevice != NULL) {
+        IMMDevice_Release(pMMDevice);
+    }
+#else
+    // The activated interface holds its own reference, separate from the IAudioClient queried from it.
+    if (pActivatedInterface != NULL) {
+        pActivatedInterface->Release();
+    }
+    if (pAsyncOp != NULL) {
+        IActivateAudioInterfaceAsyncOperation_Release(pAsyncOp);
+    }
+#endif
+
+    if (result != MAL_SUCCESS) {
+        mal_device_uninit__wasapi(pDevice);
+        return mal_post_error(pDevice, errorMsg, result);
+    } else {
+        return MAL_SUCCESS;
+    }
+}
+
+// Starts the WASAPI audio client. For playback devices the whole buffer (pDevice->bufferSizeInFrames frames)
+// is first filled with data read from the client. Returns MAL_SUCCESS on success, otherwise the value
+// returned by mal_post_error() for the step that failed.
+static mal_result mal_device__start_backend__wasapi(mal_device* pDevice)
+{
+    HRESULT hr;
+
+    mal_assert(pDevice != NULL);
+
+    if (pDevice->type == mal_device_type_playback) {
+        // Pre-load an initial chunk of data so the device has something to play as soon as it starts.
+        BYTE* pBuffer;
+        hr = IAudioRenderClient_GetBuffer(pDevice->wasapi.pRenderClient, pDevice->bufferSizeInFrames, &pBuffer);
+        if (FAILED(hr)) {
+            return mal_post_error(pDevice, "[WASAPI] Failed to retrieve buffer from internal playback device.", MAL_WASAPI_FAILED_TO_GET_INTERNAL_BUFFER);
+        }
+
+        mal_device__read_frames_from_client(pDevice, pDevice->bufferSizeInFrames, pBuffer);
+
+        hr = IAudioRenderClient_ReleaseBuffer(pDevice->wasapi.pRenderClient, pDevice->bufferSizeInFrames, 0);
+        if (FAILED(hr)) {
+            return mal_post_error(pDevice, "[WASAPI] Failed to release internal buffer for playback device.", MAL_WASAPI_FAILED_TO_RELEASE_INTERNAL_BUFFER);
+        }
+    }
+
+    hr = IAudioClient_Start(pDevice->wasapi.pAudioClient);
+    if (FAILED(hr)) {
+        return mal_post_error(pDevice, "[WASAPI] Failed to start internal device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Stops the WASAPI audio client. Returns MAL_SUCCESS on success, otherwise the value returned by
+// mal_post_error() with MAL_FAILED_TO_STOP_BACKEND_DEVICE.
+static mal_result mal_device__stop_backend__wasapi(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    if (FAILED(IAudioClient_Stop(pDevice->wasapi.pAudioClient))) {
+        return mal_post_error(pDevice, "[WASAPI] Failed to stop internal device.", MAL_FAILED_TO_STOP_BACKEND_DEVICE);
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Asks the WASAPI main loop to exit. Always returns MAL_SUCCESS.
+static mal_result mal_device__break_main_loop__wasapi(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Raise the flag first so the loop sees it once it wakes up.
+    pDevice->wasapi.breakFromMainLoop = MAL_TRUE;
+
+    // The loop blocks in WaitForMultipleObjects(); hStopEvent is one of the handles it waits on, so signaling
+    // it forces that wait to return.
+    SetEvent(pDevice->wasapi.hStopEvent);
+
+    return MAL_SUCCESS;
+}
+
+// Returns how many frames can currently be transferred for the device, or 0 if the underlying query fails.
+//
+//   Capture:             the size of the next packet reported by the capture client.
+//   Playback, exclusive: the current padding reported by the audio client.
+//   Playback, shared:    the buffer size minus the current padding.
+static mal_uint32 mal_device__get_available_frames__wasapi(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    if (pDevice->type != mal_device_type_playback) {
+        UINT32 nextPacketFrames;
+        if (FAILED(IAudioCaptureClient_GetNextPacketSize(pDevice->wasapi.pCaptureClient, &nextPacketFrames))) {
+            return 0;
+        }
+
+        return nextPacketFrames;
+    }
+
+    UINT32 paddingFrames;
+    if (FAILED(IAudioClient_GetCurrentPadding(pDevice->wasapi.pAudioClient, &paddingFrames))) {
+        return 0;
+    }
+
+    return (pDevice->exclusiveMode) ? paddingFrames : (pDevice->bufferSizeInFrames - paddingFrames);
+}
+
+// Blocks until the device reports a non-zero number of available frames and returns that count. The wait
+// ends early if the main loop has been asked to break, if the wait itself fails, or if the device is no
+// longer started; in those cases whatever is available at that moment (possibly 0) is returned.
+static mal_uint32 mal_device__wait_for_frames__wasapi(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    while (!pDevice->wasapi.breakFromMainLoop) {
+        // Sleep until either the buffer notification event or the stop event is signaled.
+        HANDLE waitHandles[2];
+        waitHandles[0] = (HANDLE)pDevice->wasapi.hEvent;
+        waitHandles[1] = (HANDLE)pDevice->wasapi.hStopEvent;
+
+        DWORD waitResult = WaitForMultipleObjects(mal_countof(waitHandles), waitHandles, FALSE, INFINITE);
+
+        // Give up on a failed wait, or when the device has been stopped (most likely at the application's request).
+        if (waitResult == WAIT_FAILED || !mal_device_is_started(pDevice)) {
+            break;
+        }
+
+        mal_uint32 pendingFrames = mal_device__get_available_frames__wasapi(pDevice);
+        if (pendingFrames != 0) {
+            return pendingFrames;
+        }
+    }
+
+    // The loop was terminated rather than satisfied. Report whatever is available right now.
+    return mal_device__get_available_frames__wasapi(pDevice);
+}
+
+// Main loop for a WASAPI device. Repeatedly waits for frames to become available and then either pulls
+// data from the client into the render buffer (playback) or pushes captured packets to the client (capture).
+// The loop runs until wasapi.breakFromMainLoop is set.
+//
+// Returns MAL_SUCCESS when the loop exits normally. If a playback buffer cannot be acquired or released,
+// the value returned by mal_post_error() is returned. Capture buffer failures are reported through
+// mal_post_error() as warnings and only abandon the current batch of frames.
+static mal_result mal_device__main_loop__wasapi(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Make sure the stop event is not signaled to ensure we don't end up immediately returning from WaitForMultipleObjects().
+    ResetEvent(pDevice->wasapi.hStopEvent);
+
+    pDevice->wasapi.breakFromMainLoop = MAL_FALSE;
+    while (!pDevice->wasapi.breakFromMainLoop) {
+        mal_uint32 framesAvailable = mal_device__wait_for_frames__wasapi(pDevice);
+        if (framesAvailable == 0) {
+            continue;
+        }
+
+        // If it's a playback device, don't bother grabbing more data if the device is being stopped.
+        if (pDevice->wasapi.breakFromMainLoop && pDevice->type == mal_device_type_playback) {
+            return MAL_SUCCESS;
+        }
+
+        if (pDevice->type == mal_device_type_playback) {
+            BYTE* pData;
+            HRESULT hr = IAudioRenderClient_GetBuffer(pDevice->wasapi.pRenderClient, framesAvailable, &pData);
+            if (FAILED(hr)) {
+                return mal_post_error(pDevice, "[WASAPI] Failed to retrieve internal buffer from playback device in preparation for sending new data to the device.", MAL_WASAPI_FAILED_TO_GET_INTERNAL_BUFFER);
+            }
+
+            mal_device__read_frames_from_client(pDevice, framesAvailable, pData);
+
+            hr = IAudioRenderClient_ReleaseBuffer(pDevice->wasapi.pRenderClient, framesAvailable, 0);
+            if (FAILED(hr)) {
+                return mal_post_error(pDevice, "[WASAPI] Failed to release internal buffer from playback device in preparation for sending new data to the device.", MAL_WASAPI_FAILED_TO_RELEASE_INTERNAL_BUFFER);
+            }
+        } else {
+            // Drain captured packets until the number of frames reported as available has been delivered.
+            UINT32 framesRemaining = framesAvailable;
+            while (framesRemaining > 0) {
+                BYTE* pData;
+                UINT32 framesToSend;
+                DWORD flags;
+                HRESULT hr = IAudioCaptureClient_GetBuffer(pDevice->wasapi.pCaptureClient, &pData, &framesToSend, &flags, NULL, NULL);
+                if (FAILED(hr)) {
+                    mal_post_error(pDevice, "[WASAPI] WARNING: Failed to retrieve internal buffer from capture device in preparation for sending new data to the client.", MAL_WASAPI_FAILED_TO_GET_INTERNAL_BUFFER);
+                    break;
+                }
+
+                // No packet is ready. Nothing was acquired, so there is nothing to release. Go back to waiting on
+                // the device event instead of spinning on GetBuffer() with framesRemaining unchanged.
+                if (hr == AUDCLNT_S_BUFFER_EMPTY) {
+                    break;
+                }
+
+                mal_device__send_frames_to_client(pDevice, framesToSend, pData);
+
+                hr = IAudioCaptureClient_ReleaseBuffer(pDevice->wasapi.pCaptureClient, framesToSend);
+                if (FAILED(hr)) {
+                    mal_post_error(pDevice, "[WASAPI] WARNING: Failed to release internal buffer from capture device in preparation for sending new data to the client.", MAL_WASAPI_FAILED_TO_RELEASE_INTERNAL_BUFFER);
+                    break;
+                }
+
+                // Clamp at zero in case the packet held more frames than were originally reported.
+                if (framesRemaining >= framesToSend) {
+                    framesRemaining -= framesToSend;
+                } else {
+                    framesRemaining = 0;
+                }
+            }
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// DirectSound Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_DSOUND
+#include <dsound.h>
+
+#if 0   // MAL_GUID_NULL is not currently used, but leaving it here in case I need to add it back again.
+static GUID MAL_GUID_NULL                          = {0x00000000, 0x0000, 0x0000, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
+#endif
+// Interface IDs declared locally; mal_device_init__dsound() takes local copies of / pointers to these.
+static GUID MAL_GUID_IID_DirectSoundNotify         = {0xb0210783, 0x89cd, 0x11d0, {0xaf, 0x08, 0x00, 0xa0, 0xc9, 0x25, 0xcd, 0x16}};
+static GUID MAL_GUID_IID_IDirectSoundCaptureBuffer = {0xb0210782, 0x89cd, 0x11d0, {0xaf, 0x08, 0x00, 0xa0, 0xc9, 0x25, 0xcd, 0x16}};
+
+// Function pointer types for the dsound.dll entry points that are resolved at runtime with GetProcAddress().
+// The enumerate types are looked up as "DirectSoundEnumerateA" / "DirectSoundCaptureEnumerateA" and the
+// playback create type as "DirectSoundCreate"; the capture create type presumably maps to
+// "DirectSoundCaptureCreate" - TODO confirm against the capture path of mal_device_init__dsound().
+typedef HRESULT (WINAPI * mal_DirectSoundCreateProc)(const GUID* pcGuidDevice, LPDIRECTSOUND *ppDS8, LPUNKNOWN pUnkOuter);
+typedef HRESULT (WINAPI * mal_DirectSoundEnumerateAProc)(LPDSENUMCALLBACKA pDSEnumCallback, LPVOID pContext);
+typedef HRESULT (WINAPI * mal_DirectSoundCaptureCreateProc)(const GUID* pcGuidDevice, LPDIRECTSOUNDCAPTURE *ppDSC8, LPUNKNOWN pUnkOuter);
+typedef HRESULT (WINAPI * mal_DirectSoundCaptureEnumerateAProc)(LPDSENUMCALLBACKA pDSEnumCallback, LPVOID pContext);
+
+// Loads dsound.dll at runtime. Returns the module handle from LoadLibraryW(), which callers compare against
+// NULL to detect that DirectSound is unavailable. Release the handle with mal_close_dsound_dll().
+static HMODULE mal_open_dsound_dll(void)
+{
+    return LoadLibraryW(L"dsound.dll");
+}
+
+// Releases a module handle previously returned by mal_open_dsound_dll(). The result of FreeLibrary() is ignored.
+static void mal_close_dsound_dll(HMODULE hModule)
+{
+    (void)FreeLibrary(hModule);
+}
+
+
+// Context initialization for the DirectSound backend. Nothing is set up here (dsound.dll is loaded by the
+// enumeration and device initialization routines instead), so this always returns MAL_SUCCESS.
+mal_result mal_context_init__dsound(mal_context* pContext)
+{
+    (void)pContext;     // Only referenced by the assert below, which may compile to nothing.
+
+    mal_assert(pContext != NULL);
+    return MAL_SUCCESS;
+}
+
+// Context teardown for the DirectSound backend. There is no context-level state to release, so this only
+// sanity-checks its argument and returns MAL_SUCCESS.
+mal_result mal_context_uninit__dsound(mal_context* pContext)
+{
+    (void)pContext;     // Only referenced by the asserts below, which may compile to nothing.
+
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_dsound);
+    return MAL_SUCCESS;
+}
+
+
+// State shared between mal_enumerate_devices__dsound() and its enumeration callback.
+typedef struct
+{
+    mal_uint32 deviceCount;     // Number of devices counted (pInfo == NULL) or written to pInfo so far.
+    mal_uint32 infoCount;       // Remaining free slots in the pInfo array; decremented as entries are written.
+    mal_device_info* pInfo;     // Next slot to write, or NULL when the caller only wants a device count.
+} mal_device_enum_data__dsound;
+
+// Enumeration callback handed to DirectSoundEnumerateA() / DirectSoundCaptureEnumerateA(). It is invoked
+// once per device.
+//
+// lpGuid            - device GUID; when NULL the stored ID is zeroed instead.
+// lpcstrDescription - device description, copied into the info's name field.
+// lpcstrModule      - unused.
+// lpContext         - pointer to the mal_device_enum_data__dsound owned by the caller.
+//
+// In counting mode (pInfo == NULL) every device is counted and TRUE is returned so enumeration continues.
+// In fill mode one info slot is written per call; once no slots remain FALSE is returned to stop the
+// enumeration early, since any further devices could not be stored or counted anyway.
+static BOOL CALLBACK mal_enum_devices_callback__dsound(LPGUID lpGuid, LPCSTR lpcstrDescription, LPCSTR lpcstrModule, LPVOID lpContext)
+{
+    (void)lpcstrModule;
+
+    mal_device_enum_data__dsound* pData = (mal_device_enum_data__dsound*)lpContext;
+    mal_assert(pData != NULL);
+
+    // Counting mode: the caller only wants to know how many devices exist.
+    if (pData->pInfo == NULL) {
+        pData->deviceCount += 1;
+        return TRUE;
+    }
+
+    // Fill mode with no room left: nothing more can be recorded, so stop enumerating.
+    if (pData->infoCount == 0) {
+        return FALSE;
+    }
+
+    mal_zero_object(pData->pInfo);
+    mal_strncpy_s(pData->pInfo->name, sizeof(pData->pInfo->name), lpcstrDescription, (size_t)-1);
+
+    // The ID is the raw 16-byte GUID, or all zeros when no GUID was supplied.
+    if (lpGuid != NULL) {
+        mal_copy_memory(pData->pInfo->id.dsound, lpGuid, 16);
+    } else {
+        mal_zero_memory(pData->pInfo->id.dsound, 16);
+    }
+
+    pData->pInfo += 1;
+    pData->infoCount -= 1;
+    pData->deviceCount += 1;
+
+    // Keep going only while there is still room for another entry.
+    return (pData->infoCount > 0) ? TRUE : FALSE;
+}
+
+// Enumerates DirectSound playback or capture devices.
+//
+// pContext - unused.
+// type     - mal_device_type_playback enumerates playback devices; any other value enumerates capture devices.
+// pCount   - in: capacity of pInfo. Out: number of devices written to pInfo, or, when pInfo is NULL, the
+//            number of devices found. Left at 0 if dsound.dll cannot be loaded.
+// pInfo    - array receiving the device infos, or NULL to only count.
+//
+// Returns MAL_NO_BACKEND if dsound.dll cannot be loaded, otherwise MAL_SUCCESS. This holds even when the
+// enumeration entry point is missing from the DLL, in which case the count is simply 0.
+static mal_result mal_enumerate_devices__dsound(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    (void)pContext;
+
+    mal_device_enum_data__dsound state;
+    state.deviceCount = 0;
+    state.infoCount   = *pCount;
+    state.pInfo       = pInfo;
+
+    *pCount = 0;
+
+    HMODULE hDSound = mal_open_dsound_dll();
+    if (hDSound == NULL) {
+        return MAL_NO_BACKEND;
+    }
+
+    // The playback and capture enumeration functions share the same signature, so only the export name differs.
+    const char* procName = (type == mal_device_type_playback) ? "DirectSoundEnumerateA" : "DirectSoundCaptureEnumerateA";
+
+    mal_DirectSoundEnumerateAProc pEnumerate = (mal_DirectSoundEnumerateAProc)GetProcAddress(hDSound, procName);
+    if (pEnumerate != NULL) {
+        pEnumerate(mal_enum_devices_callback__dsound, &state);
+    }
+
+    mal_close_dsound_dll(hDSound);
+
+    *pCount = state.deviceCount;
+    return MAL_SUCCESS;
+}
+
+// Releases everything stored in pDevice->dsound: the notify interface, the stop event and per-period
+// notification events, the capture and playback buffers and devices, and finally the dsound.dll handle.
+// Members that are NULL are skipped, so this can be called on a partially initialized device. It does
+// nothing if dsound.dll was never loaded.
+static void mal_device_uninit__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Without the DLL nothing else can have been created.
+    if (pDevice->dsound.hDSoundDLL == NULL) {
+        return;
+    }
+
+    if (pDevice->dsound.pNotify) {
+        IDirectSoundNotify_Release((LPDIRECTSOUNDNOTIFY)pDevice->dsound.pNotify);
+    }
+
+    if (pDevice->dsound.hStopEvent) {
+        CloseHandle(pDevice->dsound.hStopEvent);
+    }
+    for (mal_uint32 i = 0; i < pDevice->periods; ++i) {
+        if (pDevice->dsound.pNotifyEvents[i]) {
+            CloseHandle(pDevice->dsound.pNotifyEvents[i]);
+        }
+    }
+
+    // Capture side: buffer first, then the capture device that owns it. The buffer is cast to its real
+    // interface type (IDirectSoundCaptureBuffer) so the Release macro goes through the matching vtable.
+    if (pDevice->dsound.pCaptureBuffer) {
+        IDirectSoundCaptureBuffer_Release((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer);
+    }
+    if (pDevice->dsound.pCapture) {
+        IDirectSoundCapture_Release((LPDIRECTSOUNDCAPTURE)pDevice->dsound.pCapture);
+    }
+
+    // Playback side: secondary buffer, primary buffer, then the playback device.
+    if (pDevice->dsound.pPlaybackBuffer) {
+        IDirectSoundBuffer_Release((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer);
+    }
+    if (pDevice->dsound.pPlaybackPrimaryBuffer) {
+        IDirectSoundBuffer_Release((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackPrimaryBuffer);
+    }
+    if (pDevice->dsound.pPlayback != NULL) {
+        IDirectSound_Release((LPDIRECTSOUND)pDevice->dsound.pPlayback);
+    }
+
+    // Unload the DLL last, after every object created through it has been released.
+    mal_close_dsound_dll((HMODULE)pDevice->dsound.hDSoundDLL);
+}
+
+static mal_result mal_device_init__dsound(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    (void)pContext;
+
+#ifdef __cplusplus
+    GUID _MAL_GUID_IID_DirectSoundNotify           = MAL_GUID_IID_DirectSoundNotify;
+    GUID _MAL_GUID_IID_IDirectSoundCaptureBuffer   = MAL_GUID_IID_IDirectSoundCaptureBuffer;
+#else
+    GUID* _MAL_GUID_IID_DirectSoundNotify          = &MAL_GUID_IID_DirectSoundNotify;
+    GUID* _MAL_GUID_IID_IDirectSoundCaptureBuffer  = &MAL_GUID_IID_IDirectSoundCaptureBuffer;
+#endif
+
+    mal_assert(pDevice != NULL);
+    mal_zero_object(&pDevice->dsound);
+
+    pDevice->dsound.hDSoundDLL = (mal_handle)mal_open_dsound_dll();
+    if (pDevice->dsound.hDSoundDLL == NULL) {
+        return MAL_NO_BACKEND;
+    }
+
+    // Check that we have a valid format.
+    GUID subformat;
+    switch (pConfig->format)
+    {
+        case mal_format_u8:
+        case mal_format_s16:
+        case mal_format_s24:
+        case mal_format_s32:
+        {
+            subformat = MAL_GUID_KSDATAFORMAT_SUBTYPE_PCM;
+        } break;
+
+        case mal_format_f32:
+        {
+            subformat = MAL_GUID_KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
+        } break;
+
+        default:
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+
+
+    WAVEFORMATEXTENSIBLE wf;
+    mal_zero_object(&wf);
+    wf.Format.cbSize               = sizeof(wf);
+    wf.Format.wFormatTag           = WAVE_FORMAT_EXTENSIBLE;
+    wf.Format.nChannels            = (WORD)pConfig->channels;
+    wf.Format.nSamplesPerSec       = (DWORD)pConfig->sampleRate;
+    wf.Format.wBitsPerSample       = (WORD)mal_get_sample_size_in_bytes(pConfig->format)*8;
+    wf.Format.nBlockAlign          = (wf.Format.nChannels * wf.Format.wBitsPerSample) / 8;
+    wf.Format.nAvgBytesPerSec      = wf.Format.nBlockAlign * wf.Format.nSamplesPerSec;
+    wf.Samples.wValidBitsPerSample = wf.Format.wBitsPerSample;
+    wf.dwChannelMask               = mal_channel_map_to_channel_mask__win32(pConfig->channelMap, pConfig->channels);
+    wf.SubFormat                   = subformat;
+
+    DWORD bufferSizeInBytes = 0;
+
+    // Unfortunately DirectSound uses different APIs and data structures for playback and capture devices :(
+    if (type == mal_device_type_playback) {
+        mal_DirectSoundCreateProc pDirectSoundCreate = (mal_DirectSoundCreateProc)GetProcAddress((HMODULE)pDevice->dsound.hDSoundDLL, "DirectSoundCreate");
+        if (pDirectSoundCreate == NULL) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] Could not find DirectSoundCreate().", MAL_API_NOT_FOUND);
+        }
+
+        if (FAILED(pDirectSoundCreate((pDeviceID == NULL) ? NULL : (const GUID*)pDeviceID->dsound, (LPDIRECTSOUND*)&pDevice->dsound.pPlayback, NULL))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] DirectSoundCreate() failed for playback device.", MAL_DSOUND_FAILED_TO_CREATE_DEVICE);
+        }
+
+        // The cooperative level must be set before doing anything else.
+        HWND hWnd = ((MAL_PFN_GetForegroundWindow)pContext->win32.GetForegroundWindow)();
+        if (hWnd == NULL) {
+            hWnd = ((MAL_PFN_GetDesktopWindow)pContext->win32.GetDesktopWindow)();
+        }
+        if (FAILED(IDirectSound_SetCooperativeLevel((LPDIRECTSOUND)pDevice->dsound.pPlayback, hWnd, (pConfig->preferExclusiveMode) ? DSSCL_EXCLUSIVE : DSSCL_PRIORITY))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSound_SetCooperateiveLevel() failed for playback device.", MAL_DSOUND_FAILED_TO_SET_COOP_LEVEL);
+        }
+
+        DSBUFFERDESC descDSPrimary;
+        mal_zero_object(&descDSPrimary);
+        descDSPrimary.dwSize  = sizeof(DSBUFFERDESC);
+        descDSPrimary.dwFlags = DSBCAPS_PRIMARYBUFFER | DSBCAPS_CTRLVOLUME;
+        if (FAILED(IDirectSound_CreateSoundBuffer((LPDIRECTSOUND)pDevice->dsound.pPlayback, &descDSPrimary, (LPDIRECTSOUNDBUFFER*)&pDevice->dsound.pPlaybackPrimaryBuffer, NULL))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSound_CreateSoundBuffer() failed for playback device's primary buffer.", MAL_DSOUND_FAILED_TO_CREATE_BUFFER);
+        }
+
+        // From MSDN:
+        //
+        // The method succeeds even if the hardware does not support the requested format; DirectSound sets the buffer to the closest
+        // supported format. To determine whether this has happened, an application can call the GetFormat method for the primary buffer
+        // and compare the result with the format that was requested with the SetFormat method.
+        if (FAILED(IDirectSoundBuffer_SetFormat((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackPrimaryBuffer, (WAVEFORMATEX*)&wf))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] Failed to set format of playback device's primary buffer.", MAL_FORMAT_NOT_SUPPORTED);
+        }
+
+        // Get the _actual_ properties of the buffer. This is silly API design...
+        DWORD requiredSize;
+        if (FAILED(IDirectSoundBuffer_GetFormat((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackPrimaryBuffer, NULL, 0, &requiredSize))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] Failed to retrieve the actual format of the playback device's primary buffer.", MAL_FORMAT_NOT_SUPPORTED);
+        }
+
+        char rawdata[1024];
+        WAVEFORMATEXTENSIBLE* pActualFormat = (WAVEFORMATEXTENSIBLE*)rawdata;
+        if (FAILED(IDirectSoundBuffer_GetFormat((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackPrimaryBuffer, (WAVEFORMATEX*)pActualFormat, requiredSize, NULL))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] Failed to retrieve the actual format of the playback device's primary buffer.", MAL_FORMAT_NOT_SUPPORTED);
+        }
+
+        pDevice->internalChannels = pActualFormat->Format.nChannels;
+        pDevice->internalSampleRate = pActualFormat->Format.nSamplesPerSec;
+        bufferSizeInBytes = pDevice->bufferSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->format);
+
+        // Get the internal channel map based on the channel mask.
+        mal_channel_mask_to_channel_map__win32(pActualFormat->dwChannelMask, pDevice->internalChannels, pDevice->internalChannelMap);
+
+
+        // Meaning of dwFlags (from MSDN):
+        //
+        // DSBCAPS_CTRLPOSITIONNOTIFY
+        //   The buffer has position notification capability.
+        //
+        // DSBCAPS_GLOBALFOCUS
+        //   With this flag set, an application using DirectSound can continue to play its buffers if the user switches focus to
+        //   another application, even if the new application uses DirectSound.
+        //
+        // DSBCAPS_GETCURRENTPOSITION2
+        //   In the first version of DirectSound, the play cursor was significantly ahead of the actual playing sound on emulated
+        //   sound cards; it was directly behind the write cursor. Now, if the DSBCAPS_GETCURRENTPOSITION2 flag is specified, the
+        //   application can get a more accurate play cursor.
+        DSBUFFERDESC descDS;
+        mal_zero_object(&descDS);
+        descDS.dwSize = sizeof(DSBUFFERDESC);
+        descDS.dwFlags = DSBCAPS_CTRLPOSITIONNOTIFY | DSBCAPS_GLOBALFOCUS | DSBCAPS_GETCURRENTPOSITION2;
+        descDS.dwBufferBytes = bufferSizeInBytes;
+        descDS.lpwfxFormat = (WAVEFORMATEX*)&wf;
+        if (FAILED(IDirectSound_CreateSoundBuffer((LPDIRECTSOUND)pDevice->dsound.pPlayback, &descDS, (LPDIRECTSOUNDBUFFER*)&pDevice->dsound.pPlaybackBuffer, NULL))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSound_CreateSoundBuffer() failed for playback device's secondary buffer.", MAL_DSOUND_FAILED_TO_CREATE_BUFFER);
+        }
+
+        // Notifications are set up via a DIRECTSOUNDNOTIFY object which is retrieved from the buffer.
+        if (FAILED(IDirectSoundBuffer_QueryInterface((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, _MAL_GUID_IID_DirectSoundNotify, (void**)&pDevice->dsound.pNotify))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSoundBuffer_QueryInterface() failed for playback device's IDirectSoundNotify object.", MAL_DSOUND_FAILED_TO_QUERY_INTERFACE);
+        }
+    } else {
+        // The default buffer size is treated slightly differently for DirectSound which, for some reason, seems to
+        // have worse latency with capture than playback (sometimes _much_ worse).
+        if (pDevice->usingDefaultBufferSize) {
+            pDevice->bufferSizeInFrames *= 2; // <-- Might need to fiddle with this to find a more ideal value. May even be able to just add a fixed amount rather than scaling.
+        }
+
+        mal_DirectSoundCaptureCreateProc pDirectSoundCaptureCreate = (mal_DirectSoundCaptureCreateProc)GetProcAddress((HMODULE)pDevice->dsound.hDSoundDLL, "DirectSoundCaptureCreate");
+        if (pDirectSoundCaptureCreate == NULL) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] Could not find DirectSoundCreate().", MAL_API_NOT_FOUND);
+        }
+
+        if (FAILED(pDirectSoundCaptureCreate((pDeviceID == NULL) ? NULL : (const GUID*)pDeviceID->dsound, (LPDIRECTSOUNDCAPTURE*)&pDevice->dsound.pCapture, NULL))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] DirectSoundCaptureCreate() failed for capture device.", MAL_DSOUND_FAILED_TO_CREATE_DEVICE);
+        }
+
+        bufferSizeInBytes = pDevice->bufferSizeInFrames * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format);
+
+        DSCBUFFERDESC descDS;
+        mal_zero_object(&descDS);
+        descDS.dwSize = sizeof(descDS);
+        descDS.dwFlags = 0;
+        descDS.dwBufferBytes = bufferSizeInBytes;
+        descDS.lpwfxFormat = (WAVEFORMATEX*)&wf;
+        LPDIRECTSOUNDCAPTUREBUFFER pDSCB_Temp;
+        if (FAILED(IDirectSoundCapture_CreateCaptureBuffer((LPDIRECTSOUNDCAPTURE)pDevice->dsound.pCapture, &descDS, &pDSCB_Temp, NULL))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSoundCapture_CreateCaptureBuffer() failed for capture device.", MAL_DSOUND_FAILED_TO_CREATE_BUFFER);
+        }
+
+        HRESULT hr = IDirectSoundCapture_QueryInterface(pDSCB_Temp, _MAL_GUID_IID_IDirectSoundCaptureBuffer, (LPVOID*)&pDevice->dsound.pCaptureBuffer);
+        IDirectSoundCaptureBuffer_Release(pDSCB_Temp);
+        if (FAILED(hr)) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSoundCapture_QueryInterface() failed for capture device's IDirectSoundCaptureBuffer8 object.", MAL_DSOUND_FAILED_TO_QUERY_INTERFACE);
+        }
+
+        // Notifications are set up via a DIRECTSOUNDNOTIFY object which is retrieved from the buffer.
+        if (FAILED(IDirectSoundCaptureBuffer_QueryInterface((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer, _MAL_GUID_IID_DirectSoundNotify, (void**)&pDevice->dsound.pNotify))) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] IDirectSoundCaptureBuffer_QueryInterface() failed for capture device's IDirectSoundNotify object.", MAL_DSOUND_FAILED_TO_QUERY_INTERFACE);
+        }
+    }
+
+    // We need a notification for each period. The notification offset is slightly different depending on whether or not the
+    // device is a playback or capture device. For a playback device we want to be notified when a period just starts playing,
+    // whereas for a capture device we want to be notified when a period has just _finished_ capturing.
+    mal_uint32 periodSizeInBytes = pDevice->bufferSizeInFrames / pDevice->periods;
+    DSBPOSITIONNOTIFY notifyPoints[MAL_MAX_PERIODS_DSOUND];  // One notification event for each period.
+    for (mal_uint32 i = 0; i < pDevice->periods; ++i) {
+        pDevice->dsound.pNotifyEvents[i] = CreateEventA(NULL, FALSE, FALSE, NULL);
+        if (pDevice->dsound.pNotifyEvents[i] == NULL) {
+            mal_device_uninit__dsound(pDevice);
+            return mal_post_error(pDevice, "[DirectSound] Failed to create event for buffer notifications.", MAL_FAILED_TO_CREATE_EVENT);
+        }
+
+        // The notification offset is in bytes.
+        notifyPoints[i].dwOffset = i * periodSizeInBytes;
+        notifyPoints[i].hEventNotify = pDevice->dsound.pNotifyEvents[i];
+    }
+
+    if (FAILED(IDirectSoundNotify_SetNotificationPositions((LPDIRECTSOUNDNOTIFY)pDevice->dsound.pNotify, pDevice->periods, notifyPoints))) {
+        mal_device_uninit__dsound(pDevice);
+        return mal_post_error(pDevice, "[DirectSound] IDirectSoundNotify_SetNotificationPositions() failed.", MAL_DSOUND_FAILED_TO_SET_NOTIFICATIONS);
+    }
+
+    // When the device is playing the worker thread will be waiting on a bunch of notification events. To return from
+    // this wait state we need to signal a special event.
+    pDevice->dsound.hStopEvent = CreateEventA(NULL, FALSE, FALSE, NULL);
+    if (pDevice->dsound.hStopEvent == NULL) {
+        mal_device_uninit__dsound(pDevice);
+        return mal_post_error(pDevice, "[DirectSound] Failed to create event for main loop break notification.", MAL_FAILED_TO_CREATE_EVENT);
+    }
+
+    return MAL_SUCCESS;
+}
+
+
+// Starts the DirectSound backend for the given device.
+//
+// Capture devices simply start their looping capture buffer. Playback devices first lock one period's
+// worth of the playback buffer, fill it with frames pulled from the client, and only then start looping
+// playback so that there is valid data in the buffer from the first moment.
+//
+// Returns MAL_SUCCESS on success, otherwise the code reported through mal_post_error().
+static mal_result mal_device__start_backend__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Capture path: nothing to prime, just kick off the looping capture.
+    if (pDevice->type != mal_device_type_playback) {
+        if (FAILED(IDirectSoundCaptureBuffer_Start((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer, DSCBSTART_LOOPING))) {
+            return mal_post_error(pDevice, "[DirectSound] IDirectSoundCaptureBuffer_Start() failed.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+        }
+
+        return MAL_SUCCESS;
+    }
+
+    // Playback path: request a lock covering one period, starting at the beginning of the buffer.
+    mal_uint32 primingFrames = pDevice->bufferSizeInFrames / pDevice->periods;
+    mal_uint32 requestedBytes = primingFrames * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format);
+
+    void* pRegion1;
+    DWORD region1Bytes;
+    void* pRegion2;
+    DWORD region2Bytes;
+    if (FAILED(IDirectSoundBuffer_Lock((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, 0, requestedBytes, &pRegion1, &region1Bytes, &pRegion2, &region2Bytes, 0))) {
+        return mal_post_error(pDevice, "[DirectSound] IDirectSoundBuffer_Lock() failed.", MAL_FAILED_TO_MAP_DEVICE_BUFFER);
+    }
+
+    // Only the first locked region is filled; the frame count is derived from the size that was actually locked.
+    primingFrames = region1Bytes / mal_get_sample_size_in_bytes(pDevice->format) / pDevice->channels;
+    mal_device__read_frames_from_client(pDevice, primingFrames, pRegion1);
+    IDirectSoundBuffer_Unlock((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, pRegion1, region1Bytes, pRegion2, region2Bytes);
+
+    // Remember how far into the buffer we have written before starting playback.
+    pDevice->dsound.lastProcessedFrame = primingFrames;
+    if (FAILED(IDirectSoundBuffer_Play((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, 0, 0, DSBPLAY_LOOPING))) {
+        return mal_post_error(pDevice, "[DirectSound] IDirectSoundBuffer_Play() failed.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Stops the DirectSound backend for the given device.
+//
+// Capture devices stop their capture buffer. Playback devices stop the playback buffer and then rewind
+// its position to 0. Returns MAL_SUCCESS, or the code reported through mal_post_error() if stopping fails.
+static mal_result mal_device__stop_backend__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    if (pDevice->type != mal_device_type_playback) {
+        if (FAILED(IDirectSoundCaptureBuffer_Stop((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer))) {
+            return mal_post_error(pDevice, "[DirectSound] IDirectSoundCaptureBuffer_Stop() failed.", MAL_FAILED_TO_STOP_BACKEND_DEVICE);
+        }
+
+        return MAL_SUCCESS;
+    }
+
+    if (FAILED(IDirectSoundBuffer_Stop((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer))) {
+        return mal_post_error(pDevice, "[DirectSound] IDirectSoundBuffer_Stop() failed.", MAL_FAILED_TO_STOP_BACKEND_DEVICE);
+    }
+
+    // Rewind to the start of the buffer. The result of this call is intentionally not checked.
+    IDirectSoundBuffer_SetCurrentPosition((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, 0);
+
+    return MAL_SUCCESS;
+}
+
+// Asks the DirectSound main loop to terminate.
+//
+// Sets the break flag that the main loop and the frame-wait loop poll, then signals the stop event so
+// that a thread currently blocked in WaitForMultipleObjects() wakes up and sees the flag. The flag is
+// written before the event is signaled. Always returns MAL_SUCCESS.
+static mal_result mal_device__break_main_loop__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->dsound.breakFromMainLoop = MAL_TRUE;
+    SetEvent(pDevice->dsound.hStopEvent);
+
+    return MAL_SUCCESS;
+}
+
+// Retrieves the device's current buffer position, converted from bytes to frames.
+//
+// For playback devices the value comes from the second out-parameter of
+// IDirectSoundBuffer_GetCurrentPosition(); for capture devices it comes from the first out-parameter of
+// IDirectSoundCaptureBuffer_GetCurrentPosition().
+//
+// *pCurrentPos is always reset to 0 first. Returns MAL_TRUE and stores the frame index on success, or
+// MAL_FALSE (leaving *pCurrentPos at 0) if the position query fails.
+static mal_bool32 mal_device__get_current_frame__dsound(mal_device* pDevice, mal_uint32* pCurrentPos)
+{
+    mal_assert(pDevice != NULL);
+    mal_assert(pCurrentPos != NULL);
+    *pCurrentPos = 0;
+
+    DWORD positionInBytes;
+    HRESULT hr;
+    if (pDevice->type == mal_device_type_playback) {
+        hr = IDirectSoundBuffer_GetCurrentPosition((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, NULL, &positionInBytes);
+    } else {
+        hr = IDirectSoundCaptureBuffer_GetCurrentPosition((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer, &positionInBytes, NULL);
+    }
+
+    if (FAILED(hr)) {
+        return MAL_FALSE;
+    }
+
+    // Bytes -> samples -> frames.
+    *pCurrentPos = (mal_uint32)positionInBytes / mal_get_sample_size_in_bytes(pDevice->format) / pDevice->channels;
+    return MAL_TRUE;
+}
+
+// Returns the number of frames that can currently be processed without waiting.
+//
+// The buffer is treated as a ring of bufferSizeInFrames frames:
+//   - Playback: the frames between the device cursor and lastProcessedFrame (going forward from the
+//     cursor) are already committed; everything else is free to be written. When the two positions are
+//     equal the whole buffer counts as committed and 0 is returned.
+//   - Capture: the frames between lastProcessedFrame and the device cursor (going forward from
+//     lastProcessedFrame) are valid captured data ready to be read.
+//
+// Returns 0 if the current position cannot be retrieved.
+static mal_uint32 mal_device__get_available_frames__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_uint32 cursorFrame;
+    if (!mal_device__get_current_frame__dsound(pDevice, &cursorFrame)) {
+        return 0;
+    }
+
+    mal_uint32 ringFrames = pDevice->bufferSizeInFrames;
+
+    if (pDevice->type != mal_device_type_playback) {
+        // Capture: readable range is [lastProcessedFrame, cursor), unwrapped if the cursor has lapped the end.
+        mal_uint32 readableBeg = pDevice->dsound.lastProcessedFrame;
+        mal_uint32 readableEnd = cursorFrame;
+        if (readableEnd < readableBeg) {
+            readableEnd += ringFrames;
+        }
+
+        mal_uint32 readableFrames = readableEnd - readableBeg;
+        mal_assert(readableFrames <= ringFrames);
+
+        return readableFrames;
+    }
+
+    // Playback: pending range is [cursor, lastProcessedFrame), unwrapped when the write head is not ahead.
+    mal_uint32 pendingBeg = cursorFrame;
+    mal_uint32 pendingEnd = pDevice->dsound.lastProcessedFrame;
+    if (pendingEnd <= pendingBeg) {
+        pendingEnd += ringFrames;
+    }
+
+    mal_uint32 pendingFrames = pendingEnd - pendingBeg;
+    mal_assert(pendingFrames <= ringFrames);
+
+    return ringFrames - pendingFrames;
+}
+
+// Blocks until at least one frame is available for processing, or until the main loop has been asked
+// to break.
+//
+// Availability is polled via mal_device__get_available_frames__dsound(). Between polls the thread waits
+// on the per-period notification events plus the stop event, with a timeout of roughly one period so
+// that a missed notification cannot stall the loop.
+//
+// Returns the number of available frames. This may be 0 if the loop was broken before any frames
+// became available.
+static mal_uint32 mal_device__wait_for_frames__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // The timeout to use for putting the thread to sleep is based on the size of the buffer and the period count.
+    //
+    // sampleRate/1000 truncates to 0 for sample rates below 1000 Hz, which would otherwise cause a
+    // division by zero below, so the divisor is clamped to a minimum of 1.
+    mal_uint32 framesPerMillisecond = pDevice->sampleRate / 1000;
+    if (framesPerMillisecond == 0) {
+        framesPerMillisecond = 1;
+    }
+
+    DWORD timeoutInMilliseconds = (pDevice->bufferSizeInFrames / framesPerMillisecond) / pDevice->periods;
+    if (timeoutInMilliseconds < 1) {
+        timeoutInMilliseconds = 1;
+    }
+
+    // Build the wait list: one notification event per period, followed by the stop event.
+    unsigned int eventCount = pDevice->periods + 1;
+    HANDLE pEvents[MAL_MAX_PERIODS_DSOUND + 1];   // +1 for the stop event.
+    mal_copy_memory(pEvents, pDevice->dsound.pNotifyEvents, sizeof(HANDLE) * pDevice->periods);
+    pEvents[eventCount-1] = pDevice->dsound.hStopEvent;
+
+    while (!pDevice->dsound.breakFromMainLoop) {
+        mal_uint32 framesAvailable = mal_device__get_available_frames__dsound(pDevice);
+        if (framesAvailable > 0) {
+            return framesAvailable;
+        }
+
+        // If we get here it means we weren't able to find any frames. We'll just wait here for a bit. The result
+        // of the wait is not inspected; the loop re-checks availability and the break flag either way.
+        WaitForMultipleObjects(eventCount, pEvents, FALSE, timeoutInMilliseconds);
+    }
+
+    // We'll get here if the loop was terminated. Just return whatever's available.
+    return mal_device__get_available_frames__dsound(pDevice);
+}
+
+// Worker loop for the DirectSound backend.
+//
+// Repeatedly waits for frames to become available, locks the corresponding region of the playback or
+// capture buffer, and either pulls frames from the client (playback) or delivers frames to the client
+// (capture). The loop runs until mal_device__break_main_loop__dsound() sets the break flag.
+//
+// Returns MAL_SUCCESS when the loop terminates normally, or the code reported through mal_post_error()
+// if locking the buffer fails.
+static mal_result mal_device__main_loop__dsound(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Make sure the stop event is not signaled to ensure we don't end up immediately returning from WaitForMultipleObjects().
+    ResetEvent(pDevice->dsound.hStopEvent);
+
+    pDevice->dsound.breakFromMainLoop = MAL_FALSE;
+    while (!pDevice->dsound.breakFromMainLoop) {
+        mal_uint32 framesAvailable = mal_device__wait_for_frames__dsound(pDevice);
+        if (framesAvailable == 0) {
+            continue;
+        }
+
+        // If it's a playback device, don't bother grabbing more data if the device is being stopped. This is a
+        // normal termination, so report success using a mal_result code rather than a boolean constant.
+        if (pDevice->dsound.breakFromMainLoop && pDevice->type == mal_device_type_playback) {
+            return MAL_SUCCESS;
+        }
+
+        // The region to lock starts right after the last processed frame and covers every available frame.
+        DWORD lockOffset = pDevice->dsound.lastProcessedFrame * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format);
+        DWORD lockSize   = framesAvailable * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format);
+
+        if (pDevice->type == mal_device_type_playback) {
+            void* pLockPtr;
+            DWORD actualLockSize;
+            void* pLockPtr2;
+            DWORD actualLockSize2;
+            if (FAILED(IDirectSoundBuffer_Lock((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, lockOffset, lockSize, &pLockPtr, &actualLockSize, &pLockPtr2, &actualLockSize2, 0))) {
+                return mal_post_error(pDevice, "[DirectSound] IDirectSoundBuffer_Lock() failed.", MAL_FAILED_TO_MAP_DEVICE_BUFFER);
+            }
+
+            // Only the first locked region is filled, and lastProcessedFrame advances by exactly that many frames.
+            // Anything covered by the second region is left for a later iteration of the loop.
+            mal_uint32 frameCount = actualLockSize / mal_get_sample_size_in_bytes(pDevice->format) / pDevice->channels;
+            mal_device__read_frames_from_client(pDevice, frameCount, pLockPtr);
+            pDevice->dsound.lastProcessedFrame = (pDevice->dsound.lastProcessedFrame + frameCount) % pDevice->bufferSizeInFrames;
+
+            IDirectSoundBuffer_Unlock((LPDIRECTSOUNDBUFFER)pDevice->dsound.pPlaybackBuffer, pLockPtr, actualLockSize, pLockPtr2, actualLockSize2);
+        } else {
+            void* pLockPtr;
+            DWORD actualLockSize;
+            void* pLockPtr2;
+            DWORD actualLockSize2;
+            if (FAILED(IDirectSoundCaptureBuffer_Lock((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer, lockOffset, lockSize, &pLockPtr, &actualLockSize, &pLockPtr2, &actualLockSize2, 0))) {
+                return mal_post_error(pDevice, "[DirectSound] IDirectSoundCaptureBuffer_Lock() failed.", MAL_FAILED_TO_MAP_DEVICE_BUFFER);
+            }
+
+            // As with playback, only the first locked region is delivered in this iteration.
+            mal_uint32 frameCount = actualLockSize / mal_get_sample_size_in_bytes(pDevice->format) / pDevice->channels;
+            mal_device__send_frames_to_client(pDevice, frameCount, pLockPtr);
+            pDevice->dsound.lastProcessedFrame = (pDevice->dsound.lastProcessedFrame + frameCount) % pDevice->bufferSizeInFrames;
+
+            IDirectSoundCaptureBuffer_Unlock((LPDIRECTSOUNDCAPTUREBUFFER)pDevice->dsound.pCaptureBuffer, pLockPtr, actualLockSize, pLockPtr2, actualLockSize2);
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// WinMM Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_WINMM
+#include <mmsystem.h>
+
+// Fallback definition of DWORD_PTR for build environments that do not provide it. The absence of
+// MAXULONG_PTR is used as the detection condition.
+// NOTE(review): presumably MAXULONG_PTR is defined by the same SDK header that declares DWORD_PTR - confirm.
+#if !defined(MAXULONG_PTR)
+typedef size_t DWORD_PTR;
+#endif
+
+// Fallback definitions for the 44.1kHz, 48kHz and 96kHz WAVE_FORMAT_* capability flags, for headers that
+// do not define them. WAVE_FORMAT_44M08 is used as the sentinel for the whole group. Going by the
+// WAVEOUTCAPS/WAVEINCAPS documentation, the suffix encodes mono/stereo (M/S) and bits per sample (08/16).
+#if !defined(WAVE_FORMAT_44M08)
+#define WAVE_FORMAT_44M08 0x00000100
+#define WAVE_FORMAT_44S08 0x00000200
+#define WAVE_FORMAT_44M16 0x00000400
+#define WAVE_FORMAT_44S16 0x00000800
+#define WAVE_FORMAT_48M08 0x00001000
+#define WAVE_FORMAT_48S08 0x00002000
+#define WAVE_FORMAT_48M16 0x00004000
+#define WAVE_FORMAT_48S16 0x00008000
+#define WAVE_FORMAT_96M08 0x00010000
+#define WAVE_FORMAT_96S08 0x00020000
+#define WAVE_FORMAT_96M16 0x00040000
+#define WAVE_FORMAT_96S16 0x00080000
+#endif
+
+// Function pointer types for the WinMM entry points. The functions are looked up at runtime from
+// winmm.dll in mal_context_init__winmm() and the stored pointers are cast to these types at each call site.
+
+// Playback (waveOut*) entry points.
+typedef UINT     (WINAPI * MAL_PFN_waveOutGetNumDevs)(void);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutGetDevCapsA)(UINT_PTR uDeviceID, LPWAVEOUTCAPSA pwoc, UINT cbwoc);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutOpen)(LPHWAVEOUT phwo, UINT uDeviceID, LPCWAVEFORMATEX pwfx, DWORD_PTR dwCallback, DWORD_PTR dwInstance, DWORD fdwOpen);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutClose)(HWAVEOUT hwo);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutPrepareHeader)(HWAVEOUT hwo, LPWAVEHDR pwh, UINT cbwh);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutUnprepareHeader)(HWAVEOUT hwo, LPWAVEHDR pwh, UINT cbwh);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutWrite)(HWAVEOUT hwo, LPWAVEHDR pwh, UINT cbwh);
+typedef MMRESULT (WINAPI * MAL_PFN_waveOutReset)(HWAVEOUT hwo);
+// Capture (waveIn*) entry points.
+typedef UINT     (WINAPI * MAL_PFN_waveInGetNumDevs)(void);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInGetDevCapsA)(UINT_PTR uDeviceID, LPWAVEINCAPSA pwic, UINT cbwic);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInOpen)(LPHWAVEIN phwi, UINT uDeviceID, LPCWAVEFORMATEX pwfx, DWORD_PTR dwCallback, DWORD_PTR dwInstance, DWORD fdwOpen);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInClose)(HWAVEIN hwi);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInPrepareHeader)(HWAVEIN hwi, LPWAVEHDR pwh, UINT cbwh);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInUnprepareHeader)(HWAVEIN hwi, LPWAVEHDR pwh, UINT cbwh);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInAddBuffer)(HWAVEIN hwi, LPWAVEHDR pwh, UINT cbwh);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInStart)(HWAVEIN hwi);
+typedef MMRESULT (WINAPI * MAL_PFN_waveInReset)(HWAVEIN hwi);
+
+// Translates a WinMM MMRESULT code into the equivalent mal_result code.
+//
+// MMSYSERR_NOERROR maps to MAL_SUCCESS. Bad device ID, invalid handle, invalid flag and invalid
+// parameter all map to MAL_INVALID_ARGS. Anything not explicitly listed maps to MAL_ERROR.
+mal_result mal_result_from_MMRESULT(MMRESULT resultMM)
+{
+    switch (resultMM)
+    {
+        case MMSYSERR_NOERROR:
+            return MAL_SUCCESS;
+
+        case MMSYSERR_BADDEVICEID:
+        case MMSYSERR_INVALHANDLE:
+        case MMSYSERR_INVALFLAG:
+        case MMSYSERR_INVALPARAM:
+            return MAL_INVALID_ARGS;
+
+        case MMSYSERR_NOMEM:
+            return MAL_OUT_OF_MEMORY;
+
+        case MMSYSERR_HANDLEBUSY:
+            return MAL_DEVICE_BUSY;
+
+        case MMSYSERR_ERROR:
+        default:
+            return MAL_ERROR;
+    }
+}
+
+// Initializes the WinMM backend for a context.
+//
+// Loads winmm.dll and looks up every waveOut*/waveIn* entry point the backend uses, storing the results
+// in pContext->winmm. Returns MAL_NO_BACKEND if the library cannot be loaded, otherwise MAL_SUCCESS.
+// The results of the individual symbol lookups are not validated here.
+mal_result mal_context_init__winmm(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+
+    pContext->winmm.hWinMM = mal_dlopen("winmm.dll");
+    if (pContext->winmm.hWinMM == NULL) {
+        return MAL_NO_BACKEND;
+    }
+
+    // Device enumeration and capability queries.
+    pContext->winmm.waveOutGetNumDevs = mal_dlsym(pContext->winmm.hWinMM, "waveOutGetNumDevs");
+    pContext->winmm.waveOutGetDevCapsA = mal_dlsym(pContext->winmm.hWinMM, "waveOutGetDevCapsA");
+    pContext->winmm.waveInGetNumDevs = mal_dlsym(pContext->winmm.hWinMM, "waveInGetNumDevs");
+    pContext->winmm.waveInGetDevCapsA = mal_dlsym(pContext->winmm.hWinMM, "waveInGetDevCapsA");
+
+    // Playback device entry points.
+    pContext->winmm.waveOutOpen = mal_dlsym(pContext->winmm.hWinMM, "waveOutOpen");
+    pContext->winmm.waveOutClose = mal_dlsym(pContext->winmm.hWinMM, "waveOutClose");
+    pContext->winmm.waveOutPrepareHeader = mal_dlsym(pContext->winmm.hWinMM, "waveOutPrepareHeader");
+    pContext->winmm.waveOutUnprepareHeader = mal_dlsym(pContext->winmm.hWinMM, "waveOutUnprepareHeader");
+    pContext->winmm.waveOutWrite = mal_dlsym(pContext->winmm.hWinMM, "waveOutWrite");
+    pContext->winmm.waveOutReset = mal_dlsym(pContext->winmm.hWinMM, "waveOutReset");
+
+    // Capture device entry points.
+    pContext->winmm.waveInOpen = mal_dlsym(pContext->winmm.hWinMM, "waveInOpen");
+    pContext->winmm.waveInClose = mal_dlsym(pContext->winmm.hWinMM, "waveInClose");
+    pContext->winmm.waveInPrepareHeader = mal_dlsym(pContext->winmm.hWinMM, "waveInPrepareHeader");
+    pContext->winmm.waveInUnprepareHeader = mal_dlsym(pContext->winmm.hWinMM, "waveInUnprepareHeader");
+    pContext->winmm.waveInAddBuffer = mal_dlsym(pContext->winmm.hWinMM, "waveInAddBuffer");
+    pContext->winmm.waveInStart = mal_dlsym(pContext->winmm.hWinMM, "waveInStart");
+    pContext->winmm.waveInReset = mal_dlsym(pContext->winmm.hWinMM, "waveInReset");
+
+    return MAL_SUCCESS;
+}
+
+// Tears down the WinMM backend for a context by unloading the winmm.dll handle stored in
+// pContext->winmm.hWinMM. The context must be using the WinMM backend. Always returns MAL_SUCCESS.
+mal_result mal_context_uninit__winmm(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_winmm);
+
+    mal_dlclose(pContext->winmm.hWinMM);
+
+    return MAL_SUCCESS;
+}
+
+// Enumerates the WinMM playback or capture devices.
+//
+// Parameters:
+//   pContext - The context whose WinMM entry points are used for the queries.
+//   type     - mal_device_type_playback to enumerate waveOut devices; anything else enumerates waveIn devices.
+//   pCount   - In: the capacity of pInfo, in entries. Out: the number of entries written, or, when pInfo
+//              is NULL, the total number of devices reported by WinMM.
+//   pInfo    - Optional output array. May be NULL to only retrieve the device count.
+//
+// Every entry written is zeroed first and always receives its WinMM device index as its ID. The name is
+// only filled in when the capability query succeeds; otherwise it is left empty rather than holding
+// whatever the caller's buffer happened to contain.
+//
+// Always returns MAL_SUCCESS.
+static mal_result mal_enumerate_devices__winmm(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pCount != NULL);
+
+    mal_uint32 infoSize = *pCount;
+    *pCount = 0;
+
+    mal_bool32 isPlayback = (type == mal_device_type_playback);
+
+    UINT deviceCount;
+    if (isPlayback) {
+        deviceCount = ((MAL_PFN_waveOutGetNumDevs)pContext->winmm.waveOutGetNumDevs)();
+    } else {
+        deviceCount = ((MAL_PFN_waveInGetNumDevs)pContext->winmm.waveInGetNumDevs)();
+    }
+
+    // Count-only mode: every device reported by WinMM is counted.
+    if (pInfo == NULL) {
+        *pCount = (mal_uint32)deviceCount;
+        return MAL_SUCCESS;
+    }
+
+    // Fill mode: stop as soon as either the devices or the caller's capacity run out.
+    for (UINT iDevice = 0; iDevice < deviceCount && infoSize > 0; ++iDevice) {
+        mal_zero_object(pInfo);
+        pInfo->id.winmm = iDevice;
+
+        if (isPlayback) {
+            WAVEOUTCAPSA caps;
+            MMRESULT result = ((MAL_PFN_waveOutGetDevCapsA)pContext->winmm.waveOutGetDevCapsA)(iDevice, &caps, sizeof(caps));
+            if (result == MMSYSERR_NOERROR) {
+                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), caps.szPname, (size_t)-1);
+            }
+        } else {
+            WAVEINCAPSA caps;
+            MMRESULT result = ((MAL_PFN_waveInGetDevCapsA)pContext->winmm.waveInGetDevCapsA)(iDevice, &caps, sizeof(caps));
+            if (result == MMSYSERR_NOERROR) {
+                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), caps.szPname, (size_t)-1);
+            }
+        }
+
+        pInfo += 1;
+        infoSize -= 1;
+        *pCount += 1;
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Releases the WinMM resources held by a device: closes the waveOut or waveIn handle (depending on the
+// device type), frees the backend's heap allocation, and closes the device's event handle.
+static void mal_device_uninit__winmm(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    if (pDevice->type != mal_device_type_playback) {
+        ((MAL_PFN_waveInClose)pDevice->pContext->winmm.waveInClose)((HWAVEIN)pDevice->winmm.hDevice);
+    } else {
+        ((MAL_PFN_waveOutClose)pDevice->pContext->winmm.waveOutClose)((HWAVEOUT)pDevice->winmm.hDevice);
+    }
+
+    mal_free(pDevice->winmm._pHeapData);
+
+    CloseHandle((HANDLE)pDevice->winmm.hEvent);
+}
+
+static mal_result mal_device_init__winmm(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    (void)pContext;
+
+    mal_uint32 heapSize;
+    mal_uint32 iBit;
+
+    WORD closestBitsPerSample = 0;
+    WORD closestChannels = 0;
+    DWORD closestSampleRate = 0;
+
+    mal_assert(pDevice != NULL);
+    mal_zero_object(&pDevice->winmm);
+
+    UINT winMMDeviceID = 0;
+    if (pDeviceID != NULL) {
+        winMMDeviceID = (UINT)pDeviceID->winmm;
+    }
+
+    const char* errorMsg = "";
+    mal_result errorCode = MAL_ERROR;
+
+
+    // WinMM doesn't seem to have a good way to query the format of the device. Therefore, we'll restrict the formats to the
+    // standard formats documented here https://msdn.microsoft.com/en-us/library/windows/desktop/dd743855(v=vs.85).aspx. If
+    // that link goes stale, just look up the documentation for WAVEOUTCAPS or WAVEINCAPS.
+    WAVEFORMATEX wf;
+    mal_zero_object(&wf);
+    wf.cbSize          = sizeof(wf);
+    wf.wFormatTag      = WAVE_FORMAT_PCM;
+    wf.nChannels       = (WORD)pConfig->channels;
+    wf.nSamplesPerSec  = (DWORD)pConfig->sampleRate;
+    wf.wBitsPerSample  = (WORD)mal_get_sample_size_in_bytes(pConfig->format)*8;
+
+    if (wf.nChannels > 2) {
+        wf.nChannels = 2;
+    }
+
+    if (wf.wBitsPerSample != 8 && wf.wBitsPerSample != 16) {
+        if (wf.wBitsPerSample <= 8) {
+            wf.wBitsPerSample = 8;
+        } else {
+            wf.wBitsPerSample = 16;
+        }
+    }
+
+    if (wf.nSamplesPerSec <= 11025) {
+        wf.nSamplesPerSec = 11025;
+    } else if (wf.nSamplesPerSec <= 22050) {
+        wf.nSamplesPerSec = 22050;
+    } else if (wf.nSamplesPerSec <= 44100) {
+        wf.nSamplesPerSec = 44100;
+    } else if (wf.nSamplesPerSec <= 48000) {
+        wf.nSamplesPerSec = 48000;
+    } else {
+        wf.nSamplesPerSec = 96000;
+    }
+
+
+    // Change the format based on the closest match of the supported standard formats.
+    DWORD dwFormats = 0;
+    if (type == mal_device_type_playback) {
+        WAVEOUTCAPSA caps;
+        if (((MAL_PFN_waveOutGetDevCapsA)pContext->winmm.waveOutGetDevCapsA)(winMMDeviceID, &caps, sizeof(caps)) == MMSYSERR_NOERROR) {
+            dwFormats = caps.dwFormats;
+        } else {
+            errorMsg = "[WinMM] Failed to retrieve internal device caps.", errorCode = MAL_WINMM_FAILED_TO_GET_DEVICE_CAPS;
+            goto on_error;
+        }
+    } else {
+        WAVEINCAPSA caps;
+        if (((MAL_PFN_waveInGetDevCapsA)pContext->winmm.waveInGetDevCapsA)(winMMDeviceID, &caps, sizeof(caps)) == MMSYSERR_NOERROR) {
+            dwFormats = caps.dwFormats;
+        } else {
+            errorMsg = "[WinMM] Failed to retrieve internal device caps.", errorCode = MAL_WINMM_FAILED_TO_GET_DEVICE_CAPS;
+            goto on_error;
+        }
+    }
+
+    if (dwFormats == 0) {
+        errorMsg = "[WinMM] Failed to retrieve the supported formats for the internal device.", errorCode = MAL_WINMM_FAILED_TO_GET_SUPPORTED_FORMATS;
+        goto on_error;
+    }
+
+    for (iBit = 0; iBit < 32; ++iBit) {
+        WORD formatBitsPerSample = 0;
+        WORD formatChannels = 0;
+        DWORD formatSampleRate = 0;
+
+        DWORD format = (dwFormats & (1 << iBit));
+        if (format != 0) {
+            switch (format)
+            {
+                case WAVE_FORMAT_1M08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 1;
+                    formatSampleRate = 110025;
+                } break;
+                case WAVE_FORMAT_1M16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 1;
+                    formatSampleRate = 110025;
+                } break;
+                case WAVE_FORMAT_1S08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 2;
+                    formatSampleRate = 110025;
+                } break;
+                case WAVE_FORMAT_1S16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 2;
+                    formatSampleRate = 110025;
+                } break;
+                case WAVE_FORMAT_2M08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 1;
+                    formatSampleRate = 22050;
+                } break;
+                case WAVE_FORMAT_2M16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 1;
+                    formatSampleRate = 22050;
+                } break;
+                case WAVE_FORMAT_2S08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 2;
+                    formatSampleRate = 22050;
+                } break;
+                case WAVE_FORMAT_2S16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 2;
+                    formatSampleRate = 22050;
+                } break;
+                case WAVE_FORMAT_44M08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 1;
+                    formatSampleRate = 44100;
+                } break;
+                case WAVE_FORMAT_44M16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 1;
+                    formatSampleRate = 44100;
+                } break;
+                case WAVE_FORMAT_44S08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 2;
+                    formatSampleRate = 44100;
+                } break;
+                case WAVE_FORMAT_44S16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 2;
+                    formatSampleRate = 44100;
+                } break;
+                case WAVE_FORMAT_48M08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 1;
+                    formatSampleRate = 48000;
+                } break;
+                case WAVE_FORMAT_48M16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 1;
+                    formatSampleRate = 48000;
+                } break;
+                case WAVE_FORMAT_48S08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 2;
+                    formatSampleRate = 48000;
+                } break;
+                case WAVE_FORMAT_48S16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 2;
+                    formatSampleRate = 48000;
+                } break;
+                case WAVE_FORMAT_96M08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 1;
+                    formatSampleRate = 96000;
+                } break;
+                case WAVE_FORMAT_96M16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 1;
+                    formatSampleRate = 96000;
+                } break;
+                case WAVE_FORMAT_96S08:
+                {
+                    formatBitsPerSample = 8;
+                    formatChannels = 2;
+                    formatSampleRate = 96000;
+                } break;
+                case WAVE_FORMAT_96S16:
+                {
+                    formatBitsPerSample = 16;
+                    formatChannels = 2;
+                    formatSampleRate = 96000;
+                } break;
+                default:
+                {
+                    errorMsg =  "[WinMM] The internal device does not support any of the standard formats.", errorCode = MAL_ERROR;    // <-- Should never hit this.
+                    goto on_error;
+                } break;
+            }
+
+            if (formatBitsPerSample == wf.wBitsPerSample && formatChannels == wf.nChannels && formatSampleRate == wf.nSamplesPerSec) {
+                break;  // It's an exact match.
+            } else {
+                // It's not an exact match. Compare it with the closest match.
+                if (closestBitsPerSample == 0) {
+                    // This is the first format, so nothing to compare against.
+                    closestBitsPerSample = formatBitsPerSample;
+                    closestChannels = formatChannels;
+                    closestSampleRate = formatSampleRate;
+                } else {
+                    // Prefer the channel count be the same over the others.
+                    if (formatChannels != closestChannels) {
+                        // Channels aren't equal. Favour the one equal to our desired channel count.
+                        if (formatChannels == wf.nChannels) {
+                            closestBitsPerSample = formatBitsPerSample;
+                            closestChannels = formatChannels;
+                            closestSampleRate = formatSampleRate;
+                        }
+                    } else {
+                        // The channels are equal. Look at the format now.
+                        if (formatBitsPerSample != closestBitsPerSample) {
+                            if (formatBitsPerSample == wf.wBitsPerSample) {
+                                closestBitsPerSample = formatBitsPerSample;
+                                closestChannels = formatChannels;
+                                closestSampleRate = formatSampleRate;
+                            }
+                        } else {
+                            // Both the channels and formats are the same, so now just favour whichever's sample rate is closest to the requested rate.
+                            mal_uint32 closestRateDiff = (closestSampleRate > wf.nSamplesPerSec) ? (closestSampleRate - wf.nSamplesPerSec) : (wf.nSamplesPerSec - closestSampleRate);
+                            mal_uint32 formatRateDiff  = (formatSampleRate  > wf.nSamplesPerSec) ? (formatSampleRate  - wf.nSamplesPerSec) : (wf.nSamplesPerSec - formatSampleRate);
+                            if (formatRateDiff < closestRateDiff) {
+                                closestBitsPerSample = formatBitsPerSample;
+                                closestChannels = formatChannels;
+                                closestSampleRate = formatSampleRate;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    wf.wBitsPerSample  = closestBitsPerSample;
+    wf.nChannels       = closestChannels;
+    wf.nSamplesPerSec  = closestSampleRate;
+    wf.nBlockAlign     = (wf.nChannels * wf.wBitsPerSample) / 8;
+    wf.nAvgBytesPerSec = wf.nBlockAlign * wf.nSamplesPerSec;
+
+
+    // We use an event to know when a new fragment needs to be enqueued.
+    pDevice->winmm.hEvent = (mal_handle)CreateEvent(NULL, TRUE, TRUE, NULL);
+    if (pDevice->winmm.hEvent == NULL) {
+        errorMsg = "[WinMM] Failed to create event for fragment enqueing.", errorCode = MAL_FAILED_TO_CREATE_EVENT;
+        goto on_error;
+    }
+
+
+    if (type == mal_device_type_playback) {
+        MMRESULT result = ((MAL_PFN_waveOutOpen)pContext->winmm.waveOutOpen)((LPHWAVEOUT)&pDevice->winmm.hDevice, winMMDeviceID, &wf, (DWORD_PTR)pDevice->winmm.hEvent, (DWORD_PTR)pDevice, CALLBACK_EVENT | WAVE_ALLOWSYNC);
+        if (result != MMSYSERR_NOERROR) {
+            errorMsg = "[WinMM] Failed to open playback device.", errorCode = MAL_FAILED_TO_OPEN_BACKEND_DEVICE;
+            goto on_error;
+        }
+    } else {
+        MMRESULT result = ((MAL_PFN_waveInOpen)pDevice->pContext->winmm.waveInOpen)((LPHWAVEIN)&pDevice->winmm.hDevice, winMMDeviceID, &wf, (DWORD_PTR)pDevice->winmm.hEvent, (DWORD_PTR)pDevice, CALLBACK_EVENT | WAVE_ALLOWSYNC);
+        if (result != MMSYSERR_NOERROR) {
+            errorMsg = "[WinMM] Failed to open capture device.", errorCode = MAL_FAILED_TO_OPEN_BACKEND_DEVICE;
+            goto on_error;
+        }
+    }
+
+
+    // The internal formats need to be set based on the wf object.
+    if (wf.wFormatTag == WAVE_FORMAT_PCM) {
+        switch (wf.wBitsPerSample) {
+            case 8:  pDevice->internalFormat = mal_format_u8;  break;
+            case 16: pDevice->internalFormat = mal_format_s16; break;
+            case 24: pDevice->internalFormat = mal_format_s24; break;
+            case 32: pDevice->internalFormat = mal_format_s32; break;
+            default: mal_post_error(pDevice, "[WinMM] The device's internal format is not supported by mini_al.", MAL_FORMAT_NOT_SUPPORTED);
+        }
+    } else {
+        errorMsg = "[WinMM] The device's internal format is not supported by mini_al.", errorCode = MAL_FORMAT_NOT_SUPPORTED;
+        goto on_error;
+    }
+
+    pDevice->internalChannels = wf.nChannels;
+    pDevice->internalSampleRate = wf.nSamplesPerSec;
+
+
+    // Just use the default channel mapping. WinMM only supports mono or stereo anyway so it'll reliably be left/right order for stereo.
+    mal_get_default_channel_mapping(pDevice->pContext->backend, pDevice->internalChannels, pDevice->internalChannelMap);
+
+
+    // Latency with WinMM seems pretty bad from my testing... Need to increase the default buffer size.
+    if (pDevice->usingDefaultBufferSize) {
+        if (pDevice->type == mal_device_type_playback) {
+            pDevice->bufferSizeInFrames *= 4; // <-- Might need to fiddle with this to find a more ideal value. May even be able to just add a fixed amount rather than scaling.
+        } else {
+            pDevice->bufferSizeInFrames *= 2;
+        }
+    }
+
+    // The size of the intermediary buffer needs to be able to fit every fragment.
+    pDevice->winmm.fragmentSizeInFrames = pDevice->bufferSizeInFrames / pDevice->periods;
+    pDevice->winmm.fragmentSizeInBytes = pDevice->winmm.fragmentSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+
+    heapSize = (sizeof(WAVEHDR) * pDevice->periods) + (pDevice->winmm.fragmentSizeInBytes * pDevice->periods);
+    pDevice->winmm._pHeapData = (mal_uint8*)mal_malloc(heapSize);
+    if (pDevice->winmm._pHeapData == NULL) {
+        errorMsg = "[WinMM] Failed to allocate memory for the intermediary buffer.", errorCode = MAL_OUT_OF_MEMORY;
+        goto on_error;
+    }
+
+    mal_zero_memory(pDevice->winmm._pHeapData, pDevice->winmm.fragmentSizeInBytes * pDevice->periods);
+
+    pDevice->winmm.pWAVEHDR = pDevice->winmm._pHeapData;
+    pDevice->winmm.pIntermediaryBuffer = pDevice->winmm._pHeapData + (sizeof(WAVEHDR) * pDevice->periods);
+
+
+    return MAL_SUCCESS;
+
+on_error:
+    if (pDevice->type == mal_device_type_playback) {
+        ((MAL_PFN_waveOutClose)pContext->winmm.waveOutClose)((HWAVEOUT)pDevice->winmm.hDevice);
+    } else {
+        ((MAL_PFN_waveInClose)pContext->winmm.waveInClose)((HWAVEIN)pDevice->winmm.hDevice);
+    }
+
+    mal_free(pDevice->winmm._pHeapData);
+    return mal_post_error(pDevice, errorMsg, errorCode);
+}
+
+
+// Starts the WinMM backend device.
+//
+// Every fragment's WAVEHDR is re-initialised to point at its slice of the intermediary buffer and is then prepared with
+// the device. For playback, each fragment is first filled with data read from the client and, once all fragments are
+// prepared, written with waveOutWrite(). For capture, each fragment is queued with waveInAddBuffer() and the device is
+// then started with waveInStart().
+//
+// Returns MAL_SUCCESS on success. Any failure to prepare, write or queue a fragment, or to start the device, is posted
+// through mal_post_error() and the posted result is returned. In particular the capture device is NOT started when one
+// of its fragments could not be prepared or queued.
+static mal_result mal_device__start_backend__winmm(mal_device* pDevice)
+{
+    LPWAVEHDR pHeaders;
+    mal_uint32 i;
+
+    mal_assert(pDevice != NULL);
+
+    pHeaders = (LPWAVEHDR)pDevice->winmm.pWAVEHDR;
+
+    if (pDevice->type == mal_device_type_playback) {
+        // Playback. The device is started when we call waveOutWrite() with a block of data. From MSDN:
+        //
+        //     Unless the device is paused by calling the waveOutPause function, playback begins when the first data block is sent to the device.
+        //
+        // When starting the device we commit every fragment. The event is reset before calling waveOutWrite().
+        for (i = 0; i < pDevice->periods; ++i) {
+            mal_zero_object(&pHeaders[i]);
+            pHeaders[i].lpData = (LPSTR)(pDevice->winmm.pIntermediaryBuffer + (pDevice->winmm.fragmentSizeInBytes * i));
+            pHeaders[i].dwBufferLength = pDevice->winmm.fragmentSizeInBytes;
+            pHeaders[i].dwFlags = 0L;
+            pHeaders[i].dwLoops = 0L;
+            mal_device__read_frames_from_client(pDevice, pDevice->winmm.fragmentSizeInFrames, pHeaders[i].lpData);
+
+            if (((MAL_PFN_waveOutPrepareHeader)pDevice->pContext->winmm.waveOutPrepareHeader)((HWAVEOUT)pDevice->winmm.hDevice, &pHeaders[i], sizeof(WAVEHDR)) != MMSYSERR_NOERROR) {
+                return mal_post_error(pDevice, "[WinMM] Failed to start backend device. Failed to prepare header.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+            }
+        }
+
+        ResetEvent((HANDLE)pDevice->winmm.hEvent);
+
+        for (i = 0; i < pDevice->periods; ++i) {
+            if (((MAL_PFN_waveOutWrite)pDevice->pContext->winmm.waveOutWrite)((HWAVEOUT)pDevice->winmm.hDevice, &pHeaders[i], sizeof(WAVEHDR)) != MMSYSERR_NOERROR) {
+                return mal_post_error(pDevice, "[WinMM] Failed to start backend device. Failed to send data to the backend device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+            }
+        }
+    } else {
+        // Capture. Every fragment is prepared and queued before the device is started.
+        for (i = 0; i < pDevice->periods; ++i) {
+            MMRESULT resultMM;
+
+            mal_zero_object(&pHeaders[i]);
+            pHeaders[i].lpData = (LPSTR)(pDevice->winmm.pIntermediaryBuffer + (pDevice->winmm.fragmentSizeInBytes * i));
+            pHeaders[i].dwBufferLength = pDevice->winmm.fragmentSizeInBytes;
+            pHeaders[i].dwFlags = 0L;
+            pHeaders[i].dwLoops = 0L;
+
+            // A fragment that could not be prepared or queued is never marked as done by the device, which would stall
+            // the main loop on that fragment. Fail the start instead of reporting success with an incomplete queue.
+            resultMM = ((MAL_PFN_waveInPrepareHeader)pDevice->pContext->winmm.waveInPrepareHeader)((HWAVEIN)pDevice->winmm.hDevice, &pHeaders[i], sizeof(WAVEHDR));
+            if (resultMM != MMSYSERR_NOERROR) {
+                return mal_post_error(pDevice, "[WinMM] Failed to prepare header for capture device in preparation for adding a new capture buffer for the device.", mal_result_from_MMRESULT(resultMM));
+            }
+
+            resultMM = ((MAL_PFN_waveInAddBuffer)pDevice->pContext->winmm.waveInAddBuffer)((HWAVEIN)pDevice->winmm.hDevice, &pHeaders[i], sizeof(WAVEHDR));
+            if (resultMM != MMSYSERR_NOERROR) {
+                return mal_post_error(pDevice, "[WinMM] Failed to add new capture buffer to the internal capture device.", mal_result_from_MMRESULT(resultMM));
+            }
+        }
+
+        ResetEvent((HANDLE)pDevice->winmm.hEvent);
+
+        if (((MAL_PFN_waveInStart)pDevice->pContext->winmm.waveInStart)((HWAVEIN)pDevice->winmm.hDevice) != MMSYSERR_NOERROR) {
+            return mal_post_error(pDevice, "[WinMM] Failed to start backend device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+        }
+    }
+
+    // The main loop begins processing completed fragments from the first header.
+    pDevice->winmm.iNextHeader = 0;
+    return MAL_SUCCESS;
+}
+
+// Stops the WinMM backend device.
+//
+// The device is reset with waveOutReset() (playback) or waveInReset() (capture) and then every fragment's WAVEHDR is
+// unprepared. Failures are reported through mal_post_error() as warnings only and do not abort the remaining steps;
+// the function always returns MAL_SUCCESS.
+static mal_result mal_device__stop_backend__winmm(mal_device* pDevice)
+{
+    LPWAVEHDR pHeaders;
+    MMRESULT resultMM;
+    mal_uint32 i;
+
+    mal_assert(pDevice != NULL);
+
+    pHeaders = (LPWAVEHDR)pDevice->winmm.pWAVEHDR;
+
+    if (pDevice->type == mal_device_type_playback) {
+        resultMM = ((MAL_PFN_waveOutReset)pDevice->pContext->winmm.waveOutReset)((HWAVEOUT)pDevice->winmm.hDevice);
+        if (resultMM != MMSYSERR_NOERROR) {
+            mal_post_error(pDevice, "[WinMM] WARNING: Failed to reset playback device.", mal_result_from_MMRESULT(resultMM));
+        }
+
+        // Unprepare all WAVEHDR structures.
+        for (i = 0; i < pDevice->periods; ++i) {
+            resultMM = ((MAL_PFN_waveOutUnprepareHeader)pDevice->pContext->winmm.waveOutUnprepareHeader)((HWAVEOUT)pDevice->winmm.hDevice, &pHeaders[i], sizeof(WAVEHDR));
+            if (resultMM != MMSYSERR_NOERROR) {
+                mal_post_error(pDevice, "[WinMM] WARNING: Failed to unprepare header for playback device.", mal_result_from_MMRESULT(resultMM));
+            }
+        }
+    } else {
+        resultMM = ((MAL_PFN_waveInReset)pDevice->pContext->winmm.waveInReset)((HWAVEIN)pDevice->winmm.hDevice);
+        if (resultMM != MMSYSERR_NOERROR) {
+            mal_post_error(pDevice, "[WinMM] WARNING: Failed to reset capture device.", mal_result_from_MMRESULT(resultMM));
+        }
+
+        // Unprepare all WAVEHDR structures.
+        for (i = 0; i < pDevice->periods; ++i) {
+            resultMM = ((MAL_PFN_waveInUnprepareHeader)pDevice->pContext->winmm.waveInUnprepareHeader)((HWAVEIN)pDevice->winmm.hDevice, &pHeaders[i], sizeof(WAVEHDR));
+            if (resultMM != MMSYSERR_NOERROR) {
+                // This is the capture path, so the warning names the capture device.
+                mal_post_error(pDevice, "[WinMM] WARNING: Failed to unprepare header for capture device.", mal_result_from_MMRESULT(resultMM));
+            }
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Asks the WinMM main loop to exit.
+//
+// Raises the device's breakFromMainLoop flag and then signals the fragment event, which is the event the main loop
+// waits on. Always returns MAL_SUCCESS.
+static mal_result mal_device__break_main_loop__winmm(mal_device* pDevice)
+{
+    HANDLE hFragmentEvent;
+
+    mal_assert(pDevice != NULL);
+
+    hFragmentEvent = (HANDLE)pDevice->winmm.hEvent;
+
+    // The flag is raised before the event is signalled so that it is already set by the time a waiter wakes up.
+    pDevice->winmm.breakFromMainLoop = MAL_TRUE;
+    SetEvent(hFragmentEvent);
+
+    return MAL_SUCCESS;
+}
+
+// Runs the WinMM worker loop for a device.
+//
+// Each iteration waits on the fragment event, then works in two passes over the WAVEHDR ring:
+//
+//   1) Starting at iNextHeader, every header flagged WHDR_DONE is recycled in order: for capture the recorded frames are
+//      first sent to the client; the header is unprepared, re-initialised, tagged with dwUser = 1 and prepared again (for
+//      playback it is refilled with client data before being prepared). The pass stops at the first header that is not done.
+//   2) After the event has been reset, every header tagged with dwUser = 1 is handed back to the device with
+//      waveOutWrite() (playback) or waveInAddBuffer() (capture) and the tag is cleared.
+//
+// The loop ends when breakFromMainLoop is set, when the wait does not return WAIT_OBJECT_0, or when the device is no
+// longer started. The function always returns MAL_SUCCESS; errors are only reported through mal_post_error().
+static mal_result mal_device__main_loop__winmm(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_uint32 counter;
+
+    pDevice->winmm.breakFromMainLoop = MAL_FALSE;
+    while (!pDevice->winmm.breakFromMainLoop) {
+        // Wait for a block of data to finish processing...
+        if (WaitForSingleObject((HANDLE)pDevice->winmm.hEvent, INFINITE) != WAIT_OBJECT_0) {
+            break;
+        }
+
+        // Break from the main loop if the device isn't started anymore. Likely what's happened is the application
+        // has requested that the device be stopped.
+        if (!mal_device_is_started(pDevice)) {
+            break;
+        }
+
+        // Any headers that are marked as done need to be handled. We start by processing the completed blocks. Then we reset the event
+        // and then write or add replacement buffers to the device.
+        //
+        // NOTE: Every "break" inside the two for-loops below only leaves that for-loop, not the enclosing while-loop.
+        mal_uint32 iFirstHeader = pDevice->winmm.iNextHeader;   // Remembered so the second pass walks the ring from the same starting header.
+        for (counter = 0; counter < pDevice->periods; ++counter) {
+            mal_uint32 i = pDevice->winmm.iNextHeader;
+            if ((((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwFlags & WHDR_DONE) == 0) {
+                break;
+            }
+
+            if (pDevice->type == mal_device_type_playback) {
+                // Playback.
+                MMRESULT resultMM = ((MAL_PFN_waveOutUnprepareHeader)pDevice->pContext->winmm.waveOutUnprepareHeader)((HWAVEOUT)pDevice->winmm.hDevice, &((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i], sizeof(WAVEHDR));
+                if (resultMM != MMSYSERR_NOERROR) {
+                    mal_post_error(pDevice, "[WinMM] Failed to unprepare header for playback device in preparation for sending a new block of data to the device for playback.", mal_result_from_MMRESULT(resultMM));
+                    break;
+                }
+
+                mal_zero_object(&((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i]);
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].lpData = (LPSTR)(pDevice->winmm.pIntermediaryBuffer + (pDevice->winmm.fragmentSizeInBytes * i));
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwBufferLength = pDevice->winmm.fragmentSizeInBytes;
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwFlags = 0L;
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwLoops = 0L;
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwUser = 1;     // <-- Used in the next section to identify the buffers that needs to be re-written to the device.
+                mal_device__read_frames_from_client(pDevice, pDevice->winmm.fragmentSizeInFrames, ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].lpData);
+
+                resultMM = ((MAL_PFN_waveOutPrepareHeader)pDevice->pContext->winmm.waveOutPrepareHeader)((HWAVEOUT)pDevice->winmm.hDevice, &((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i], sizeof(WAVEHDR));
+                if (resultMM != MMSYSERR_NOERROR) {
+                    mal_post_error(pDevice, "[WinMM] Failed to prepare header for playback device in preparation for sending a new block of data to the device for playback.", mal_result_from_MMRESULT(resultMM));
+                    break;
+                }
+            } else {
+                // Capture.
+                // Convert the recorded byte count into whole frames using the device's internal channel count and sample size.
+                mal_uint32 framesCaptured = (mal_uint32)(((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwBytesRecorded) / pDevice->internalChannels / mal_get_sample_size_in_bytes(pDevice->internalFormat);
+                if (framesCaptured > 0) {
+                    mal_device__send_frames_to_client(pDevice, framesCaptured, ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].lpData);
+                }
+
+                MMRESULT resultMM = ((MAL_PFN_waveInUnprepareHeader)pDevice->pContext->winmm.waveInUnprepareHeader)((HWAVEIN)pDevice->winmm.hDevice, &((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i], sizeof(WAVEHDR));
+                if (resultMM != MMSYSERR_NOERROR) {
+                    mal_post_error(pDevice, "[WinMM] Failed to unprepare header for capture device in preparation for adding a new capture buffer for the device.", mal_result_from_MMRESULT(resultMM));
+                    break;
+                }
+
+                mal_zero_object(&((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i]);
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].lpData = (LPSTR)(pDevice->winmm.pIntermediaryBuffer + (pDevice->winmm.fragmentSizeInBytes * i));
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwBufferLength = pDevice->winmm.fragmentSizeInBytes;
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwFlags = 0L;
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwLoops = 0L;
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwUser = 1;     // <-- Used in the next section to identify the buffers that needs to be re-added to the device.
+
+                resultMM = ((MAL_PFN_waveInPrepareHeader)pDevice->pContext->winmm.waveInPrepareHeader)((HWAVEIN)pDevice->winmm.hDevice, &((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i], sizeof(WAVEHDR));
+                if (resultMM != MMSYSERR_NOERROR) {
+                    mal_post_error(pDevice, "[WinMM] Failed to prepare header for capture device in preparation for adding a new capture buffer for the device.", mal_result_from_MMRESULT(resultMM));
+                    break;
+                }
+            }
+
+            // Only reached when the header was recycled without error; advance to the next header in the ring.
+            pDevice->winmm.iNextHeader = (pDevice->winmm.iNextHeader + 1) % pDevice->periods;
+        }
+
+        // The event is reset after the completed headers have been recycled and before they are handed back to the device.
+        ResetEvent((HANDLE)pDevice->winmm.hEvent);
+
+        // Second pass: hand every header tagged in the first pass back to the device, in ring order.
+        for (counter = 0; counter < pDevice->periods; ++counter) {
+            mal_uint32 i = (iFirstHeader + counter) % pDevice->periods;
+
+            if (((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwUser == 1) {
+                ((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i].dwUser = 0;
+
+                if (pDevice->type == mal_device_type_playback) {
+                    // Playback.
+                    MMRESULT resultMM = ((MAL_PFN_waveOutWrite)pDevice->pContext->winmm.waveOutWrite)((HWAVEOUT)pDevice->winmm.hDevice, &((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i], sizeof(WAVEHDR));
+                    if (resultMM != MMSYSERR_NOERROR) {
+                        mal_post_error(pDevice, "[WinMM] Failed to write data to the internal playback device.", mal_result_from_MMRESULT(resultMM));
+                        break;
+                    }
+                } else {
+                    // Capture.
+                    MMRESULT resultMM = ((MAL_PFN_waveInAddBuffer)pDevice->pContext->winmm.waveInAddBuffer)((HWAVEIN)pDevice->winmm.hDevice, &((LPWAVEHDR)pDevice->winmm.pWAVEHDR)[i], sizeof(WAVEHDR));
+                    if (resultMM != MMSYSERR_NOERROR) {
+                        mal_post_error(pDevice, "[WinMM] Failed to add new capture buffer to the internal capture device.", mal_result_from_MMRESULT(resultMM));
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// ALSA Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_ALSA
+#include <alsa/asoundlib.h>
+
+// This array allows mini_al to control device-specific default buffer sizes. This uses a scaling factor. Order is important
+// because the lookup returns the scale of the first entry that matches. If any part of the string is present in the device's
+// name, the associated scale will be used.
+struct
+{
+    const char* name;   // Text to look for in the device's name.
+    float scale;        // Scaling factor returned for a matching device.
+} g_malDefaultBufferSizeScalesALSA[] = {
+    {"bcm2835 IEC958/HDMI", 32},
+    {"bcm2835 ALSA",        32}
+};
+
+// Looks up the default buffer size scaling factor for an ALSA device.
+//
+// deviceName - The name of the device. May be NULL.
+//
+// Returns the scale of the first entry of g_malDefaultBufferSizeScalesALSA whose name occurs anywhere within deviceName,
+// or 1 when deviceName is NULL or no entry matches.
+static float mal_find_default_buffer_size_scale__alsa(const char* deviceName)
+{
+    if (deviceName == NULL) {
+        return 1;
+    }
+
+    for (size_t i = 0; i < mal_countof(g_malDefaultBufferSizeScalesALSA); ++i) {
+        // The device's name is the haystack and the table entry is the needle. With the arguments the other way around a
+        // device name longer than the table entry could never match, and an empty device name would match every entry.
+        if (strstr(deviceName, g_malDefaultBufferSizeScalesALSA[i].name) != NULL) {
+            return g_malDefaultBufferSizeScalesALSA[i].scale;
+        }
+    }
+
+    return 1;
+}
+
+
+// Function pointer types for the ALSA functions used by this backend. Each type is named after an snd_* function and
+// uses the ALSA types from <alsa/asoundlib.h> in its signature.
+//
+// NOTE(review): presumably these exist so the ALSA functions can be called through pointers resolved at runtime - confirm
+// against the context initialisation code.
+// NOTE(review): the last three types (mal_snd_pcm_info, mal_snd_pcm_info_sizeof, mal_snd_pcm_info_get_name) do not carry the
+// "_proc" suffix used by every other type here, and mal_snd_pcm_info_sizeof is declared with "()" instead of "(void)".
+typedef int               (* mal_snd_pcm_open_proc)                          (snd_pcm_t **pcm, const char *name, snd_pcm_stream_t stream, int mode);
+typedef int               (* mal_snd_pcm_close_proc)                         (snd_pcm_t *pcm);
+typedef size_t            (* mal_snd_pcm_hw_params_sizeof_proc)              (void);
+typedef int               (* mal_snd_pcm_hw_params_any_proc)                 (snd_pcm_t *pcm, snd_pcm_hw_params_t *params);
+typedef int               (* mal_snd_pcm_hw_params_set_format_proc)          (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, snd_pcm_format_t val);
+typedef int               (* mal_snd_pcm_hw_params_set_format_first_proc)    (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, snd_pcm_format_t *format);
+typedef void              (* mal_snd_pcm_hw_params_get_format_mask_proc)     (snd_pcm_hw_params_t *params, snd_pcm_format_mask_t *mask);
+typedef int               (* mal_snd_pcm_hw_params_set_channels_near_proc)   (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, unsigned int *val);
+typedef int               (* mal_snd_pcm_hw_params_set_rate_resample_proc)   (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, unsigned int val);
+typedef int               (* mal_snd_pcm_hw_params_set_rate_near_proc)       (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, unsigned int *val, int *dir);
+typedef int               (* mal_snd_pcm_hw_params_set_buffer_size_near_proc)(snd_pcm_t *pcm, snd_pcm_hw_params_t *params, snd_pcm_uframes_t *val);
+typedef int               (* mal_snd_pcm_hw_params_set_periods_near_proc)    (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, unsigned int *val, int *dir);
+typedef int               (* mal_snd_pcm_hw_params_set_access_proc)          (snd_pcm_t *pcm, snd_pcm_hw_params_t *params, snd_pcm_access_t _access);
+typedef int               (* mal_snd_pcm_hw_params_get_format_proc)          (snd_pcm_hw_params_t *params, snd_pcm_format_t *format);
+typedef int               (* mal_snd_pcm_hw_params_get_channels_proc)        (snd_pcm_hw_params_t *params, unsigned int *val);
+typedef int               (* mal_snd_pcm_hw_params_get_rate_proc)            (snd_pcm_hw_params_t *params, unsigned int *rate, int *dir);
+typedef int               (* mal_snd_pcm_hw_params_get_buffer_size_proc)     (snd_pcm_hw_params_t *params, snd_pcm_uframes_t *val);
+typedef int               (* mal_snd_pcm_hw_params_get_periods_proc)         (snd_pcm_hw_params_t *params, unsigned int *val, int *dir);
+typedef int               (* mal_snd_pcm_hw_params_get_access_proc)          (snd_pcm_hw_params_t *params, snd_pcm_access_t *_access);
+typedef int               (* mal_snd_pcm_hw_params_proc)                     (snd_pcm_t *pcm, snd_pcm_hw_params_t *params);
+typedef size_t            (* mal_snd_pcm_sw_params_sizeof_proc)              (void);
+typedef int               (* mal_snd_pcm_sw_params_current_proc)             (snd_pcm_t *pcm, snd_pcm_sw_params_t *params);
+typedef int               (* mal_snd_pcm_sw_params_set_avail_min_proc)       (snd_pcm_t *pcm, snd_pcm_sw_params_t *params, snd_pcm_uframes_t val);
+typedef int               (* mal_snd_pcm_sw_params_set_start_threshold_proc) (snd_pcm_t *pcm, snd_pcm_sw_params_t *params, snd_pcm_uframes_t val);
+typedef int               (* mal_snd_pcm_sw_params_proc)                     (snd_pcm_t *pcm, snd_pcm_sw_params_t *params);
+typedef size_t            (* mal_snd_pcm_format_mask_sizeof_proc)            (void);
+typedef int               (* mal_snd_pcm_format_mask_test_proc)              (const snd_pcm_format_mask_t *mask, snd_pcm_format_t val);
+typedef snd_pcm_chmap_t * (* mal_snd_pcm_get_chmap_proc)                     (snd_pcm_t *pcm);
+typedef int               (* mal_snd_pcm_prepare_proc)                       (snd_pcm_t *pcm);
+typedef int               (* mal_snd_pcm_start_proc)                         (snd_pcm_t *pcm);
+typedef int               (* mal_snd_pcm_drop_proc)                          (snd_pcm_t *pcm);
+typedef int               (* mal_snd_device_name_hint_proc)                  (int card, const char *iface, void ***hints);
+typedef char *            (* mal_snd_device_name_get_hint_proc)              (const void *hint, const char *id);
+typedef int               (* mal_snd_card_get_index_proc)                    (const char *name);
+typedef int               (* mal_snd_device_name_free_hint_proc)             (void **hints);
+typedef int               (* mal_snd_pcm_mmap_begin_proc)                    (snd_pcm_t *pcm, const snd_pcm_channel_area_t **areas, snd_pcm_uframes_t *offset, snd_pcm_uframes_t *frames);
+typedef snd_pcm_sframes_t (* mal_snd_pcm_mmap_commit_proc)                   (snd_pcm_t *pcm, snd_pcm_uframes_t offset, snd_pcm_uframes_t frames);
+typedef int               (* mal_snd_pcm_recover_proc)                       (snd_pcm_t *pcm, int err, int silent);
+typedef snd_pcm_sframes_t (* mal_snd_pcm_readi_proc)                         (snd_pcm_t *pcm, void *buffer, snd_pcm_uframes_t size);
+typedef snd_pcm_sframes_t (* mal_snd_pcm_writei_proc)                        (snd_pcm_t *pcm, const void *buffer, snd_pcm_uframes_t size);
+typedef snd_pcm_sframes_t (* mal_snd_pcm_avail_proc)                         (snd_pcm_t *pcm);
+typedef snd_pcm_sframes_t (* mal_snd_pcm_avail_update_proc)                  (snd_pcm_t *pcm);
+typedef int               (* mal_snd_pcm_wait_proc)                          (snd_pcm_t *pcm, int timeout);
+typedef int               (* mal_snd_pcm_info)                               (snd_pcm_t *pcm, snd_pcm_info_t* info);
+typedef size_t            (* mal_snd_pcm_info_sizeof)                        ();
+typedef const char*       (* mal_snd_pcm_info_get_name)                      (const snd_pcm_info_t* info);
+
+// Lookup table from a mal_format value to the corresponding ALSA sample format. The table is indexed directly by the
+// mal_format value, so the entries must stay in the order noted beside each one.
+static snd_pcm_format_t g_mal_ALSAFormats[] = {
+    SND_PCM_FORMAT_UNKNOWN,     // mal_format_unknown
+    SND_PCM_FORMAT_U8,          // mal_format_u8
+    SND_PCM_FORMAT_S16_LE,      // mal_format_s16
+    SND_PCM_FORMAT_S24_3LE,     // mal_format_s24
+    SND_PCM_FORMAT_S32_LE,      // mal_format_s32
+    SND_PCM_FORMAT_FLOAT_LE     // mal_format_f32
+};
+
+snd_pcm_format_t mal_convert_mal_format_to_alsa_format(mal_format format)
+{
+    return g_mal_ALSAFormats[format];
+}
+
+mal_format mal_convert_alsa_format_to_mal_format(snd_pcm_format_t formatALSA)
+{
+    // Only the five sample layouts tested below have a mal_format counterpart.
+    if (formatALSA == SND_PCM_FORMAT_U8)       return mal_format_u8;
+    if (formatALSA == SND_PCM_FORMAT_S16_LE)   return mal_format_s16;
+    if (formatALSA == SND_PCM_FORMAT_S24_3LE)  return mal_format_s24;
+    if (formatALSA == SND_PCM_FORMAT_S32_LE)   return mal_format_s32;
+    if (formatALSA == SND_PCM_FORMAT_FLOAT_LE) return mal_format_f32;
+
+    // Every other ALSA format is reported as unknown.
+    return mal_format_unknown;
+}
+
+mal_channel mal_convert_alsa_channel_position_to_mal_channel(unsigned int alsaChannelPos)
+{
+    // Translates an ALSA SND_CHMAP_* position into the matching MAL_CHANNEL_* value. Positions that
+    // have no mapping here, as well as unrecognised values, yield 0.
+    switch (alsaChannelPos)
+    {
+        case SND_CHMAP_FL:  return MAL_CHANNEL_FRONT_LEFT;
+        case SND_CHMAP_FR:  return MAL_CHANNEL_FRONT_RIGHT;
+        case SND_CHMAP_RL:  return MAL_CHANNEL_BACK_LEFT;
+        case SND_CHMAP_RR:  return MAL_CHANNEL_BACK_RIGHT;
+        case SND_CHMAP_FC:  return MAL_CHANNEL_FRONT_CENTER;
+        case SND_CHMAP_LFE: return MAL_CHANNEL_LFE;
+        case SND_CHMAP_SL:  return MAL_CHANNEL_SIDE_LEFT;
+        case SND_CHMAP_SR:  return MAL_CHANNEL_SIDE_RIGHT;
+        case SND_CHMAP_RC:  return MAL_CHANNEL_BACK_CENTER;
+        case SND_CHMAP_FLC: return MAL_CHANNEL_FRONT_LEFT_CENTER;
+        case SND_CHMAP_FRC: return MAL_CHANNEL_FRONT_RIGHT_CENTER;
+        case SND_CHMAP_TC:  return MAL_CHANNEL_TOP_CENTER;
+        case SND_CHMAP_TFL: return MAL_CHANNEL_TOP_FRONT_LEFT;
+        case SND_CHMAP_TFR: return MAL_CHANNEL_TOP_FRONT_RIGHT;
+        case SND_CHMAP_TFC: return MAL_CHANNEL_TOP_FRONT_CENTER;
+        case SND_CHMAP_TRL: return MAL_CHANNEL_TOP_BACK_LEFT;
+        case SND_CHMAP_TRR: return MAL_CHANNEL_TOP_BACK_RIGHT;
+        case SND_CHMAP_TRC: return MAL_CHANNEL_TOP_BACK_CENTER;
+        case SND_CHMAP_RLC:
+        case SND_CHMAP_RRC:
+        case SND_CHMAP_FLW:
+        case SND_CHMAP_FRW:
+        case SND_CHMAP_FLH:
+        case SND_CHMAP_FCH:
+        case SND_CHMAP_FRH:
+        default:            return 0;
+    }
+}
+
+mal_result mal_context_init__alsa(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    pContext->alsa.asoundSO = mal_dlopen("libasound.so");
+    if (pContext->alsa.asoundSO == NULL) pContext->alsa.asoundSO = mal_dlopen("libasound.so.2");   // Runtime-only installs ship just the versioned SONAME.
+    if (pContext->alsa.asoundSO == NULL) {
+        return MAL_NO_BACKEND;
+    }
+    // Look up every ALSA entry point by name. NOTE(review): the lookups are not checked for NULL in this function.
+    pContext->alsa.snd_pcm_open                           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_open");
+    pContext->alsa.snd_pcm_close                          = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_close");
+    pContext->alsa.snd_pcm_hw_params_sizeof               = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_sizeof");
+    pContext->alsa.snd_pcm_hw_params_any                  = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_any");
+    pContext->alsa.snd_pcm_hw_params_set_format           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_format");
+    pContext->alsa.snd_pcm_hw_params_set_format_first     = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_format_first");
+    pContext->alsa.snd_pcm_hw_params_get_format_mask      = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_format_mask");
+    pContext->alsa.snd_pcm_hw_params_set_channels_near    = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_channels_near");
+    pContext->alsa.snd_pcm_hw_params_set_rate_resample    = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_rate_resample");
+    pContext->alsa.snd_pcm_hw_params_set_rate_near        = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_rate_near");
+    pContext->alsa.snd_pcm_hw_params_set_buffer_size_near = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_buffer_size_near");
+    pContext->alsa.snd_pcm_hw_params_set_periods_near     = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_periods_near");
+    pContext->alsa.snd_pcm_hw_params_set_access           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_set_access");
+    pContext->alsa.snd_pcm_hw_params_get_format           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_format");
+    pContext->alsa.snd_pcm_hw_params_get_channels         = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_channels");
+    pContext->alsa.snd_pcm_hw_params_get_rate             = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_rate");
+    pContext->alsa.snd_pcm_hw_params_get_buffer_size      = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_buffer_size");
+    pContext->alsa.snd_pcm_hw_params_get_periods          = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_periods");
+    pContext->alsa.snd_pcm_hw_params_get_access           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params_get_access");
+    pContext->alsa.snd_pcm_hw_params                      = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_hw_params");
+    pContext->alsa.snd_pcm_sw_params_sizeof               = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_sw_params_sizeof");
+    pContext->alsa.snd_pcm_sw_params_current              = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_sw_params_current");
+    pContext->alsa.snd_pcm_sw_params_set_avail_min        = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_sw_params_set_avail_min");
+    pContext->alsa.snd_pcm_sw_params_set_start_threshold  = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_sw_params_set_start_threshold");
+    pContext->alsa.snd_pcm_sw_params                      = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_sw_params");
+    pContext->alsa.snd_pcm_format_mask_sizeof             = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_format_mask_sizeof");
+    pContext->alsa.snd_pcm_format_mask_test               = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_format_mask_test");
+    pContext->alsa.snd_pcm_get_chmap                      = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_get_chmap");
+    pContext->alsa.snd_pcm_prepare                        = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_prepare");
+    pContext->alsa.snd_pcm_start                          = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_start");
+    pContext->alsa.snd_pcm_drop                           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_drop");
+    pContext->alsa.snd_device_name_hint                   = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_device_name_hint");
+    pContext->alsa.snd_device_name_get_hint               = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_device_name_get_hint");
+    pContext->alsa.snd_card_get_index                     = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_card_get_index");
+    pContext->alsa.snd_device_name_free_hint              = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_device_name_free_hint");
+    pContext->alsa.snd_pcm_mmap_begin                     = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_mmap_begin");
+    pContext->alsa.snd_pcm_mmap_commit                    = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_mmap_commit");
+    pContext->alsa.snd_pcm_recover                        = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_recover");
+    pContext->alsa.snd_pcm_readi                          = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_readi");
+    pContext->alsa.snd_pcm_writei                         = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_writei");
+    pContext->alsa.snd_pcm_avail                          = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_avail");
+    pContext->alsa.snd_pcm_avail_update                   = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_avail_update");
+    pContext->alsa.snd_pcm_wait                           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_wait");
+    pContext->alsa.snd_pcm_info                           = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_info");
+    pContext->alsa.snd_pcm_info_sizeof                    = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_info_sizeof");
+    pContext->alsa.snd_pcm_info_get_name                  = (mal_proc)mal_dlsym(pContext->alsa.asoundSO, "snd_pcm_info_get_name");
+
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_uninit__alsa(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_alsa);
+    // Nothing is torn down here. NOTE(review): the asoundSO handle opened by mal_context_init__alsa() is not released - confirm this is intended.
+    (void)pContext;
+    return MAL_SUCCESS;
+}
+
+// Finds the first occurrence of c in the null-terminated string str.
+//
+// On a match, returns a pointer to the matching character and, when index is non-NULL, stores
+// its zero-based offset in *index. When c does not occur before the terminator, or str is NULL,
+// returns NULL and stores -1 in *index. The terminator itself is never matched.
+static const char* mal_find_char(const char* str, char c, int* index)
+{
+    // A NULL string is treated the same as "not found" rather than being dereferenced.
+    if (str != NULL) {
+        for (int i = 0; str[i] != '\0'; ++i) {
+            if (str[i] == c) {
+                if (index) *index = i;
+                return str + i;
+            }
+        }
+    }
+
+    // The terminator was reached without a match (or there was nothing to search). This is the
+    // single "not found" exit of the function.
+    if (index) *index = -1;
+    return NULL;
+}
+
+// Waits until a full period of frames is available for capture or playback and returns the period
+// size in frames. Returns 0 when xrun recovery fails; if the main loop is broken with
+// mal_device__break_main_loop() the wait ends early and returns 0 or whatever is available.
+// *pRequiresRestart (optional) is set to MAL_TRUE when the PCM was recovered during the wait.
+static mal_uint32 mal_device__wait_for_frames__alsa(mal_device* pDevice, mal_bool32* pRequiresRestart)
+{
+    mal_assert(pDevice != NULL);
+
+    if (pRequiresRestart) *pRequiresRestart = MAL_FALSE;
+
+    mal_uint32 periodSizeInFrames = pDevice->bufferSizeInFrames / pDevice->periods;
+
+    while (!pDevice->alsa.breakFromMainLoop) {
+        // Wait for something to become available. The timeout should not affect latency - it's only used to break from the wait
+        // so we can check whether or not the device has been stopped.
+        const int timeoutInMilliseconds = 10;
+        int waitResult = ((mal_snd_pcm_wait_proc)pDevice->pContext->alsa.snd_pcm_wait)((snd_pcm_t*)pDevice->alsa.pPCM, timeoutInMilliseconds);
+        if (waitResult < 0) {
+            if (waitResult == -EPIPE) {
+                if (((mal_snd_pcm_recover_proc)pDevice->pContext->alsa.snd_pcm_recover)((snd_pcm_t*)pDevice->alsa.pPCM, waitResult, MAL_TRUE) < 0) {
+                    return 0;
+                }
+
+                if (pRequiresRestart) *pRequiresRestart = MAL_TRUE; // A device recovery means a restart for mmap mode.
+            }
+        }
+
+        if (pDevice->alsa.breakFromMainLoop) {
+            return 0;
+        }
+
+        snd_pcm_sframes_t framesAvailable = ((mal_snd_pcm_avail_update_proc)pDevice->pContext->alsa.snd_pcm_avail_update)((snd_pcm_t*)pDevice->alsa.pPCM);
+        if (framesAvailable < 0) {
+            if (framesAvailable == -EPIPE) {
+                if (((mal_snd_pcm_recover_proc)pDevice->pContext->alsa.snd_pcm_recover)((snd_pcm_t*)pDevice->alsa.pPCM, framesAvailable, MAL_TRUE) < 0) {
+                    return 0;
+                }
+
+                if (pRequiresRestart) *pRequiresRestart = MAL_TRUE; // A device recovery means a restart for mmap mode.
+
+                // Try again, but if it fails this time just return an error.
+                framesAvailable = ((mal_snd_pcm_avail_update_proc)pDevice->pContext->alsa.snd_pcm_avail_update)((snd_pcm_t*)pDevice->alsa.pPCM);
+                if (framesAvailable < 0) {
+                    return 0;
+                }
+            }
+        }
+
+        // Keep the returned number of samples consistent and based on the period size. The comparison is done in the signed
+        // type so a negative error code is never converted to a huge unsigned value and mistaken for available frames.
+        if (framesAvailable >= (snd_pcm_sframes_t)periodSizeInFrames) {
+            return periodSizeInFrames;
+        }
+    }
+    // We'll get here if the loop was terminated. Just return whatever's available.
+    snd_pcm_sframes_t framesAvailable = ((mal_snd_pcm_avail_update_proc)pDevice->pContext->alsa.snd_pcm_avail_update)((snd_pcm_t*)pDevice->alsa.pPCM);
+    if (framesAvailable < 0) {
+        return 0;
+    }
+
+    return (mal_uint32)framesAvailable;
+}
+
+// Pulls one batch of frames from the client and hands it to the PCM (mmap or writei). Returns MAL_FALSE when nothing was written.
+static mal_bool32 mal_device_write__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    if (!mal_device_is_started(pDevice) && mal_device__get_state(pDevice) != MAL_STATE_STARTING) {
+        return MAL_FALSE;
+    }
+    if (pDevice->alsa.breakFromMainLoop) {
+        return MAL_FALSE;
+    }
+
+    if (pDevice->alsa.isUsingMMap) {
+        // mmap.
+        mal_bool32 requiresRestart;
+        mal_uint32 framesAvailable = mal_device__wait_for_frames__alsa(pDevice, &requiresRestart);
+        if (framesAvailable == 0) {
+            return MAL_FALSE;
+        }
+
+        // Don't bother asking the client for more audio data if we're just stopping the device anyway.
+        if (pDevice->alsa.breakFromMainLoop) {
+            return MAL_FALSE;
+        }
+        const snd_pcm_channel_area_t* pAreas;
+        snd_pcm_uframes_t mappedOffset;
+        snd_pcm_uframes_t mappedFrames;
+        while (framesAvailable > 0) {
+            mappedFrames = framesAvailable;     // In/out argument: request only what is still outstanding so the subtraction below cannot underflow.
+            int result = ((mal_snd_pcm_mmap_begin_proc)pDevice->pContext->alsa.snd_pcm_mmap_begin)((snd_pcm_t*)pDevice->alsa.pPCM, &pAreas, &mappedOffset, &mappedFrames);
+            if (result < 0) {
+                return MAL_FALSE;
+            }
+
+            if (mappedFrames > 0) {
+                void* pBuffer = (mal_uint8*)pAreas[0].addr + ((pAreas[0].first + (mappedOffset * pAreas[0].step)) / 8);
+                mal_device__read_frames_from_client(pDevice, mappedFrames, pBuffer);
+            }
+
+            result = ((mal_snd_pcm_mmap_commit_proc)pDevice->pContext->alsa.snd_pcm_mmap_commit)((snd_pcm_t*)pDevice->alsa.pPCM, mappedOffset, mappedFrames);
+            if (result < 0 || (snd_pcm_uframes_t)result != mappedFrames) {
+                ((mal_snd_pcm_recover_proc)pDevice->pContext->alsa.snd_pcm_recover)((snd_pcm_t*)pDevice->alsa.pPCM, result, MAL_TRUE);
+                return MAL_FALSE;
+            }
+
+            if (requiresRestart) {
+                if (((mal_snd_pcm_start_proc)pDevice->pContext->alsa.snd_pcm_start)((snd_pcm_t*)pDevice->alsa.pPCM) < 0) {
+                    return MAL_FALSE;
+                }
+            }
+
+            framesAvailable -= mappedFrames;
+        }
+    } else {
+        // readi/writei.
+        while (!pDevice->alsa.breakFromMainLoop) {
+            mal_uint32 framesAvailable = mal_device__wait_for_frames__alsa(pDevice, NULL);
+            if (framesAvailable == 0) {
+                continue;
+            }
+
+            // Don't bother asking the client for more audio data if we're just stopping the device anyway.
+            if (pDevice->alsa.breakFromMainLoop) {
+                return MAL_FALSE;
+            }
+
+            mal_device__read_frames_from_client(pDevice, framesAvailable, pDevice->alsa.pIntermediaryBuffer);
+
+            snd_pcm_sframes_t framesWritten = ((mal_snd_pcm_writei_proc)pDevice->pContext->alsa.snd_pcm_writei)((snd_pcm_t*)pDevice->alsa.pPCM, pDevice->alsa.pIntermediaryBuffer, framesAvailable);
+            if (framesWritten < 0) {
+                if (framesWritten == -EAGAIN) {
+                    continue;   // Just keep trying...
+                } else if (framesWritten == -EPIPE) {
+                    // Underrun. Just recover and try writing again.
+                    if (((mal_snd_pcm_recover_proc)pDevice->pContext->alsa.snd_pcm_recover)((snd_pcm_t*)pDevice->alsa.pPCM, framesWritten, MAL_TRUE) < 0) {
+                        mal_post_error(pDevice, "[ALSA] Failed to recover device after underrun.", MAL_ALSA_FAILED_TO_RECOVER_DEVICE);
+                        return MAL_FALSE;
+                    }
+
+                    framesWritten = ((mal_snd_pcm_writei_proc)pDevice->pContext->alsa.snd_pcm_writei)((snd_pcm_t*)pDevice->alsa.pPCM, pDevice->alsa.pIntermediaryBuffer, framesAvailable);
+                    if (framesWritten < 0) {
+                        mal_post_error(pDevice, "[ALSA] Failed to write data to the internal device.", MAL_FAILED_TO_SEND_DATA_TO_DEVICE);
+                        return MAL_FALSE;
+                    }
+
+                    break;  // Success.
+                } else {
+                    mal_post_error(pDevice, "[ALSA] snd_pcm_writei() failed when writing initial data.", MAL_FAILED_TO_SEND_DATA_TO_DEVICE);
+                    return MAL_FALSE;
+                }
+            } else {
+                break;  // Success.
+            }
+        }
+    }
+
+    return MAL_TRUE;
+}
+
+// Captures one batch of frames from the PCM (mmap or readi) and delivers it to the client. Returns MAL_FALSE when nothing was delivered.
+static mal_bool32 mal_device_read__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    if (!mal_device_is_started(pDevice)) {
+        return MAL_FALSE;
+    }
+    if (pDevice->alsa.breakFromMainLoop) {
+        return MAL_FALSE;
+    }
+
+    mal_uint32 framesToSend = 0;
+    void* pBuffer = NULL;
+    if (pDevice->alsa.pIntermediaryBuffer == NULL) {
+        // mmap.
+        mal_bool32 requiresRestart;
+        mal_uint32 framesAvailable = mal_device__wait_for_frames__alsa(pDevice, &requiresRestart);
+        if (framesAvailable == 0) {
+            return MAL_FALSE;
+        }
+        const snd_pcm_channel_area_t* pAreas;
+        snd_pcm_uframes_t mappedOffset;
+        snd_pcm_uframes_t mappedFrames;
+        while (framesAvailable > 0) {
+            mappedFrames = framesAvailable;     // In/out argument: request only what is still outstanding so the subtraction below cannot underflow.
+            int result = ((mal_snd_pcm_mmap_begin_proc)pDevice->pContext->alsa.snd_pcm_mmap_begin)((snd_pcm_t*)pDevice->alsa.pPCM, &pAreas, &mappedOffset, &mappedFrames);
+            if (result < 0) {
+                return MAL_FALSE;
+            }
+
+            if (mappedFrames > 0) {
+                void* pMappedBuffer = (mal_uint8*)pAreas[0].addr + ((pAreas[0].first + (mappedOffset * pAreas[0].step)) / 8);
+                mal_device__send_frames_to_client(pDevice, mappedFrames, pMappedBuffer);
+            }
+
+            result = ((mal_snd_pcm_mmap_commit_proc)pDevice->pContext->alsa.snd_pcm_mmap_commit)((snd_pcm_t*)pDevice->alsa.pPCM, mappedOffset, mappedFrames);
+            if (result < 0 || (snd_pcm_uframes_t)result != mappedFrames) {
+                ((mal_snd_pcm_recover_proc)pDevice->pContext->alsa.snd_pcm_recover)((snd_pcm_t*)pDevice->alsa.pPCM, result, MAL_TRUE);
+                return MAL_FALSE;
+            }
+
+            if (requiresRestart) {
+                if (((mal_snd_pcm_start_proc)pDevice->pContext->alsa.snd_pcm_start)((snd_pcm_t*)pDevice->alsa.pPCM) < 0) {
+                    return MAL_FALSE;
+                }
+            }
+
+            framesAvailable -= mappedFrames;
+        }
+    } else {
+        // readi/writei.
+        snd_pcm_sframes_t framesRead = 0;
+        while (!pDevice->alsa.breakFromMainLoop) {
+            mal_uint32 framesAvailable = mal_device__wait_for_frames__alsa(pDevice, NULL);
+            if (framesAvailable == 0) {
+                continue;
+            }
+
+            framesRead = ((mal_snd_pcm_readi_proc)pDevice->pContext->alsa.snd_pcm_readi)((snd_pcm_t*)pDevice->alsa.pPCM, pDevice->alsa.pIntermediaryBuffer, framesAvailable);
+            if (framesRead < 0) {
+                if (framesRead == -EAGAIN) {
+                    continue;   // Just keep trying...
+                } else if (framesRead == -EPIPE) {
+                    // Overrun. Just recover and try reading again.
+                    if (((mal_snd_pcm_recover_proc)pDevice->pContext->alsa.snd_pcm_recover)((snd_pcm_t*)pDevice->alsa.pPCM, framesRead, MAL_TRUE) < 0) {
+                        mal_post_error(pDevice, "[ALSA] Failed to recover device after overrun.", MAL_ALSA_FAILED_TO_RECOVER_DEVICE);
+                        return MAL_FALSE;
+                    }
+
+                    framesRead = ((mal_snd_pcm_readi_proc)pDevice->pContext->alsa.snd_pcm_readi)((snd_pcm_t*)pDevice->alsa.pPCM, pDevice->alsa.pIntermediaryBuffer, framesAvailable);
+                    if (framesRead < 0) {
+                        mal_post_error(pDevice, "[ALSA] Failed to read data from the internal device.", MAL_FAILED_TO_READ_DATA_FROM_DEVICE);
+                        return MAL_FALSE;
+                    }
+
+                    break;  // Success.
+                } else {
+                    return MAL_FALSE;
+                }
+            } else {
+                break;  // Success.
+            }
+        }
+
+        framesToSend = framesRead;
+        pBuffer = pDevice->alsa.pIntermediaryBuffer;
+    }
+    if (framesToSend > 0) {
+        mal_device__send_frames_to_client(pDevice, framesToSend, pBuffer);
+    }
+
+    return MAL_TRUE;
+}
+
+
+
+static mal_bool32 mal_is_device_name_in_hw_format__alsa(const char* hwid)
+{
+    // Returns MAL_TRUE only when hwid has the exact shape "hw:<digits>,<digits>", with at least one
+    // digit on each side of the comma. NULL and every other string yield MAL_FALSE.
+    if (hwid == NULL) {
+        return MAL_FALSE;
+    }
+
+    if (hwid[0] != 'h' || hwid[1] != 'w' || hwid[2] != ':') {
+        return MAL_FALSE;
+    }
+
+    hwid += 3;
+
+    int commaPos;
+    const char* dev = mal_find_char(hwid, ',', &commaPos);
+    if (dev == NULL || commaPos == 0) {
+        return MAL_FALSE;   // No comma at all, or no card number in front of it ("hw:,0").
+    }
+    dev += 1;   // Skip past the ",".
+
+    // Check if the part between the ":" and the "," contains only numbers. If not, return false.
+    for (int i = 0; i < commaPos; ++i) {
+        if (hwid[i] < '0' || hwid[i] > '9') {
+            return MAL_FALSE;
+        }
+    }
+
+    // Check if everything after the "," is numeric. If not, return false.
+    int i = 0;
+    while (dev[i] != '\0') {
+        if (dev[i] < '0' || dev[i] > '9') {
+            return MAL_FALSE;
+        }
+        i += 1;
+    }
+
+    // An empty device part ("hw:0,") does not match "hw:%d,%d" either.
+    return (i > 0) ? MAL_TRUE : MAL_FALSE;
+}
+
+static int mal_convert_device_name_to_hw_format__alsa(mal_context* pContext, char* dst, size_t dstSize, const char* src)  // Returns 0 on success, non-0 on error.
+{
+    // src should look something like this: "hw:CARD=I82801AAICH,DEV=0"
+
+    if (dst == NULL) return -1;
+    if (dstSize < 7) return -1;     // Absolute minimum size of the output buffer is 7 bytes.
+
+    *dst = '\0';    // Safety.
+    if (src == NULL) return -1;
+
+    // If the input name is already in "hw:%d,%d" format, just return that verbatim.
+    if (mal_is_device_name_in_hw_format__alsa(src)) {
+        return mal_strcpy_s(dst, dstSize, src);
+    }
+
+    src = mal_find_char(src, ':', NULL);
+    if (src == NULL) {
+        return -1;  // Couldn't find a colon
+    }
+
+    // The fixed offsets used below are only valid when the text really starts with ":CARD=". Anything
+    // else (for example "plug:x") is rejected instead of being read past its terminator.
+    if (strncmp(src, ":CARD=", 6) != 0) return -1;
+
+    char card[256];
+
+    int commaPos;
+    const char* dev = mal_find_char(src, ',', &commaPos);
+    if (dev == NULL) {
+        dev = "0";
+        mal_strncpy_s(card, sizeof(card), src+6, (size_t)-1);   // +6 = ":CARD="
+    } else {
+        if (strncmp(dev, ",DEV=", 5) != 0) return -1;           // The comma must introduce ",DEV=" for the +5 below to be valid.
+        dev = dev + 5;  // +5 = ",DEV="
+        mal_strncpy_s(card, sizeof(card), src+6, commaPos-6);   // +6 = ":CARD="
+    }
+
+    int cardIndex = ((mal_snd_card_get_index_proc)pContext->alsa.snd_card_get_index)(card);
+    if (cardIndex < 0) {
+        return -2;  // Failed to retrieve the card index.
+    }
+
+    // Construction.
+    dst[0] = 'h'; dst[1] = 'w'; dst[2] = ':';
+    if (mal_itoa_s(cardIndex, dst+3, dstSize-3, 10) != 0) {
+        return -3;
+    }
+    if (mal_strcat_s(dst, dstSize, ",") != 0) {
+        return -3;
+    }
+    if (mal_strcat_s(dst, dstSize, dev) != 0) {
+        return -3;
+    }
+
+    return 0;
+}
+
+static mal_bool32 mal_does_id_exist_in_list__alsa(mal_device_id* pUniqueIDs, mal_uint32 count, const char* pHWID)
+{
+    mal_assert(pHWID != NULL);
+
+    // Linear scan over the first `count` entries, stopping at the first whose alsa string equals pHWID.
+    mal_uint32 iID = 0;
+    while (iID < count && mal_strcmp(pUniqueIDs[iID].alsa, pHWID) != 0) {
+        iID += 1;
+    }
+
+    return (iID < count) ? MAL_TRUE : MAL_FALSE;
+}
+
+// Enumerates ALSA "pcm" device hints of the given type. On entry *pCount is the capacity of pInfo; on return it is the number of
+// entries written to pInfo or, when pInfo is NULL, the number of matching devices. Returns MAL_NO_BACKEND if hints are unavailable.
+static mal_result mal_enumerate_devices__alsa(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    (void)pContext;
+
+    mal_uint32 infoSize = *pCount;
+    *pCount = 0;
+
+    char** ppDeviceHints;
+    if (((mal_snd_device_name_hint_proc)pContext->alsa.snd_device_name_hint)(-1, "pcm", (void***)&ppDeviceHints) < 0) {
+        return MAL_NO_BACKEND;
+    }
+
+    mal_device_id* pUniqueIDs = NULL;
+    mal_uint32 uniqueIDCount = 0;
+
+    char** ppNextDeviceHint = ppDeviceHints;
+    while (*ppNextDeviceHint != NULL) {
+        char* NAME = ((mal_snd_device_name_get_hint_proc)pContext->alsa.snd_device_name_get_hint)(*ppNextDeviceHint, "NAME");
+        char* DESC = ((mal_snd_device_name_get_hint_proc)pContext->alsa.snd_device_name_get_hint)(*ppNextDeviceHint, "DESC");
+        char* IOID = ((mal_snd_device_name_get_hint_proc)pContext->alsa.snd_device_name_get_hint)(*ppNextDeviceHint, "IOID");
+
+        // Only include devices if they are of the correct type. Special cases for "default", "null" and "pulse" - these are included for both playback and capture
+        // regardless of the IOID setting. A hint without a NAME is never a special case, so it must not be passed to strcmp().
+        mal_bool32 includeThisDevice = MAL_FALSE;
+        if (NAME != NULL && (strcmp(NAME, "default") == 0 || strcmp(NAME, "pulse") == 0 || strcmp(NAME, "null") == 0)) {
+            includeThisDevice = MAL_TRUE;
+
+            // Exclude the "null" device if requested.
+            if (strcmp(NAME, "null") == 0 && pContext->config.alsa.excludeNullDevice) {
+                includeThisDevice = MAL_FALSE;
+            }
+        } else {
+            if ((type == mal_device_type_playback && (IOID == NULL || strcmp(IOID, "Output") == 0)) ||
+                (type == mal_device_type_capture  && (IOID != NULL && strcmp(IOID, "Input" ) == 0))) {
+                includeThisDevice = MAL_TRUE;
+            }
+        }
+
+        if (includeThisDevice) {
+#if 0
+            printf("NAME: %s\n", NAME);
+            printf("DESC: %s\n", DESC);
+            printf("IOID: %s\n", IOID);
+
+            char hwid2[256];
+            mal_convert_device_name_to_hw_format__alsa(pContext, hwid2, sizeof(hwid2), NAME);
+            printf("DEVICE ID: %s (%d)\n\n", hwid2, *pCount);
+#endif
+
+            char hwid[sizeof(pUniqueIDs->alsa)];
+            if (NAME != NULL) {
+                if (pContext->config.alsa.useVerboseDeviceEnumeration) {
+                    // Verbose mode. Use the name exactly as-is.
+                    mal_strncpy_s(hwid, sizeof(hwid), NAME, (size_t)-1);
+                } else {
+                    // Simplified mode. Use ":%d,%d" format.
+                    if (mal_convert_device_name_to_hw_format__alsa(pContext, hwid, sizeof(hwid), NAME) == 0) {
+                        // At this point, hwid looks like "hw:0,0". In simplified enumeration mode, we actually want to strip off the
+                        // plugin name so it looks like ":0,0". The reason for this is that this special format is detected at device
+                        // initialization time and is used as an indicator to try and use the most appropriate plugin depending on the
+                        // device type and sharing mode.
+                        char* dst = hwid;
+                        char* src = hwid+2;
+                        while ((*dst++ = *src++));
+                    } else {
+                        // Conversion to "hw:%d,%d" failed. Just use the name as-is.
+                        mal_strncpy_s(hwid, sizeof(hwid), NAME, (size_t)-1);
+                    }
+
+                    if (mal_does_id_exist_in_list__alsa(pUniqueIDs, uniqueIDCount, hwid)) {
+                        goto next_device;   // The device has already been enumerated. Move on to the next one.
+                    } else {
+                        // The device has not yet been enumerated. Make sure it's added to our list so that it's not enumerated again.
+                        mal_device_id* pNewUniqueIDs = mal_realloc(pUniqueIDs, sizeof(*pUniqueIDs) * (uniqueIDCount + 1));
+                        if (pNewUniqueIDs == NULL) {
+                            goto next_device;   // Failed to allocate memory.
+                        }
+
+                        pUniqueIDs = pNewUniqueIDs;
+                        mal_copy_memory(pUniqueIDs[uniqueIDCount].alsa, hwid, sizeof(hwid));
+                        uniqueIDCount += 1;
+                    }
+                }
+            } else {
+                mal_zero_memory(hwid, sizeof(hwid));
+            }
+
+            if (pInfo != NULL) {
+                if (infoSize > 0) {
+                    mal_zero_object(pInfo);
+                    mal_strncpy_s(pInfo->id.alsa, sizeof(pInfo->id.alsa), hwid, (size_t)-1);
+
+                    // DESC is the friendly name. We treat this slightly differently depending on whether or not we are using verbose
+                    // device enumeration. In verbose mode we want to take the entire description so that the end-user can distinguish
+                    // between the subdevices of each card/dev pair. In simplified mode, however, we only want the first part of the
+                    // description.
+                    //
+                    // The value in DESC seems to be split into two lines, with the first line being the name of the device and the
+                    // second line being a description of the device. I don't like having the description be across two lines because
+                    // it makes formatting ugly and annoying. I'm therefore deciding to put it all on a single line with the second line
+                    // being put into parentheses. In simplified mode I'm just stripping the second line entirely.
+                    if (DESC != NULL) {
+                        int lfPos;
+                        const char* line2 = mal_find_char(DESC, '\n', &lfPos);
+                        if (line2 != NULL) {
+                            line2 += 1; // Skip past the new-line character.
+
+                            if (pContext->config.alsa.useVerboseDeviceEnumeration) {
+                                // Verbose mode. Put the second line in brackets.
+                                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), DESC, lfPos);
+                                mal_strcat_s (pInfo->name, sizeof(pInfo->name), " (");
+                                mal_strcat_s (pInfo->name, sizeof(pInfo->name), line2);
+                                mal_strcat_s (pInfo->name, sizeof(pInfo->name), ")");
+                            } else {
+                                // Simplified mode. Strip the second line entirely.
+                                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), DESC, lfPos);
+                            }
+                        } else {
+                            // There's no second line. Just copy the whole description.
+                            mal_strcpy_s(pInfo->name, sizeof(pInfo->name), DESC);
+                        }
+                    }
+
+                    pInfo += 1;
+                    infoSize -= 1;
+                    *pCount += 1;
+                }
+            } else {
+                *pCount += 1;
+            }
+        }
+
+    next_device:
+        free(NAME);
+        free(DESC);
+        free(IOID);
+        ppNextDeviceHint += 1;
+    }
+
+    mal_free(pUniqueIDs);
+
+    ((mal_snd_device_name_free_hint_proc)pContext->alsa.snd_device_name_free_hint)((void**)ppDeviceHints);
+    return MAL_SUCCESS;
+}
+
+// Releases the ALSA resources held by a device. When a PCM handle is present it is closed and the
+// intermediary buffer (if one was allocated) is freed. When there is no PCM handle nothing is done.
+static void mal_device_uninit__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    snd_pcm_t* pPCM = (snd_pcm_t*)pDevice->alsa.pPCM;
+    if (!pPCM) {
+        return; // No open PCM handle, so there is nothing to release.
+    }
+
+    mal_snd_pcm_close_proc closePCM = (mal_snd_pcm_close_proc)pDevice->pContext->alsa.snd_pcm_close;
+    closePCM(pPCM);
+
+    // The intermediary buffer is only released together with the PCM handle.
+    if (pDevice->alsa.pIntermediaryBuffer != NULL) {
+        mal_free(pDevice->alsa.pIntermediaryBuffer);
+    }
+}
+
+// Opens and configures an ALSA PCM device for playback or capture.
+//
+// pContext  - Context holding the dynamically resolved ALSA entry points that are called here.
+// type      - mal_device_type_playback selects a playback stream, anything else a capture stream.
+// pDeviceID - Device to open, or NULL to try a list of default device names. An ID beginning with ':'
+//             has a plugin prefix ("dmix"/"dsnoop"/"hw") prepended. Note that an ID of exactly ":" is
+//             rewritten in place to "".
+// pConfig   - Requested format, channels, sample rate and periods, plus preferExclusiveMode and alsa.noMMap.
+// pDevice   - Device being initialized. pDevice->alsa is zeroed first; the internal format, channel count,
+//             sample rate, channel map, period count and buffer size are filled in from what ALSA accepted.
+//
+// Returns MAL_SUCCESS on success. On every failure path the device is passed to mal_device_uninit__alsa()
+// before the error code is returned.
+static mal_result mal_device_init__alsa(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    (void)pContext;
+
+    mal_assert(pDevice != NULL);
+    mal_zero_object(&pDevice->alsa);
+
+    snd_pcm_format_t formatALSA = mal_convert_mal_format_to_alsa_format(pConfig->format);
+    snd_pcm_stream_t stream = (type == mal_device_type_playback) ? SND_PCM_STREAM_PLAYBACK : SND_PCM_STREAM_CAPTURE;
+
+    if (pDeviceID == NULL) {
+        // We're opening the default device. I don't know if trying anything other than "default" is necessary, but it makes
+        // me feel better to try as hard as we can get to get _something_ working.
+        const char* defaultDeviceNames[] = {
+            "default",
+            NULL,
+            NULL,
+            NULL,
+            NULL,
+            NULL,
+            NULL
+        };
+
+        if (pConfig->preferExclusiveMode) {
+            defaultDeviceNames[1] = "hw";
+            defaultDeviceNames[2] = "hw:0";
+            defaultDeviceNames[3] = "hw:0,0";
+        } else {
+            if (type == mal_device_type_playback) {
+                defaultDeviceNames[1] = "dmix";
+                defaultDeviceNames[2] = "dmix:0";
+                defaultDeviceNames[3] = "dmix:0,0";
+            } else {
+                defaultDeviceNames[1] = "dsnoop";
+                defaultDeviceNames[2] = "dsnoop:0";
+                defaultDeviceNames[3] = "dsnoop:0,0";
+            }
+            defaultDeviceNames[4] = "hw";
+            defaultDeviceNames[5] = "hw:0";
+            defaultDeviceNames[6] = "hw:0,0";
+        }
+
+        // Try each candidate in order and stop at the first one that opens.
+        mal_bool32 isDeviceOpen = MAL_FALSE;
+        for (size_t i = 0; i < mal_countof(defaultDeviceNames); ++i) {
+            if (defaultDeviceNames[i] != NULL && defaultDeviceNames[i][0] != '\0') {
+                if (((mal_snd_pcm_open_proc)pContext->alsa.snd_pcm_open)((snd_pcm_t**)&pDevice->alsa.pPCM, defaultDeviceNames[i], stream, 0) == 0) {
+                    isDeviceOpen = MAL_TRUE;
+                    break;
+                }
+            }
+        }
+
+        if (!isDeviceOpen) {
+            mal_device_uninit__alsa(pDevice);
+            return mal_post_error(pDevice, "[ALSA] snd_pcm_open() failed when trying to open an appropriate default device.", MAL_ALSA_FAILED_TO_OPEN_DEVICE);
+        }
+    } else {
+        // We're trying to open a specific device. There's a few things to consider here:
+        //
+        // mini_al recognizes a special format of device id that excludes the "hw", "dmix", etc. prefix. It looks like this: ":0,0", ":0,1", etc. When
+        // an ID of this format is specified, it indicates to mini_al that it can try different combinations of plugins ("hw", "dmix", etc.) until it
+        // finds an appropriate one that works. This comes in very handy when trying to open a device in shared mode ("dmix"), vs exclusive mode ("hw").
+        mal_bool32 isDeviceOpen = MAL_FALSE;
+        if (pDeviceID->alsa[0] != ':') {
+            // The ID is not in ":0,0" format. Use the ID exactly as-is.
+            if (((mal_snd_pcm_open_proc)pContext->alsa.snd_pcm_open)((snd_pcm_t**)&pDevice->alsa.pPCM, pDeviceID->alsa, stream, 0) == 0) {
+                isDeviceOpen = MAL_TRUE;
+            }
+        } else {
+            // The ID is in ":0,0" format. Try different plugins depending on the shared mode.
+            if (pDeviceID->alsa[1] == '\0') {
+                pDeviceID->alsa[0] = '\0';  // An ID of ":" should be converted to "".
+            }
+
+            char hwid[256];
+            if (!pConfig->preferExclusiveMode) {
+                if (type == mal_device_type_playback) {
+                    mal_strcpy_s(hwid, sizeof(hwid), "dmix");
+                } else {
+                    mal_strcpy_s(hwid, sizeof(hwid), "dsnoop");
+                }
+
+                if (mal_strcat_s(hwid, sizeof(hwid), pDeviceID->alsa) == 0) {
+                    if (((mal_snd_pcm_open_proc)pContext->alsa.snd_pcm_open)((snd_pcm_t**)&pDevice->alsa.pPCM, hwid, stream, 0) == 0) {
+                        isDeviceOpen = MAL_TRUE;
+                    }
+                }
+            }
+
+            // If at this point we still don't have an open device it means we're either preferencing exclusive mode or opening with "dmix"/"dsnoop" failed.
+            if (!isDeviceOpen) {
+                mal_strcpy_s(hwid, sizeof(hwid), "hw");
+                if (mal_strcat_s(hwid, sizeof(hwid), pDeviceID->alsa) == 0) {
+                    if (((mal_snd_pcm_open_proc)pContext->alsa.snd_pcm_open)((snd_pcm_t**)&pDevice->alsa.pPCM, hwid, stream, 0) == 0) {
+                        isDeviceOpen = MAL_TRUE;
+                    }
+                }
+            }
+        }
+
+        if (!isDeviceOpen) {
+            mal_device_uninit__alsa(pDevice);
+            return mal_post_error(pDevice, "[ALSA] snd_pcm_open() failed.", MAL_ALSA_FAILED_TO_OPEN_DEVICE);
+        }
+    }
+
+    // We may need to scale the size of the buffer depending on the device.
+    if (pDevice->usingDefaultBufferSize) {
+        float bufferSizeScale = 1;
+
+        snd_pcm_info_t* pInfo = (snd_pcm_info_t*)alloca(((mal_snd_pcm_info_sizeof)pContext->alsa.snd_pcm_info_sizeof)());
+        mal_zero_memory(pInfo, ((mal_snd_pcm_info_sizeof)pContext->alsa.snd_pcm_info_sizeof)());
+
+        if (((mal_snd_pcm_info)pContext->alsa.snd_pcm_info)((snd_pcm_t*)pDevice->alsa.pPCM, pInfo) == 0) {
+            const char* deviceName = ((mal_snd_pcm_info_get_name)pContext->alsa.snd_pcm_info_get_name)(pInfo);
+            if (deviceName != NULL) {
+                if (strcmp(deviceName, "default") == 0) {
+                    // It's the default device. We need to use DESC from snd_device_name_hint().
+                    char** ppDeviceHints;
+                    if (((mal_snd_device_name_hint_proc)pContext->alsa.snd_device_name_hint)(-1, "pcm", (void***)&ppDeviceHints) < 0) {
+                        mal_device_uninit__alsa(pDevice);   // The PCM was already opened above; close it rather than leaking it.
+                        return MAL_NO_BACKEND;
+                    }
+
+                    char** ppNextDeviceHint = ppDeviceHints;
+                    while (*ppNextDeviceHint != NULL) {
+                        char* NAME = ((mal_snd_device_name_get_hint_proc)pContext->alsa.snd_device_name_get_hint)(*ppNextDeviceHint, "NAME");
+                        char* DESC = ((mal_snd_device_name_get_hint_proc)pContext->alsa.snd_device_name_get_hint)(*ppNextDeviceHint, "DESC");
+                        char* IOID = ((mal_snd_device_name_get_hint_proc)pContext->alsa.snd_device_name_get_hint)(*ppNextDeviceHint, "IOID");
+
+                        mal_bool32 foundDevice = MAL_FALSE;
+                        if ((type == mal_device_type_playback && (IOID == NULL || strcmp(IOID, "Output") == 0)) ||
+                            (type == mal_device_type_capture  && (IOID != NULL && strcmp(IOID, "Input" ) == 0))) {
+                            // NAME is checked for NULL before comparing so a hint without a name cannot crash strcmp().
+                            if (NAME != NULL && strcmp(NAME, deviceName) == 0) {
+                                bufferSizeScale = mal_find_default_buffer_size_scale__alsa(DESC);
+                                foundDevice = MAL_TRUE;
+                            }
+                        }
+
+                        free(NAME);
+                        free(DESC);
+                        free(IOID);
+
+                        if (foundDevice) {
+                            break;
+                        }
+
+                        // Advance to the next hint. Without this the loop would examine the same hint forever.
+                        ppNextDeviceHint += 1;
+                    }
+
+                    ((mal_snd_device_name_free_hint_proc)pContext->alsa.snd_device_name_free_hint)((void**)ppDeviceHints);
+                } else {
+                    bufferSizeScale = mal_find_default_buffer_size_scale__alsa(deviceName);
+                }
+            }
+
+            pDevice->bufferSizeInFrames = (mal_uint32)(pDevice->bufferSizeInFrames * bufferSizeScale);
+        }
+    }
+
+
+    // Hardware parameters.
+    snd_pcm_hw_params_t* pHWParams = (snd_pcm_hw_params_t*)alloca(((mal_snd_pcm_hw_params_sizeof_proc)pContext->alsa.snd_pcm_hw_params_sizeof)());
+    mal_zero_memory(pHWParams, ((mal_snd_pcm_hw_params_sizeof_proc)pContext->alsa.snd_pcm_hw_params_sizeof)());
+
+    if (((mal_snd_pcm_hw_params_any_proc)pContext->alsa.snd_pcm_hw_params_any)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to initialize hardware parameters. snd_pcm_hw_params_any() failed.", MAL_ALSA_FAILED_TO_SET_HW_PARAMS);
+    }
+
+
+    // MMAP Mode
+    //
+    // Try using interleaved MMAP access. If this fails, fall back to standard readi/writei.
+    pDevice->alsa.isUsingMMap = MAL_FALSE;
+    if (!pConfig->alsa.noMMap && pDevice->type != mal_device_type_capture) {    // <-- Disabling MMAP mode for capture devices because I apparently do not have a device that supports it so I can test it... Contributions welcome.
+        if (((mal_snd_pcm_hw_params_set_access_proc)pContext->alsa.snd_pcm_hw_params_set_access)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, SND_PCM_ACCESS_MMAP_INTERLEAVED) == 0) {
+            pDevice->alsa.isUsingMMap = MAL_TRUE;
+        }
+    }
+
+    if (!pDevice->alsa.isUsingMMap) {
+        if (((mal_snd_pcm_hw_params_set_access_proc)pContext->alsa.snd_pcm_hw_params_set_access)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, SND_PCM_ACCESS_RW_INTERLEAVED) < 0) {
+            mal_device_uninit__alsa(pDevice);
+            return mal_post_error(pDevice, "[ALSA] Failed to set access mode to neither SND_PCM_ACCESS_MMAP_INTERLEAVED nor SND_PCM_ACCESS_RW_INTERLEAVED. snd_pcm_hw_params_set_access() failed.", MAL_FORMAT_NOT_SUPPORTED);
+        }
+    }
+
+
+    // Most important properties first. The documentation for OSS (yes, I know this is ALSA!) recommends format, channels, then sample rate. I can't
+    // find any documentation for ALSA specifically, so I'm going to copy the recommendation for OSS.
+
+    // Format.
+    // Try getting every supported format.
+    snd_pcm_format_mask_t* pFormatMask = (snd_pcm_format_mask_t*)alloca(((mal_snd_pcm_format_mask_sizeof_proc)pContext->alsa.snd_pcm_format_mask_sizeof)());
+    mal_zero_memory(pFormatMask, ((mal_snd_pcm_format_mask_sizeof_proc)pContext->alsa.snd_pcm_format_mask_sizeof)());
+
+    ((mal_snd_pcm_hw_params_get_format_mask_proc)pContext->alsa.snd_pcm_hw_params_get_format_mask)(pHWParams, pFormatMask);
+
+    // At this point we should have a list of supported formats, so now we need to find the best one. We first check if the requested format is
+    // supported, and if so, use that one. If it's not supported, we just run though a list of formats and try to find the best one.
+    if (!((mal_snd_pcm_format_mask_test_proc)pContext->alsa.snd_pcm_format_mask_test)(pFormatMask, formatALSA)) {
+        // The requested format is not supported so now try running through the list of formats and return the best one.
+        snd_pcm_format_t preferredFormatsALSA[] = {
+            SND_PCM_FORMAT_FLOAT_LE,    // mal_format_f32
+            SND_PCM_FORMAT_S32_LE,      // mal_format_s32
+            SND_PCM_FORMAT_S24_3LE,     // mal_format_s24
+            SND_PCM_FORMAT_S16_LE,      // mal_format_s16
+            SND_PCM_FORMAT_U8           // mal_format_u8
+        };
+
+        formatALSA = SND_PCM_FORMAT_UNKNOWN;
+        for (size_t i = 0; i < (sizeof(preferredFormatsALSA) / sizeof(preferredFormatsALSA[0])); ++i) {
+            if (((mal_snd_pcm_format_mask_test_proc)pContext->alsa.snd_pcm_format_mask_test)(pFormatMask, preferredFormatsALSA[i])) {
+                formatALSA = preferredFormatsALSA[i];
+                break;
+            }
+        }
+
+        if (formatALSA == SND_PCM_FORMAT_UNKNOWN) {
+            mal_device_uninit__alsa(pDevice);
+            return mal_post_error(pDevice, "[ALSA] Format not supported. The device does not support any mini_al formats.", MAL_FORMAT_NOT_SUPPORTED);
+        }
+    }
+
+    if (((mal_snd_pcm_hw_params_set_format_proc)pContext->alsa.snd_pcm_hw_params_set_format)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, formatALSA) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Format not supported. snd_pcm_hw_params_set_format() failed.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    pDevice->internalFormat = mal_convert_alsa_format_to_mal_format(formatALSA);
+    if (pDevice->internalFormat == mal_format_unknown) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] The chosen format is not supported by mini_al.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    // Channels.
+    mal_uint32 channels = pConfig->channels;
+    if (((mal_snd_pcm_hw_params_set_channels_near_proc)pContext->alsa.snd_pcm_hw_params_set_channels_near)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, &channels) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to set channel count. snd_pcm_hw_params_set_channels_near() failed.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+    pDevice->internalChannels = channels;
+
+
+    // Sample Rate. It appears there's either a bug in ALSA, a bug in some drivers, or I'm doing something silly; but having resampling
+    // enabled causes problems with some device configurations when used in conjunction with MMAP access mode. To fix this problem we
+    // need to disable resampling.
+    //
+    // To reproduce this problem, open the "plug:dmix" device, and set the sample rate to 44100. Internally, it looks like dmix uses a
+    // sample rate of 48000. The hardware parameters will get set correctly with no errors, but it looks like the 44100 -> 48000 resampling
+    // doesn't work properly - but only with MMAP access mode. You will notice skipping/crackling in the audio, and it'll run at a slightly
+    // faster rate.
+    //
+    // mini_al has built-in support for sample rate conversion (albeit low quality at the moment), so disabling resampling should be fine
+    // for us. The only problem is that it won't be taking advantage of any kind of hardware-accelerated resampling and it won't be very
+    // good quality until I get a chance to improve the quality of mini_al's software sample rate conversion.
+    //
+    // I don't currently know if the dmix plugin is the only one with this error. Indeed, this is the only one I've been able to reproduce
+    // this error with. In the future, we may want to restrict the disabling of resampling to only known bad plugins.
+    ((mal_snd_pcm_hw_params_set_rate_resample_proc)pContext->alsa.snd_pcm_hw_params_set_rate_resample)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, 0);
+
+    mal_uint32 sampleRate = pConfig->sampleRate;
+    if (((mal_snd_pcm_hw_params_set_rate_near_proc)pContext->alsa.snd_pcm_hw_params_set_rate_near)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, &sampleRate, 0) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Sample rate not supported. snd_pcm_hw_params_set_rate_near() failed.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+    pDevice->internalSampleRate = sampleRate;
+
+
+    // Periods.
+    mal_uint32 periods = pConfig->periods;
+    int dir = 0;
+    if (((mal_snd_pcm_hw_params_set_periods_near_proc)pContext->alsa.snd_pcm_hw_params_set_periods_near)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, &periods, &dir) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to set period count. snd_pcm_hw_params_set_periods_near() failed.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+    pDevice->periods = periods;
+
+    // Buffer Size
+    snd_pcm_uframes_t actualBufferSize = pDevice->bufferSizeInFrames;
+    if (((mal_snd_pcm_hw_params_set_buffer_size_near_proc)pContext->alsa.snd_pcm_hw_params_set_buffer_size_near)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams, &actualBufferSize) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to set buffer size for device. snd_pcm_hw_params_set_buffer_size_near() failed.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+    pDevice->bufferSizeInFrames = actualBufferSize;
+
+
+    // Apply hardware parameters.
+    if (((mal_snd_pcm_hw_params_proc)pContext->alsa.snd_pcm_hw_params)((snd_pcm_t*)pDevice->alsa.pPCM, pHWParams) < 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to set hardware parameters. snd_pcm_hw_params() failed.", MAL_ALSA_FAILED_TO_SET_HW_PARAMS);
+    }
+
+
+
+
+    // Software parameters.
+    snd_pcm_sw_params_t* pSWParams = (snd_pcm_sw_params_t*)alloca(((mal_snd_pcm_sw_params_sizeof_proc)pContext->alsa.snd_pcm_sw_params_sizeof)());
+    mal_zero_memory(pSWParams, ((mal_snd_pcm_sw_params_sizeof_proc)pContext->alsa.snd_pcm_sw_params_sizeof)());
+
+    if (((mal_snd_pcm_sw_params_current_proc)pContext->alsa.snd_pcm_sw_params_current)((snd_pcm_t*)pDevice->alsa.pPCM, pSWParams) != 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to initialize software parameters. snd_pcm_sw_params_current() failed.", MAL_ALSA_FAILED_TO_SET_SW_PARAMS);
+    }
+
+    if (((mal_snd_pcm_sw_params_set_avail_min_proc)pContext->alsa.snd_pcm_sw_params_set_avail_min)((snd_pcm_t*)pDevice->alsa.pPCM, pSWParams, (pDevice->sampleRate/1000) * 1) != 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] snd_pcm_sw_params_set_avail_min() failed.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    if (type == mal_device_type_playback && !pDevice->alsa.isUsingMMap) {   // Only playback devices in writei/readi mode need a start threshold.
+        if (((mal_snd_pcm_sw_params_set_start_threshold_proc)pContext->alsa.snd_pcm_sw_params_set_start_threshold)((snd_pcm_t*)pDevice->alsa.pPCM, pSWParams, (pDevice->sampleRate/1000) * 1) != 0) { //mal_prev_power_of_2(pDevice->bufferSizeInFrames/pDevice->periods)
+            mal_device_uninit__alsa(pDevice);
+            return mal_post_error(pDevice, "[ALSA] Failed to set start threshold for playback device. snd_pcm_sw_params_set_start_threshold() failed.", MAL_ALSA_FAILED_TO_SET_SW_PARAMS);
+        }
+    }
+
+    if (((mal_snd_pcm_sw_params_proc)pContext->alsa.snd_pcm_sw_params)((snd_pcm_t*)pDevice->alsa.pPCM, pSWParams) != 0) {
+        mal_device_uninit__alsa(pDevice);
+        return mal_post_error(pDevice, "[ALSA] Failed to set software parameters. snd_pcm_sw_params() failed.", MAL_ALSA_FAILED_TO_SET_SW_PARAMS);
+    }
+
+
+
+    // If we're _not_ using mmap we need to use an intermediary buffer.
+    if (!pDevice->alsa.isUsingMMap) {
+        pDevice->alsa.pIntermediaryBuffer = mal_malloc(pDevice->bufferSizeInFrames * pDevice->channels * mal_get_sample_size_in_bytes(pDevice->format));
+        if (pDevice->alsa.pIntermediaryBuffer == NULL) {
+            mal_device_uninit__alsa(pDevice);
+            return mal_post_error(pDevice, "[ALSA] Failed to allocate memory for intermediary buffer.", MAL_OUT_OF_MEMORY);
+        }
+    }
+
+
+
+    // Grab the internal channel map. For now we're not going to bother trying to change the channel map and
+    // instead just do it ourselves.
+    snd_pcm_chmap_t* pChmap = ((mal_snd_pcm_get_chmap_proc)pContext->alsa.snd_pcm_get_chmap)((snd_pcm_t*)pDevice->alsa.pPCM);
+    if (pChmap != NULL) {
+        // There are cases where the returned channel map can have a different channel count than was returned by snd_pcm_hw_params_set_channels_near().
+        if (pChmap->channels >= pDevice->internalChannels) {
+            // Drop excess channels.
+            for (mal_uint32 iChannel = 0; iChannel < pDevice->internalChannels; ++iChannel) {
+                pDevice->internalChannelMap[iChannel] = mal_convert_alsa_channel_position_to_mal_channel(pChmap->pos[iChannel]);
+            }
+        } else {
+            // Excess channels use defaults. Do an initial fill with defaults, overwrite the first pChmap->channels, validate to ensure there are no duplicate
+            // channels. If validation fails, fall back to defaults.
+
+            // Fill with defaults.
+            mal_get_default_channel_mapping(pDevice->pContext->backend, pDevice->internalChannels, pDevice->internalChannelMap);
+
+            // Overwrite first pChmap->channels channels.
+            for (mal_uint32 iChannel = 0; iChannel < pChmap->channels; ++iChannel) {
+                pDevice->internalChannelMap[iChannel] = mal_convert_alsa_channel_position_to_mal_channel(pChmap->pos[iChannel]);
+            }
+
+            // Validate.
+            mal_bool32 isValid = MAL_TRUE;
+            for (mal_uint32 i = 0; i < pDevice->internalChannels && isValid; ++i) {
+                for (mal_uint32 j = i+1; j < pDevice->internalChannels; ++j) {
+                    if (pDevice->internalChannelMap[i] == pDevice->internalChannelMap[j]) {
+                        isValid = MAL_FALSE;
+                        break;
+                    }
+                }
+            }
+
+            // If our channel map is invalid, fall back to defaults.
+            if (!isValid) {
+                mal_get_default_channel_mapping(pDevice->pContext->backend, pDevice->internalChannels, pDevice->internalChannelMap);
+            }
+        }
+
+        free(pChmap);
+        pChmap = NULL;
+    } else {
+        // Could not retrieve the channel map. Fall back to a hard-coded assumption.
+        mal_get_default_channel_mapping(pDevice->pContext->backend, pDevice->internalChannels, pDevice->internalChannelMap);
+    }
+
+    return MAL_SUCCESS;
+}
+
+
+// Starts the ALSA device.
+//
+// The PCM is prepared first. A playback device then writes an initial chunk via mal_device_write__alsa()
+// and, when MMAP access is in use, is started explicitly with snd_pcm_start(). A capture device is always
+// started explicitly with snd_pcm_start().
+//
+// Returns MAL_SUCCESS on success, otherwise the error code posted through mal_post_error().
+static mal_result mal_device__start_backend__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Prepare the device first...
+    if (((mal_snd_pcm_prepare_proc)pDevice->pContext->alsa.snd_pcm_prepare)((snd_pcm_t*)pDevice->alsa.pPCM) < 0) {
+        return mal_post_error(pDevice, "[ALSA] Failed to prepare device.", MAL_ALSA_FAILED_TO_PREPARE_DEVICE);
+    }
+
+    // ... and then grab an initial chunk from the client. After this is done, the device should
+    // automatically start playing, since that's how we configured the software parameters.
+    if (pDevice->type == mal_device_type_playback) {
+        if (!mal_device_write__alsa(pDevice)) {
+            return mal_post_error(pDevice, "[ALSA] Failed to write initial chunk of data to the playback device.", MAL_FAILED_TO_SEND_DATA_TO_DEVICE);
+        }
+
+        // mmap mode requires an explicit start.
+        if (pDevice->alsa.isUsingMMap) {
+            if (((mal_snd_pcm_start_proc)pDevice->pContext->alsa.snd_pcm_start)((snd_pcm_t*)pDevice->alsa.pPCM) < 0) {
+                // This branch is only reached for playback devices, so report it as such.
+                return mal_post_error(pDevice, "[ALSA] Failed to start playback device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+            }
+        }
+    } else {
+        if (((mal_snd_pcm_start_proc)pDevice->pContext->alsa.snd_pcm_start)((snd_pcm_t*)pDevice->alsa.pPCM) < 0) {
+            return mal_post_error(pDevice, "[ALSA] Failed to start capture device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Stops the ALSA device by calling snd_pcm_drop() on its PCM handle. The result of snd_pcm_drop()
+// is not inspected; MAL_SUCCESS is always returned.
+static mal_result mal_device__stop_backend__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_snd_pcm_drop_proc dropPCM = (mal_snd_pcm_drop_proc)pDevice->pContext->alsa.snd_pcm_drop;
+    snd_pcm_t* pPCM = (snd_pcm_t*)pDevice->alsa.pPCM;
+    dropPCM(pPCM);
+
+    return MAL_SUCCESS;
+}
+
+// Asks the ALSA main loop to exit. Always returns MAL_SUCCESS.
+static mal_result mal_device__break_main_loop__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // There is no direct wake-up here: the flag is only checked by the main loop between read/write
+    // calls, so the loop ends after the next bunch of frames has been handled. This is the slow path.
+    pDevice->alsa.breakFromMainLoop = MAL_TRUE;
+
+    return MAL_SUCCESS;
+}
+
+// Runs the ALSA worker loop until either the break flag is raised or a read/write call reports
+// failure. Always returns MAL_SUCCESS.
+static mal_result mal_device__main_loop__alsa(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Clear any break request left over from a previous run.
+    pDevice->alsa.breakFromMainLoop = MAL_FALSE;
+
+    // The transfer direction is decided once, before looping.
+    const mal_bool32 isPlayback = (pDevice->type == mal_device_type_playback);
+
+    for (;;) {
+        // The break flag is always checked before the next transfer is attempted.
+        if (pDevice->alsa.breakFromMainLoop) {
+            break;
+        }
+
+        if (isPlayback) {
+            // Playback. Read from client, write to device.
+            if (!mal_device_write__alsa(pDevice)) {
+                break;
+            }
+        } else {
+            // Capture. Read from device, write to client.
+            if (!mal_device_read__alsa(pDevice)) {
+                break;
+            }
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+#endif  // ALSA
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OSS Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_OSS
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/soundcard.h>
+
+// Opens "/dev/mixer" read-only for querying system properties, as the OSS sample code does.
+// Returns the file descriptor on success, or -1 when the device could not be opened. The caller
+// is responsible for closing the returned descriptor.
+int mal_open_temp_device__oss()
+{
+    const int mixerFD = open("/dev/mixer", O_RDONLY, 0);
+    return (mixerFD >= 0) ? mixerFD : -1;
+}
+
+// Initializes the OSS side of a context by querying the OSS version through a temporary device
+// and storing the major/minor components in pContext->oss. The temporary device is closed on
+// every path. Returns MAL_SUCCESS, or the result of mal_context_post_error() with MAL_NO_BACKEND
+// when the device cannot be opened or the version cannot be retrieved.
+mal_result mal_context_init__oss(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+
+    // A temporary device is needed to issue the version query.
+    int tempFD = mal_open_temp_device__oss();
+    if (tempFD == -1) {
+        // OSS is probably not installed, or there are no available devices.
+        return mal_context_post_error(pContext, NULL, "[OSS] Failed to open temporary device for retrieving system properties.", MAL_NO_BACKEND);
+    }
+
+    int ossVersion = 0;
+    if (ioctl(tempFD, OSS_GETVERSION, &ossVersion) == -1) {
+        close(tempFD);
+        return mal_context_post_error(pContext, NULL, "[OSS] Failed to retrieve OSS version.", MAL_NO_BACKEND);
+    }
+
+    // The descriptor is no longer needed once the version has been read.
+    close(tempFD);
+
+    // Bits 16-23 hold the major version and bits 8-15 the minor version.
+    pContext->oss.versionMajor = ((ossVersion & 0xFF0000) >> 16);
+    pContext->oss.versionMinor = ((ossVersion & 0x00FF00) >> 8);
+
+    return MAL_SUCCESS;
+}
+
+// Uninitializes the OSS side of a context. Nothing is released here; the function only asserts
+// that the context is valid and uses the OSS backend, then returns MAL_SUCCESS.
+mal_result mal_context_uninit__oss(mal_context* pContext)
+{
+    // Marks the parameter as used -- presumably for builds where mal_assert() expands to nothing.
+    (void)pContext;
+
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_oss);
+
+    return MAL_SUCCESS;
+}
+
+// Enumerates OSS audio devices of the given type.
+//
+// pCount - On input, the capacity of pInfo in elements. On output, the number of devices written to
+//          pInfo, or the number of matching devices found when pInfo is NULL.
+// pInfo  - Optional output array. When NULL the devices are only counted.
+//
+// When SNDCTL_SYSINFO fails, a single default device ("/dev/dsp") is reported instead.
+// Returns MAL_SUCCESS, or the result of mal_context_post_error() when the temporary device cannot be opened.
+static mal_result mal_enumerate_devices__oss(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    (void)pContext;
+
+    mal_uint32 remainingSlots = *pCount;
+    *pCount = 0;
+
+    // The object returned by SNDCTL_SYSINFO will have the information we're after.
+    int fd = mal_open_temp_device__oss();
+    if (fd == -1) {
+        return mal_context_post_error(pContext, NULL, "[OSS] Failed to open a temporary device for retrieving system information used for device enumeration.", MAL_NO_BACKEND);
+    }
+
+    oss_sysinfo si;
+    if (ioctl(fd, SNDCTL_SYSINFO, &si) == -1) {
+        // Failed to retrieve the system information. Just return a default device for both playback and capture.
+        if (pInfo == NULL) {
+            *pCount = 1;
+        } else if (remainingSlots > 0) {
+            mal_strncpy_s(pInfo[0].id.oss, sizeof(pInfo[0].id.oss), "/dev/dsp", (size_t)-1);
+
+            const char* defaultName = (type == mal_device_type_playback) ? "Default Playback Device" : "Default Capture Device";
+            mal_strncpy_s(pInfo[0].name, sizeof(pInfo[0].name), defaultName, (size_t)-1);
+
+            *pCount = 1;
+        }
+
+        close(fd);
+        return MAL_SUCCESS;
+    }
+
+    for (int iAudioDevice = 0; iAudioDevice < si.numaudios; ++iAudioDevice) {
+        oss_audioinfo ai;
+        ai.dev = iAudioDevice;
+        if (ioctl(fd, SNDCTL_AUDIOINFO, &ai) == -1) {
+            continue;   // Could not query this device; skip it.
+        }
+
+        // Only devices whose capabilities match the requested direction are reported.
+        mal_bool32 wantsOutput = (type == mal_device_type_playback) && ((ai.caps & PCM_CAP_OUTPUT) != 0);
+        mal_bool32 wantsInput  = (type == mal_device_type_capture)  && ((ai.caps & PCM_CAP_INPUT)  != 0);
+        if (!wantsOutput && !wantsInput) {
+            continue;
+        }
+
+        // The device node can be blank, according to documentation. Such devices are skipped.
+        if (ai.devnode[0] == '\0') {
+            continue;
+        }
+
+        // Count-only mode.
+        if (pInfo == NULL) {
+            *pCount += 1;
+            continue;
+        }
+
+        // The output array is full, so this device is neither written nor counted.
+        if (remainingSlots == 0) {
+            continue;
+        }
+
+        mal_strncpy_s(pInfo->id.oss, sizeof(pInfo->id.oss), ai.devnode, (size_t)-1);
+
+        // The human readable device name should be in "ai.handle", but it can sometimes be empty, in
+        // which case "ai.name" is used instead. It is less user friendly, but usually has a value.
+        const char* readableName = (ai.handle[0] != '\0') ? ai.handle : ai.name;
+        mal_strncpy_s(pInfo->name, sizeof(pInfo->name), readableName, (size_t)-1);
+
+        pInfo += 1;
+        remainingSlots -= 1;
+        *pCount += 1;
+    }
+
+    close(fd);
+    return MAL_SUCCESS;
+}
+
+// Releases the OSS resources held by a device: closes its file descriptor and frees its
+// intermediary buffer.
+static void mal_device_uninit__oss(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    int deviceFD = pDevice->oss.fd;
+    close(deviceFD);
+
+    mal_free(pDevice->oss.pIntermediaryBuffer);
+}
+
+static mal_result mal_device_init__oss(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    (void)pContext;
+
+    mal_assert(pDevice != NULL);
+    mal_zero_object(&pDevice->oss);
+
+    char deviceName[64];
+    if (pDeviceID != NULL) {
+        mal_strncpy_s(deviceName, sizeof(deviceName), pDeviceID->oss, (size_t)-1);
+    } else {
+        mal_strncpy_s(deviceName, sizeof(deviceName), "/dev/dsp", (size_t)-1);
+    }
+
+    pDevice->oss.fd = open(deviceName, (type == mal_device_type_playback) ? O_WRONLY : O_RDONLY, 0);
+    if (pDevice->oss.fd == -1) {
+        return mal_post_error(pDevice, "[OSS] Failed to open device.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+    }
+
+    // The OSS documentation is very clear about the order we should be initializing the device's properties:
+    //   1) Format
+    //   2) Channels
+    //   3) Sample rate.
+
+    // Format.
+    int ossFormat = AFMT_U8;
+    switch (pDevice->format) {
+        case mal_format_s16: ossFormat = AFMT_S16_LE; break;
+        case mal_format_s24: ossFormat = AFMT_S32_LE; break;
+        case mal_format_s32: ossFormat = AFMT_S32_LE; break;
+        case mal_format_f32: ossFormat = AFMT_S32_LE; break;
+        case mal_format_u8:
+        default: ossFormat = AFMT_U8; break;
+    }
+    int result = ioctl(pDevice->oss.fd, SNDCTL_DSP_SETFMT, &ossFormat);
+    if (result == -1) {
+        close(pDevice->oss.fd);
+        return mal_post_error(pDevice, "[OSS] Failed to set format.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    switch (ossFormat) {
+        case AFMT_U8:     pDevice->internalFormat = mal_format_u8;  break;
+        case AFMT_S16_LE: pDevice->internalFormat = mal_format_s16; break;
+        case AFMT_S32_LE: pDevice->internalFormat = mal_format_s32; break;
+        default: mal_post_error(pDevice, "[OSS] The device's internal format is not supported by mini_al.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+
+    // Channels.
+    int ossChannels = (int)pConfig->channels;
+    result = ioctl(pDevice->oss.fd, SNDCTL_DSP_CHANNELS, &ossChannels);
+    if (result == -1) {
+        close(pDevice->oss.fd);
+        return mal_post_error(pDevice, "[OSS] Failed to set channel count.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    pDevice->internalChannels = ossChannels;
+
+
+    // Sample rate.
+    int ossSampleRate = (int)pConfig->sampleRate;
+    result = ioctl(pDevice->oss.fd, SNDCTL_DSP_SPEED, &ossSampleRate);
+    if (result == -1) {
+        close(pDevice->oss.fd);
+        return mal_post_error(pDevice, "[OSS] Failed to set sample rate.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    pDevice->sampleRate = ossSampleRate;
+
+
+
+    // The documentation says that the fragment settings should be set as soon as possible, but I'm not sure if
+    // it should be done before or after format/channels/rate.
+    //
+    // OSS wants the fragment size in bytes and a power of 2. When setting, we specify the power, not the actual
+    // value.
+    mal_uint32 fragmentSizeInBytes = mal_round_to_power_of_2(pDevice->bufferSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat));
+    if (fragmentSizeInBytes < 16) {
+        fragmentSizeInBytes = 16;
+    }
+
+    mal_uint32 ossFragmentSizePower = 4;
+    fragmentSizeInBytes >>= 4;
+    while (fragmentSizeInBytes >>= 1) {
+        ossFragmentSizePower += 1;
+    }
+
+    int ossFragment = (int)((pDevice->periods << 16) | ossFragmentSizePower);
+    result = ioctl(pDevice->oss.fd, SNDCTL_DSP_SETFRAGMENT, &ossFragment);
+    if (result == -1) {
+        close(pDevice->oss.fd);
+        return mal_post_error(pDevice, "[OSS] Failed to set fragment size and period count.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    int actualFragmentSizeInBytes = 1 << (ossFragment & 0xFFFF);
+    pDevice->oss.fragmentSizeInFrames = actualFragmentSizeInBytes / mal_get_sample_size_in_bytes(pDevice->internalFormat) / pDevice->internalChannels;
+
+    pDevice->periods = (mal_uint32)(ossFragment >> 16);
+    pDevice->bufferSizeInFrames = (mal_uint32)(pDevice->oss.fragmentSizeInFrames * pDevice->periods);
+
+
+    // Set the internal channel map. Not sure if this can be queried. For now just using our default assumptions.
+    mal_get_default_channel_mapping(pDevice->pContext->backend, pDevice->internalChannels, pDevice->internalChannelMap);
+
+
+    // When not using MMAP mode, we need to use an intermediary buffer for the client <-> device transfer. We do
+    // everything by the size of a fragment.
+    pDevice->oss.pIntermediaryBuffer = mal_malloc(fragmentSizeInBytes);
+    if (pDevice->oss.pIntermediaryBuffer == NULL) {
+        close(pDevice->oss.fd);
+        return mal_post_error(pDevice, "[OSS] Failed to allocate memory for intermediary buffer.", MAL_OUT_OF_MEMORY);
+    }
+
+    return MAL_SUCCESS;
+}
+
+
+static mal_result mal_device__start_backend__oss(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // OSS has no explicit "start" request: the device starts running on the first read() or write() issued
+    // against its file descriptor. For capture there is consequently nothing to do here - the first call to
+    // read() will start the device automatically.
+    if (pDevice->type != mal_device_type_playback) {
+        return MAL_SUCCESS;
+    }
+
+    // Playback: fetch one fragment of audio from the client and write() it, which is what starts the device.
+    mal_device__read_frames_from_client(pDevice, pDevice->oss.fragmentSizeInFrames, pDevice->oss.pIntermediaryBuffer);
+
+    size_t fragmentSizeInBytes = pDevice->oss.fragmentSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    int bytesWritten = write(pDevice->oss.fd, pDevice->oss.pIntermediaryBuffer, fragmentSizeInBytes);
+    if (bytesWritten == -1) {
+        return mal_post_error(pDevice, "[OSS] Failed to send initial chunk of data to the device.", MAL_FAILED_TO_SEND_DATA_TO_DEVICE);
+    }
+
+    return MAL_SUCCESS;
+}
+
+static mal_result mal_device__stop_backend__oss(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // The device is stopped with SNDCTL_DSP_HALT. The OSS documentation attaches a threading caveat to it:
+    //
+    //   In multithreaded applications SNDCTL_DSP_HALT (SNDCTL_DSP_RESET) must only be called by the thread
+    //   that actually reads/writes the audio device. It must not be called by some master thread to kill the
+    //   audio thread. The audio thread will not stop or get any kind of notification that the device was
+    //   stopped by the master thread. The device gets stopped but the next read or write call will silently
+    //   restart the device.
+    //
+    // That requirement is met because, per the original author, this function is only ever called from within
+    // the worker thread. Keep it that way when adding new call sites.
+
+    int haltResult = ioctl(pDevice->oss.fd, SNDCTL_DSP_HALT, 0);
+    if (haltResult != -1) {
+        return MAL_SUCCESS;
+    }
+
+    return mal_post_error(pDevice, "[OSS] Failed to stop device. SNDCTL_DSP_HALT failed.", MAL_FAILED_TO_STOP_BACKEND_DEVICE);
+}
+
+static mal_result mal_device__break_main_loop__oss(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    // Raise the flag that mal_device__main_loop__oss() tests at the top of each iteration so that the loop exits.
+    pDevice->oss.breakFromMainLoop = MAL_TRUE;
+    return MAL_SUCCESS;
+}
+
+// Worker loop for the OSS backend: moves one fragment per iteration between the client and the device until the break flag is raised or the device is no longer started.
+// NOTE(review): every transfer assumes pIntermediaryBuffer can hold one whole fragment (all channels) - confirm against the allocation made when the device is initialized.
+static mal_result mal_device__main_loop__oss(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->oss.breakFromMainLoop = MAL_FALSE;
+    while (!pDevice->oss.breakFromMainLoop) {
+        // Stop looping once the device is no longer started; most likely the application has requested a stop.
+        if (!mal_device_is_started(pDevice)) {
+            break;
+        }
+
+        // Size of one fragment in bytes. A frame carries one sample per channel, so the channel count applies to playback and capture alike.
+        size_t fragmentSizeInBytes = pDevice->oss.fragmentSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+        if (pDevice->type == mal_device_type_playback) {
+            mal_device__read_frames_from_client(pDevice, pDevice->oss.fragmentSizeInFrames, pDevice->oss.pIntermediaryBuffer);
+            int bytesWritten = write(pDevice->oss.fd, pDevice->oss.pIntermediaryBuffer, fragmentSizeInBytes);
+            if (bytesWritten < 0) {
+                return mal_post_error(pDevice, "[OSS] Failed to send data from the client to the device.", MAL_FAILED_TO_SEND_DATA_TO_DEVICE);
+            }
+        } else {
+            // Capture: request a whole fragment, then forward only the frames that read() actually delivered.
+            int bytesRead = read(pDevice->oss.fd, pDevice->oss.pIntermediaryBuffer, fragmentSizeInBytes);
+            if (bytesRead < 0) {
+                return mal_post_error(pDevice, "[OSS] Failed to read data from the device to be sent to the client.", MAL_FAILED_TO_READ_DATA_FROM_DEVICE);
+            }
+            mal_uint32 framesRead = (mal_uint32)bytesRead / pDevice->internalChannels / mal_get_sample_size_in_bytes(pDevice->internalFormat);
+            mal_device__send_frames_to_client(pDevice, framesRead, pDevice->oss.pIntermediaryBuffer);
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+#endif  // OSS
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenSL|ES Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_OPENSL
+#include <SLES/OpenSLES.h>
+#ifdef MAL_ANDROID
+#include <SLES/OpenSLES_Android.h>
+#endif
+
+// Converts an individual OpenSL-style channel identifier (SL_SPEAKER_FRONT_LEFT, etc.) to mini_al. Identifiers
+// that have no entry in the table below are converted to 0.
+static mal_uint8 mal_channel_id_to_mal__opensl(SLuint32 id)
+{
+    // OpenSL speaker identifier paired with the mini_al channel it maps to. Searched linearly.
+    static const struct { SLuint32 speaker; mal_uint8 channel; } speakerTable[] = {
+        {SL_SPEAKER_FRONT_LEFT, MAL_CHANNEL_FRONT_LEFT}, {SL_SPEAKER_FRONT_RIGHT, MAL_CHANNEL_FRONT_RIGHT},
+        {SL_SPEAKER_FRONT_CENTER, MAL_CHANNEL_FRONT_CENTER}, {SL_SPEAKER_LOW_FREQUENCY, MAL_CHANNEL_LFE},
+        {SL_SPEAKER_BACK_LEFT, MAL_CHANNEL_BACK_LEFT}, {SL_SPEAKER_BACK_RIGHT, MAL_CHANNEL_BACK_RIGHT},
+        {SL_SPEAKER_FRONT_LEFT_OF_CENTER, MAL_CHANNEL_FRONT_LEFT_CENTER}, {SL_SPEAKER_FRONT_RIGHT_OF_CENTER, MAL_CHANNEL_FRONT_RIGHT_CENTER},
+        {SL_SPEAKER_BACK_CENTER, MAL_CHANNEL_BACK_CENTER}, {SL_SPEAKER_SIDE_LEFT, MAL_CHANNEL_SIDE_LEFT},
+        {SL_SPEAKER_SIDE_RIGHT, MAL_CHANNEL_SIDE_RIGHT}, {SL_SPEAKER_TOP_CENTER, MAL_CHANNEL_TOP_CENTER},
+        {SL_SPEAKER_TOP_FRONT_LEFT, MAL_CHANNEL_TOP_FRONT_LEFT}, {SL_SPEAKER_TOP_FRONT_CENTER, MAL_CHANNEL_TOP_FRONT_CENTER},
+        {SL_SPEAKER_TOP_FRONT_RIGHT, MAL_CHANNEL_TOP_FRONT_RIGHT}, {SL_SPEAKER_TOP_BACK_LEFT, MAL_CHANNEL_TOP_BACK_LEFT},
+        {SL_SPEAKER_TOP_BACK_CENTER, MAL_CHANNEL_TOP_BACK_CENTER}, {SL_SPEAKER_TOP_BACK_RIGHT, MAL_CHANNEL_TOP_BACK_RIGHT}
+    };
+
+    for (size_t iEntry = 0; iEntry < sizeof(speakerTable) / sizeof(speakerTable[0]); ++iEntry) {
+        if (speakerTable[iEntry].speaker == id) {
+            return speakerTable[iEntry].channel;
+        }
+    }
+
+    // Not a speaker identifier this function knows about.
+    return 0;
+}
+
+// Converts an individual mini_al channel identifier (MAL_CHANNEL_FRONT_LEFT, etc.) to OpenSL-style. Identifiers
+// that have no entry in the table below are converted to 0, i.e. an empty speaker mask.
+static SLuint32 mal_channel_id_to_opensl(mal_uint8 id)
+{
+    // mini_al channel paired with the OpenSL speaker identifier it maps to. Searched linearly.
+    static const struct { mal_uint8 channel; SLuint32 speaker; } channelTable[] = {
+        {MAL_CHANNEL_FRONT_LEFT, SL_SPEAKER_FRONT_LEFT}, {MAL_CHANNEL_FRONT_RIGHT, SL_SPEAKER_FRONT_RIGHT},
+        {MAL_CHANNEL_FRONT_CENTER, SL_SPEAKER_FRONT_CENTER}, {MAL_CHANNEL_LFE, SL_SPEAKER_LOW_FREQUENCY},
+        {MAL_CHANNEL_BACK_LEFT, SL_SPEAKER_BACK_LEFT}, {MAL_CHANNEL_BACK_RIGHT, SL_SPEAKER_BACK_RIGHT},
+        {MAL_CHANNEL_FRONT_LEFT_CENTER, SL_SPEAKER_FRONT_LEFT_OF_CENTER}, {MAL_CHANNEL_FRONT_RIGHT_CENTER, SL_SPEAKER_FRONT_RIGHT_OF_CENTER},
+        {MAL_CHANNEL_BACK_CENTER, SL_SPEAKER_BACK_CENTER}, {MAL_CHANNEL_SIDE_LEFT, SL_SPEAKER_SIDE_LEFT},
+        {MAL_CHANNEL_SIDE_RIGHT, SL_SPEAKER_SIDE_RIGHT}, {MAL_CHANNEL_TOP_CENTER, SL_SPEAKER_TOP_CENTER},
+        {MAL_CHANNEL_TOP_FRONT_LEFT, SL_SPEAKER_TOP_FRONT_LEFT}, {MAL_CHANNEL_TOP_FRONT_CENTER, SL_SPEAKER_TOP_FRONT_CENTER},
+        {MAL_CHANNEL_TOP_FRONT_RIGHT, SL_SPEAKER_TOP_FRONT_RIGHT}, {MAL_CHANNEL_TOP_BACK_LEFT, SL_SPEAKER_TOP_BACK_LEFT},
+        {MAL_CHANNEL_TOP_BACK_CENTER, SL_SPEAKER_TOP_BACK_CENTER}, {MAL_CHANNEL_TOP_BACK_RIGHT, SL_SPEAKER_TOP_BACK_RIGHT}
+    };
+
+    for (size_t iEntry = 0; iEntry < sizeof(channelTable) / sizeof(channelTable[0]); ++iEntry) {
+        if (channelTable[iEntry].channel == id) {
+            return channelTable[iEntry].speaker;
+        }
+    }
+
+    // Not a channel identifier this function knows about.
+    return 0;
+}
+
+// Converts a channel mapping to an OpenSL-style channel mask by OR-ing together the speaker bit of each of the first `channels` entries.
+static SLuint32 mal_channel_map_to_channel_mask__opensl(const mal_uint8 channelMap[MAL_MAX_CHANNELS], mal_uint32 channels)
+{
+    SLuint32 speakerMask = 0;
+    const mal_uint8* pEnd = channelMap + channels;
+    for (const mal_uint8* pChannelId = channelMap; pChannelId != pEnd; ++pChannelId) {
+        speakerMask |= mal_channel_id_to_opensl(*pChannelId);
+    }
+    return speakerMask;
+}
+
+// Converts an OpenSL-style channel mask to a mini_al channel map. Writes at most `channels` entries, and never
+// more than MAL_MAX_CHANNELS; mask bits beyond that limit are ignored rather than written past the map.
+static void mal_channel_mask_to_channel_map__opensl(SLuint32 channelMask, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS])
+{
+    if (channels == 2 && channelMask == 0) {  // An empty mask with two channels is treated as plain stereo.
+        channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+    } else {
+        mal_uint32 maxChannels = (channels < MAL_MAX_CHANNELS) ? channels : MAL_MAX_CHANNELS;
+        mal_uint32 iChannel = 0;
+        for (mal_uint32 iBit = 0; iBit < 32 && iChannel < maxChannels; ++iBit) {
+            // The shift is done on an unsigned value: (1 << 31) on a signed int is undefined behaviour.
+            SLuint32 bitValue = (channelMask & ((SLuint32)1 << iBit));
+            if (bitValue != 0) {
+                channelMap[iChannel++] = mal_channel_id_to_mal__opensl(bitValue);
+            }
+        }
+    }
+}
+
+// Rounds a sample rate up to the nearest standard OpenSL|ES rate.
+//
+// Parameters:
+//   samplesPerSec - The requested rate, in the same unit as the SL_SAMPLINGRATE_* constants it is compared
+//                   against.
+//
+// Returns:
+//   The first SL_SAMPLINGRATE_* constant, taken in ascending order, that is greater than or equal to
+//   samplesPerSec. When samplesPerSec is above every candidate (the last candidate is SL_SAMPLINGRATE_48
+//   on Android and SL_SAMPLINGRATE_192 elsewhere) SL_SAMPLINGRATE_16 is returned.
+//
+// NOTE(review): falling back to SL_SAMPLINGRATE_16 instead of the highest candidate looks like a deliberately
+// conservative default, but that is an assumption - confirm before relying on it.
+SLuint32 mal_round_to_standard_sample_rate__opensl(SLuint32 samplesPerSec)
+{
+    // Candidate rates. They must stay in ascending order because the scan below stops at the first fit.
+    static const SLuint32 standardRates[] = {
+        SL_SAMPLINGRATE_8,
+        SL_SAMPLINGRATE_11_025,
+        SL_SAMPLINGRATE_12,
+        SL_SAMPLINGRATE_16,
+        SL_SAMPLINGRATE_22_05,
+        SL_SAMPLINGRATE_24,
+        SL_SAMPLINGRATE_32,
+        SL_SAMPLINGRATE_44_1,
+        SL_SAMPLINGRATE_48,
+
+        // Android doesn't support more than 48000.
+#ifndef MAL_ANDROID
+        SL_SAMPLINGRATE_64,
+        SL_SAMPLINGRATE_88_2,
+        SL_SAMPLINGRATE_96,
+        SL_SAMPLINGRATE_192,
+#endif
+    };
+    const size_t rateCount = sizeof(standardRates) / sizeof(standardRates[0]);
+
+    for (size_t iRate = 0; iRate < rateCount; ++iRate) {
+        SLuint32 candidate = standardRates[iRate];
+        if (samplesPerSec <= candidate) {
+            return candidate;
+        }
+    }
+
+    // The request is above every candidate in the table, so none of them can be reached by rounding up.
+    // Fall back to the same fixed rate the function has always returned in this situation.
+    return SL_SAMPLINGRATE_16;
+}
+
+mal_result mal_context_init__opensl(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    // No context-level state is created for OpenSL|ES here, so this always reports success.
+    (void)pContext;
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_uninit__opensl(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_opensl);
+    // Nothing is released here: the function only validates its argument (via the asserts) and reports success.
+    (void)pContext;
+    return MAL_SUCCESS;
+}
+
+mal_result mal_enumerate_devices__opensl(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    (void)pContext;
+    // Reports OpenSL|ES devices of the given type. On entry *pCount is read as the capacity of pInfo; on return it holds the number of devices reported.
+    mal_uint32 infoSize = *pCount;
+    *pCount = 0;
+
+    SLObjectItf engineObj;
+    SLresult resultSL = slCreateEngine(&engineObj, 0, NULL, 0, NULL, NULL);
+    if (resultSL != SL_RESULT_SUCCESS) {
+        return MAL_NO_BACKEND;
+    }
+    // NOTE(review): the result of Realize() is not checked here.
+    (*engineObj)->Realize(engineObj, SL_BOOLEAN_FALSE);
+
+    // TODO: Test Me.
+    //
+    // The enumeration code inside "#if 0" is currently untested, so it is compiled out and only the "#else" branch (default devices) is built.
+#if 0
+    SLuint32 pDeviceIDs[128];
+    SLint32 deviceCount = sizeof(pDeviceIDs) / sizeof(pDeviceIDs[0]);
+
+    SLAudioIODeviceCapabilitiesItf deviceCaps;
+    resultSL = (*engineObj)->GetInterface(engineObj, SL_IID_AUDIOIODEVICECAPABILITIES, &deviceCaps);
+    if (resultSL != SL_RESULT_SUCCESS) {
+        // The interface may not be supported so just report a default device.
+        (*engineObj)->Destroy(engineObj);
+        goto return_default_device;
+    }
+
+    if (type == mal_device_type_playback) {
+        resultSL = (*deviceCaps)->GetAvailableAudioOutputs(deviceCaps, &deviceCount, pDeviceIDs);
+        if (resultSL != SL_RESULT_SUCCESS) {
+            (*engineObj)->Destroy(engineObj);
+            return MAL_NO_DEVICE;
+        }
+    } else {
+        resultSL = (*deviceCaps)->GetAvailableAudioInputs(deviceCaps, &deviceCount, pDeviceIDs);
+        if (resultSL != SL_RESULT_SUCCESS) {
+            (*engineObj)->Destroy(engineObj);
+            return MAL_NO_DEVICE;
+        }
+    }
+
+    for (SLint32 iDevice = 0; iDevice < deviceCount; ++iDevice) {
+        if (pInfo != NULL) {
+            if (infoSize > 0) {
+                mal_zero_object(pInfo);
+                pInfo->id.opensl = pDeviceIDs[iDevice];
+
+                mal_bool32 isValidDevice = MAL_TRUE;
+                if (type == mal_device_type_playback) {
+                    SLAudioOutputDescriptor desc;
+                    resultSL = (*deviceCaps)->QueryAudioOutputCapabilities(deviceCaps, pInfo->id.opensl, &desc);
+                    if (resultSL != SL_RESULT_SUCCESS) {
+                        isValidDevice = MAL_FALSE;
+                    }
+                    // NOTE(review): desc is still read below when the query above failed - fix before enabling this code.
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), (const char*)desc.pDeviceName, (size_t)-1);
+                } else {
+                    SLAudioInputDescriptor desc;
+                    resultSL = (*deviceCaps)->QueryAudioInputCapabilities(deviceCaps, pInfo->id.opensl, &desc);
+                    if (resultSL != SL_RESULT_SUCCESS) {
+                        isValidDevice = MAL_FALSE;
+                    }
+                    // NOTE(review): desc is still read below when the query above failed - fix before enabling this code.
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), (const char*)desc.deviceName, (size_t)-1);
+                }
+
+                if (isValidDevice) {
+                    pInfo += 1;
+                    infoSize -= 1;
+                    *pCount += 1;
+                }
+            }
+        } else {
+            *pCount += 1;
+        }
+    }
+
+    (*engineObj)->Destroy(engineObj);
+    return MAL_SUCCESS;
+#else
+    (*engineObj)->Destroy(engineObj);
+    goto return_default_device;
+#endif
+    // Fallback: report exactly one default device for the requested type. Its details are only written when pInfo is non-NULL and has room.
+return_default_device:
+    *pCount = 1;
+    if (pInfo != NULL) {
+        if (infoSize > 0) {
+            if (type == mal_device_type_playback) {
+                pInfo->id.opensl = SL_DEFAULTDEVICEID_AUDIOOUTPUT;
+                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Playback Device", (size_t)-1);
+            } else {
+                pInfo->id.opensl = SL_DEFAULTDEVICEID_AUDIOINPUT;
+                mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Capture Device", (size_t)-1);
+            }
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+
+
+// OpenSL|ES has one-per-application objects :( The engine object and its interface are therefore shared by every device, with g_malOpenSLInitCounter counting the devices that use them.
+static SLObjectItf g_malEngineObjectSL = NULL;
+static SLEngineItf g_malEngineSL = NULL;
+static mal_uint32 g_malOpenSLInitCounter = 0;
+// Helpers that cast an opaque handle to the matching OpenSL|ES interface type and dereference it, so that its functions can be called with "->".
+#define MAL_OPENSL_OBJ(p)         (*((SLObjectItf)(p)))
+#define MAL_OPENSL_OUTPUTMIX(p)   (*((SLOutputMixItf)(p)))
+#define MAL_OPENSL_PLAY(p)        (*((SLPlayItf)(p)))
+#define MAL_OPENSL_RECORD(p)      (*((SLRecordItf)(p)))
+// The buffer queue helper casts to the Android simple buffer queue type on Android and to the standard buffer queue type elsewhere.
+#ifdef MAL_ANDROID
+#define MAL_OPENSL_BUFFERQUEUE(p) (*((SLAndroidSimpleBufferQueueItf)(p)))
+#else
+#define MAL_OPENSL_BUFFERQUEUE(p) (*((SLBufferQueueItf)(p)))
+#endif
+
+#ifdef MAL_ANDROID
+// Buffer queue callback shared by the OpenSL|ES playback and capture paths on Android.
+//
+// Parameters:
+//   pBufferQueue - The queue that raised the callback. Unused; the queue stored on the device is used instead.
+//   pUserData    - The mal_device that was supplied when the callback was registered.
+//
+// Each call services one period of pDevice->opensl.pBuffer, selected by currentBufferIndex:
+//   - Playback: the period is refilled with frames read from the client.
+//   - Capture:  the frames held in the period are sent to the client.
+// In both cases the period is then enqueued again. The index only advances (wrapping at pDevice->periods) when
+// Enqueue() succeeds.
+//
+// According to the original author, OpenSL|ES 1.1 improves on buffer queues to the point that this could be
+// handled much more intelligently, but Android only supports OpenSL|ES 1.0.1 for now.
+//
+// The function is compiled for Android only because it depends on the Android simple buffer queue type.
+static void mal_buffer_queue_callback__opensl_android(SLAndroidSimpleBufferQueueItf pBufferQueue, void* pUserData)
+{
+    (void)pBufferQueue;
+
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+
+    // Nothing is transferred unless the device is in the started state.
+    if (pDevice->state != MAL_STATE_STARTED) {
+        return;
+    }
+
+    // Locate the period to service inside the device's buffer.
+    size_t periodSizeInBytes = pDevice->opensl.periodSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    mal_uint8* pBuffer = pDevice->opensl.pBuffer + (pDevice->opensl.currentBufferIndex * periodSizeInBytes);
+
+    if (pDevice->type == mal_device_type_playback) {
+        mal_device__read_frames_from_client(pDevice, pDevice->opensl.periodSizeInFrames, pBuffer);
+    } else {
+        mal_device__send_frames_to_client(pDevice, pDevice->opensl.periodSizeInFrames, pBuffer);
+    }
+
+    // Hand the period back to the queue. The call is the same for playback and capture. If it fails the
+    // index is deliberately left where it is, exactly as before.
+    SLresult resultSL = MAL_OPENSL_BUFFERQUEUE(pDevice->opensl.pBufferQueue)->Enqueue((SLAndroidSimpleBufferQueueItf)pDevice->opensl.pBufferQueue, pBuffer, periodSizeInBytes);
+    if (resultSL != SL_RESULT_SUCCESS) {
+        return;
+    }
+
+    pDevice->opensl.currentBufferIndex = (pDevice->opensl.currentBufferIndex + 1) % pDevice->periods;
+}
+#endif
+
+// Destroys the OpenSL|ES objects owned by the device, frees its buffer and drops its reference on the shared engine.
+static void mal_device_uninit__opensl(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // Uninit device.
+    if (pDevice->type == mal_device_type_playback) {
+        if (pDevice->opensl.pAudioPlayerObj) MAL_OPENSL_OBJ(pDevice->opensl.pAudioPlayerObj)->Destroy((SLObjectItf)pDevice->opensl.pAudioPlayerObj);
+        if (pDevice->opensl.pOutputMixObj) MAL_OPENSL_OBJ(pDevice->opensl.pOutputMixObj)->Destroy((SLObjectItf)pDevice->opensl.pOutputMixObj);
+    } else {
+        if (pDevice->opensl.pAudioRecorderObj) MAL_OPENSL_OBJ(pDevice->opensl.pAudioRecorderObj)->Destroy((SLObjectItf)pDevice->opensl.pAudioRecorderObj);
+    }
+    mal_free(pDevice->opensl.pBuffer);
+    mal_zero_object(&pDevice->opensl);  // Every handle above is now invalid; clear them so that none can be destroyed or freed a second time.
+
+    // Uninit global data. The engine is destroyed when the counter reaches zero; the globals are then cleared so that no stale engine pointer is left behind.
+    if (g_malOpenSLInitCounter > 0 && mal_atomic_decrement_32(&g_malOpenSLInitCounter) == 0) {
+        (*g_malEngineObjectSL)->Destroy(g_malEngineObjectSL);
+        g_malEngineObjectSL = NULL;
+        g_malEngineSL = NULL;
+    }
+}
+
+static mal_result mal_device_init__opensl(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    (void)pContext;
+
+    // For now, only supporting Android implementations of OpenSL|ES since that's the only one I've
+    // been able to test with and I currently depend on Android-specific extensions (simple buffer
+    // queues).
+#ifndef MAL_ANDROID
+    return MAL_NO_BACKEND;
+#endif
+
+    // Use s32 as the internal format for when floating point is specified.
+    if (pConfig->format == mal_format_f32) {
+        pDevice->internalFormat = mal_format_s32;
+    }
+
+    // Initialize global data first if applicable.
+    if (mal_atomic_increment_32(&g_malOpenSLInitCounter) == 1) {
+        SLresult resultSL = slCreateEngine(&g_malEngineObjectSL, 0, NULL, 0, NULL, NULL);
+        if (resultSL != SL_RESULT_SUCCESS) {
+            mal_atomic_decrement_32(&g_malOpenSLInitCounter);
+            return mal_post_error(pDevice, "[OpenSL] slCreateEngine() failed.", MAL_NO_BACKEND);
+        }
+
+        (*g_malEngineObjectSL)->Realize(g_malEngineObjectSL, SL_BOOLEAN_FALSE);
+
+        resultSL = (*g_malEngineObjectSL)->GetInterface(g_malEngineObjectSL, SL_IID_ENGINE, &g_malEngineSL);
+        if (resultSL != SL_RESULT_SUCCESS) {
+            (*g_malEngineObjectSL)->Destroy(g_malEngineObjectSL);
+            mal_atomic_decrement_32(&g_malOpenSLInitCounter);
+            return mal_post_error(pDevice, "[OpenSL] Failed to retrieve SL_IID_ENGINE interface.", MAL_NO_BACKEND);
+        }
+    }
+
+
+    // Now we can start initializing the device properly.
+    mal_assert(pDevice != NULL);
+    mal_zero_object(&pDevice->opensl);
+
+    pDevice->opensl.currentBufferIndex = 0;
+    pDevice->opensl.periodSizeInFrames = pDevice->bufferSizeInFrames / pConfig->periods;
+    pDevice->bufferSizeInFrames = pDevice->opensl.periodSizeInFrames * pConfig->periods;
+
+    SLDataLocator_AndroidSimpleBufferQueue queue;
+    queue.locatorType = SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE;
+    queue.numBuffers = pConfig->periods;
+
+    SLDataFormat_PCM* pFormat = NULL;
+
+#if defined(MAL_ANDROID) && __ANDROID_API__ >= 21
+    SLAndroidDataFormat_PCM_EX pcmEx;
+    if (pDevice->format == mal_format_f32 /*|| pDevice->format == mal_format_f64*/) {
+        pcmEx.formatType = SL_ANDROID_DATAFORMAT_PCM_EX;
+        pcmEx.representation = SL_ANDROID_PCM_REPRESENTATION_FLOAT;
+    } else {
+        pcmEx.formatType = SL_DATAFORMAT_PCM;
+    }
+    pFormat = (SLDataFormat_PCM*)&pcmEx;
+#else
+    SLDataFormat_PCM pcm;
+    pcm.formatType = SL_DATAFORMAT_PCM;
+    pFormat = &pcm;
+#endif
+
+    pFormat->numChannels   = pDevice->channels;
+    pFormat->samplesPerSec = mal_round_to_standard_sample_rate__opensl(pDevice->sampleRate * 1000);  // In millihertz.
+    pFormat->bitsPerSample = mal_get_sample_size_in_bytes(pDevice->format)*8;
+    pFormat->containerSize = pFormat->bitsPerSample;  // Always tightly packed for now.
+    pFormat->channelMask   = mal_channel_map_to_channel_mask__opensl(pConfig->channelMap, pFormat->numChannels);
+    pFormat->endianness    = SL_BYTEORDER_LITTLEENDIAN;
+
+    // Android has a few restrictions on the format as documented here: https://developer.android.com/ndk/guides/audio/opensl-for-android.html
+    //  - Only mono and stereo is supported.
+    //  - Only u8 and s16 formats are supported.
+    //  - Limited to a sample rate of 48000.
+#ifdef MAL_ANDROID
+    if (pFormat->numChannels > 2) {
+        pFormat->numChannels = 2;
+    }
+#if __ANDROID_API__ >= 21
+    if (pFormat->formatType == SL_ANDROID_DATAFORMAT_PCM_EX) {
+        // It's floating point.
+        mal_assert(pcmEx.representation == SL_ANDROID_PCM_REPRESENTATION_FLOAT);
+        if (pFormat->bitsPerSample > 32) {
+            pFormat->bitsPerSample = 32;
+        }
+    } else {
+        if (pFormat->bitsPerSample > 16) {
+            pFormat->bitsPerSample = 16;
+        }
+    }
+#else
+    if (pFormat->bitsPerSample > 16) {
+        pFormat->bitsPerSample = 16;
+    }
+#endif
+    pFormat->containerSize = pFormat->bitsPerSample;  // Always tightly packed for now.
+
+    if (pFormat->samplesPerSec > SL_SAMPLINGRATE_48) {
+        pFormat->samplesPerSec = SL_SAMPLINGRATE_48;
+    }
+#endif
+
+    if (type == mal_device_type_playback) {
+        SLresult resultSL = (*g_malEngineSL)->CreateOutputMix(g_malEngineSL, (SLObjectItf*)&pDevice->opensl.pOutputMixObj, 0, NULL, NULL);
+        if (resultSL != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to create output mix.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pOutputMixObj)->Realize((SLObjectItf)pDevice->opensl.pOutputMixObj, SL_BOOLEAN_FALSE)) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to realize output mix object.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pOutputMixObj)->GetInterface((SLObjectItf)pDevice->opensl.pOutputMixObj, SL_IID_OUTPUTMIX, &pDevice->opensl.pOutputMix) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to retrieve SL_IID_OUTPUTMIX interface.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        // Set the output device.
+        if (pDeviceID != NULL) {
+            MAL_OPENSL_OUTPUTMIX(pDevice->opensl.pOutputMix)->ReRoute((SLOutputMixItf)pDevice->opensl.pOutputMix, 1, &pDeviceID->opensl);
+        }
+
+        SLDataSource source;
+        source.pLocator = &queue;
+        source.pFormat = pFormat;
+
+        SLDataLocator_OutputMix outmixLocator;
+        outmixLocator.locatorType = SL_DATALOCATOR_OUTPUTMIX;
+        outmixLocator.outputMix = (SLObjectItf)pDevice->opensl.pOutputMixObj;
+
+        SLDataSink sink;
+        sink.pLocator = &outmixLocator;
+        sink.pFormat = NULL;
+
+        const SLInterfaceID itfIDs1[] = {SL_IID_ANDROIDSIMPLEBUFFERQUEUE};
+        const SLboolean itfIDsRequired1[] = {SL_BOOLEAN_TRUE};
+        resultSL = (*g_malEngineSL)->CreateAudioPlayer(g_malEngineSL, (SLObjectItf*)&pDevice->opensl.pAudioPlayerObj, &source, &sink, 1, itfIDs1, itfIDsRequired1);
+        if (resultSL == SL_RESULT_CONTENT_UNSUPPORTED) {
+            // Unsupported format. Fall back to something safer and try again. If this fails, just abort.
+            pFormat->formatType = SL_DATAFORMAT_PCM;
+            pFormat->numChannels = 2;
+            pFormat->samplesPerSec = SL_SAMPLINGRATE_16;
+            pFormat->bitsPerSample = 16;
+            pFormat->containerSize = pFormat->bitsPerSample;  // Always tightly packed for now.
+            pFormat->channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
+            resultSL = (*g_malEngineSL)->CreateAudioPlayer(g_malEngineSL, (SLObjectItf*)&pDevice->opensl.pAudioPlayerObj, &source, &sink, 1, itfIDs1, itfIDsRequired1);
+        }
+
+        if (resultSL != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to create audio player.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pAudioPlayerObj)->Realize((SLObjectItf)pDevice->opensl.pAudioPlayerObj, SL_BOOLEAN_FALSE) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to realize audio player.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pAudioPlayerObj)->GetInterface((SLObjectItf)pDevice->opensl.pAudioPlayerObj, SL_IID_PLAY, &pDevice->opensl.pAudioPlayer) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to retrieve SL_IID_PLAY interface.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pAudioPlayerObj)->GetInterface((SLObjectItf)pDevice->opensl.pAudioPlayerObj, SL_IID_ANDROIDSIMPLEBUFFERQUEUE, &pDevice->opensl.pBufferQueue) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to retrieve SL_IID_ANDROIDSIMPLEBUFFERQUEUE interface.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_BUFFERQUEUE(pDevice->opensl.pBufferQueue)->RegisterCallback((SLAndroidSimpleBufferQueueItf)pDevice->opensl.pBufferQueue, mal_buffer_queue_callback__opensl_android, pDevice) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to register buffer queue callback.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+    } else {
+        SLDataLocator_IODevice locatorDevice;
+        locatorDevice.locatorType = SL_DATALOCATOR_IODEVICE;
+        locatorDevice.deviceType = SL_IODEVICE_AUDIOINPUT;
+        locatorDevice.deviceID = (pDeviceID == NULL) ? SL_DEFAULTDEVICEID_AUDIOINPUT : pDeviceID->opensl;
+        locatorDevice.device = NULL;
+
+        SLDataSource source;
+        source.pLocator = &locatorDevice;
+        source.pFormat = NULL;
+
+        SLDataSink sink;
+        sink.pLocator = &queue;
+        sink.pFormat = pFormat;
+
+        const SLInterfaceID itfIDs1[] = {SL_IID_ANDROIDSIMPLEBUFFERQUEUE};
+        const SLboolean itfIDsRequired1[] = {SL_BOOLEAN_TRUE};
+        SLresult resultSL = (*g_malEngineSL)->CreateAudioRecorder(g_malEngineSL, (SLObjectItf*)&pDevice->opensl.pAudioRecorderObj, &source, &sink, 1, itfIDs1, itfIDsRequired1);
+        if (resultSL == SL_RESULT_CONTENT_UNSUPPORTED) {
+            // Unsupported format. Fall back to something safer and try again. If this fails, just abort.
+            pFormat->formatType = SL_DATAFORMAT_PCM;
+            pFormat->numChannels = 1;
+            pFormat->samplesPerSec = SL_SAMPLINGRATE_16;
+            pFormat->bitsPerSample = 16;
+            pFormat->containerSize = pFormat->bitsPerSample;  // Always tightly packed for now.
+            pFormat->channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
+            resultSL = (*g_malEngineSL)->CreateAudioRecorder(g_malEngineSL, (SLObjectItf*)&pDevice->opensl.pAudioRecorderObj, &source, &sink, 1, itfIDs1, itfIDsRequired1);
+        }
+
+        if (resultSL != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to create audio recorder.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pAudioRecorderObj)->Realize((SLObjectItf)pDevice->opensl.pAudioRecorderObj, SL_BOOLEAN_FALSE) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to realize audio recorder.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pAudioRecorderObj)->GetInterface((SLObjectItf)pDevice->opensl.pAudioRecorderObj, SL_IID_RECORD, &pDevice->opensl.pAudioRecorder) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to retrieve SL_IID_RECORD interface.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_OBJ(pDevice->opensl.pAudioRecorderObj)->GetInterface((SLObjectItf)pDevice->opensl.pAudioRecorderObj, SL_IID_ANDROIDSIMPLEBUFFERQUEUE, &pDevice->opensl.pBufferQueue) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to retrieve SL_IID_ANDROIDSIMPLEBUFFERQUEUE interface.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+
+        if (MAL_OPENSL_BUFFERQUEUE(pDevice->opensl.pBufferQueue)->RegisterCallback((SLAndroidSimpleBufferQueueItf)pDevice->opensl.pBufferQueue, mal_buffer_queue_callback__opensl_android, pDevice) != SL_RESULT_SUCCESS) {
+            mal_device_uninit__opensl(pDevice);
+            return mal_post_error(pDevice, "[OpenSL] Failed to register buffer queue callback.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+    }
+
+
+    // The internal format is determined by the pFormat object.
+    mal_bool32 isFloatingPoint = MAL_FALSE;
+#if defined(MAL_ANDROID) && __ANDROID_API__ >= 21
+    if (pFormat->formatType == SL_ANDROID_DATAFORMAT_PCM_EX) {
+        mal_assert(pcmEx.representation == SL_ANDROID_PCM_REPRESENTATION_FLOAT);
+        isFloatingPoint = MAL_TRUE;
+    }
+#endif
+    if (isFloatingPoint) {
+        if (pFormat->bitsPerSample == 32) {
+            pDevice->internalFormat = mal_format_f32;
+        }
+#if 0
+        if (pFormat->bitsPerSample == 64) {
+            pDevice->internalFormat = mal_format_f64;
+        }
+#endif
+    } else {
+        if (pFormat->bitsPerSample == 8) {
+            pDevice->internalFormat = mal_format_u8;
+        } else if (pFormat->bitsPerSample == 16) {
+            pDevice->internalFormat = mal_format_s16;
+        } else if (pFormat->bitsPerSample == 24) {
+            pDevice->internalFormat = mal_format_s24;
+        } else if (pFormat->bitsPerSample == 32) {
+            pDevice->internalFormat = mal_format_s32;
+        }
+    }
+
+    pDevice->internalChannels = pFormat->numChannels;
+    pDevice->internalSampleRate = pFormat->samplesPerSec / 1000;
+    mal_channel_mask_to_channel_map__opensl(pFormat->channelMask, pDevice->internalChannels, pDevice->internalChannelMap);
+
+
+    size_t bufferSizeInBytes = pDevice->bufferSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    pDevice->opensl.pBuffer = (mal_uint8*)mal_malloc(bufferSizeInBytes);
+    if (pDevice->opensl.pBuffer == NULL) {
+        mal_device_uninit__opensl(pDevice);
+        return mal_post_error(pDevice, "[OpenSL] Failed to allocate memory for data buffer.", MAL_OUT_OF_MEMORY);
+    }
+
+    mal_zero_memory(pDevice->opensl.pBuffer, bufferSizeInBytes);
+
+    return MAL_SUCCESS;
+}
+
+// Puts the OpenSL|ES player or recorder into its running state and queues one period-sized chunk of the buffer per period.
+static mal_result mal_device__start_backend__opensl(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    SLresult status;
+
+    if (pDevice->type != mal_device_type_playback) {
+        // Capture: switch the recorder on, then hand every period-sized slice of the buffer to the queue.
+        status = MAL_OPENSL_RECORD(pDevice->opensl.pAudioRecorder)->SetRecordState((SLRecordItf)pDevice->opensl.pAudioRecorder, SL_RECORDSTATE_RECORDING);
+        if (status != SL_RESULT_SUCCESS) {
+            return mal_post_error(pDevice, "[OpenSL] Failed to start internal capture device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+        }
+        const size_t captureBytesPerPeriod = pDevice->opensl.periodSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+        for (mal_uint32 periodIndex = 0; periodIndex < pDevice->periods; periodIndex += 1) {
+            status = MAL_OPENSL_BUFFERQUEUE(pDevice->opensl.pBufferQueue)->Enqueue((SLAndroidSimpleBufferQueueItf)pDevice->opensl.pBufferQueue, pDevice->opensl.pBuffer + (captureBytesPerPeriod * periodIndex), captureBytesPerPeriod);
+            if (status != SL_RESULT_SUCCESS) {
+                MAL_OPENSL_RECORD(pDevice->opensl.pAudioRecorder)->SetRecordState((SLRecordItf)pDevice->opensl.pAudioRecorder, SL_RECORDSTATE_STOPPED);
+                return mal_post_error(pDevice, "[OpenSL] Failed to enqueue buffer for capture device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+            }
+        }
+        return MAL_SUCCESS;
+    }
+
+    // Playback: start the player, fill the whole buffer via mal_device__read_frames_from_client(), then queue it period by period.
+    status = MAL_OPENSL_PLAY(pDevice->opensl.pAudioPlayer)->SetPlayState((SLPlayItf)pDevice->opensl.pAudioPlayer, SL_PLAYSTATE_PLAYING);
+    if (status != SL_RESULT_SUCCESS) {
+        return mal_post_error(pDevice, "[OpenSL] Failed to start internal playback device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+    }
+    mal_device__read_frames_from_client(pDevice, pDevice->bufferSizeInFrames, pDevice->opensl.pBuffer);
+    const size_t playbackBytesPerPeriod = pDevice->opensl.periodSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    for (mal_uint32 periodIndex = 0; periodIndex < pDevice->periods; periodIndex += 1) {
+        status = MAL_OPENSL_BUFFERQUEUE(pDevice->opensl.pBufferQueue)->Enqueue((SLAndroidSimpleBufferQueueItf)pDevice->opensl.pBufferQueue, pDevice->opensl.pBuffer + (playbackBytesPerPeriod * periodIndex), playbackBytesPerPeriod);
+        if (status != SL_RESULT_SUCCESS) {
+            MAL_OPENSL_PLAY(pDevice->opensl.pAudioPlayer)->SetPlayState((SLPlayItf)pDevice->opensl.pAudioPlayer, SL_PLAYSTATE_STOPPED);
+            return mal_post_error(pDevice, "[OpenSL] Failed to enqueue buffer for playback device.", MAL_FAILED_TO_START_BACKEND_DEVICE);
+        }
+    }
+    return MAL_SUCCESS;
+}
+
+// Halts the OpenSL|ES player or recorder, empties the buffer queue and tells the client the device has stopped.
+static mal_result mal_device__stop_backend__opensl(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+    SLresult status;
+    const mal_bool32 isPlayback = (pDevice->type == mal_device_type_playback);
+    if (isPlayback) {
+        status = MAL_OPENSL_PLAY(pDevice->opensl.pAudioPlayer)->SetPlayState((SLPlayItf)pDevice->opensl.pAudioPlayer, SL_PLAYSTATE_STOPPED);
+    } else {
+        status = MAL_OPENSL_RECORD(pDevice->opensl.pAudioRecorder)->SetRecordState((SLRecordItf)pDevice->opensl.pAudioRecorder, SL_RECORDSTATE_STOPPED);
+    }
+    if (status != SL_RESULT_SUCCESS) {
+        return isPlayback
+            ? mal_post_error(pDevice, "[OpenSL] Failed to stop internal playback device.", MAL_FAILED_TO_STOP_BACKEND_DEVICE)
+            : mal_post_error(pDevice, "[OpenSL] Failed to stop internal capture device.", MAL_FAILED_TO_STOP_BACKEND_DEVICE);
+    }
+
+    // Flush anything still sitting in the buffer queue; the result of Clear() is not inspected.
+    MAL_OPENSL_BUFFERQUEUE(pDevice->opensl.pBufferQueue)->Clear((SLAndroidSimpleBufferQueueItf)pDevice->opensl.pBufferQueue);
+
+    // Update the state and notify the client by hand; no OpenSL|ES stop callback is relied upon here.
+    mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+    if (pDevice->onStop != NULL) {
+        pDevice->onStop(pDevice);
+    }
+    return MAL_SUCCESS;
+}
+#endif  // OpenSL|ES
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// OpenAL Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_OPENAL  // Opens the OpenAL backend section; the matching #endif is further down the file.
+#ifdef MAL_WIN32       // MAL_AL_APIENTRY is the calling convention placed on the OpenAL function pointer types below.
+#define MAL_AL_APIENTRY __cdecl
+#else
+#define MAL_AL_APIENTRY
+#endif
+// The OpenAL headers are only included when runtime linking is disabled; Apple builds use the <OpenAL/...> paths.
+#ifdef MAL_NO_RUNTIME_LINKING
+    #if defined(MAL_APPLE)
+        #include <OpenAL/al.h>
+        #include <OpenAL/alc.h>
+    #else
+        #include <AL/al.h>
+        #include <AL/alc.h>
+    #endif
+#endif
+// mal_-prefixed ALC types declared locally, so the function pointer types below do not require the OpenAL headers.
+typedef struct mal_ALCdevice_struct  mal_ALCdevice;
+typedef struct mal_ALCcontext_struct mal_ALCcontext;
+typedef char                         mal_ALCboolean;
+typedef char                         mal_ALCchar;
+typedef signed char                  mal_ALCbyte;
+typedef unsigned char                mal_ALCubyte;
+typedef short                        mal_ALCshort;
+typedef unsigned short               mal_ALCushort;
+typedef int                          mal_ALCint;
+typedef unsigned int                 mal_ALCuint;
+typedef int                          mal_ALCsizei;
+typedef int                          mal_ALCenum;
+typedef float                        mal_ALCfloat;
+typedef double                       mal_ALCdouble;
+typedef void                         mal_ALCvoid;
+// The AL-level types are plain aliases of the corresponding ALC-level types above.
+typedef mal_ALCboolean               mal_ALboolean;
+typedef mal_ALCchar                  mal_ALchar;
+typedef mal_ALCbyte                  mal_ALbyte;
+typedef mal_ALCubyte                 mal_ALubyte;
+typedef mal_ALCshort                 mal_ALshort;
+typedef mal_ALCushort                mal_ALushort;
+typedef mal_ALCint                   mal_ALint;
+typedef mal_ALCuint                  mal_ALuint;
+typedef mal_ALCsizei                 mal_ALsizei;
+typedef mal_ALCenum                  mal_ALenum;
+typedef mal_ALCfloat                 mal_ALfloat;
+typedef mal_ALCdouble                mal_ALdouble;
+typedef mal_ALCvoid                  mal_ALvoid;
+// Numeric enum values spelled out under MAL_-prefixed names. NOTE(review): expected to match alc.h / al.h — confirm.
+#define MAL_ALC_DEVICE_SPECIFIER            0x1005
+#define MAL_ALC_CAPTURE_DEVICE_SPECIFIER    0x310
+#define MAL_ALC_CAPTURE_SAMPLES             0x312
+// Source state token and related values. NOTE(review): presumably used with the alGetSourcei-style queries — confirm.
+#define MAL_AL_SOURCE_STATE                 0x1010
+#define MAL_AL_INITIAL                      0x1011
+#define MAL_AL_PLAYING                      0x1012
+#define MAL_AL_PAUSED                       0x1013
+#define MAL_AL_STOPPED                      0x1014
+#define MAL_AL_BUFFERS_PROCESSED            0x1016
+// Buffer formats. NOTE(review): the FLOAT32 and multi-channel values look like extension formats — confirm against alext.h.
+#define MAL_AL_FORMAT_MONO8                 0x1100
+#define MAL_AL_FORMAT_MONO16                0x1101
+#define MAL_AL_FORMAT_STEREO8               0x1102
+#define MAL_AL_FORMAT_STEREO16              0x1103
+#define MAL_AL_FORMAT_MONO_FLOAT32          0x10010
+#define MAL_AL_FORMAT_STEREO_FLOAT32        0x10011
+#define MAL_AL_FORMAT_51CHN16               0x120B
+#define MAL_AL_FORMAT_51CHN32               0x120C
+#define MAL_AL_FORMAT_51CHN8                0x120A
+#define MAL_AL_FORMAT_61CHN16               0x120E
+#define MAL_AL_FORMAT_61CHN32               0x120F
+#define MAL_AL_FORMAT_61CHN8                0x120D
+#define MAL_AL_FORMAT_71CHN16               0x1211
+#define MAL_AL_FORMAT_71CHN32               0x1212
+#define MAL_AL_FORMAT_71CHN8                0x1210
+#define MAL_AL_FORMAT_QUAD16                0x1205
+#define MAL_AL_FORMAT_QUAD32                0x1206
+#define MAL_AL_FORMAT_QUAD8                 0x1204
+#define MAL_AL_FORMAT_REAR16                0x1208
+#define MAL_AL_FORMAT_REAR32                0x1209
+#define MAL_AL_FORMAT_REAR8                 0x1207
+// Function pointer types for the alc* entry points (context, device and capture calls), declared with MAL_AL_APIENTRY.
+typedef mal_ALCcontext*    (MAL_AL_APIENTRY * MAL_LPALCCREATECONTEXT)      (mal_ALCdevice *device, const mal_ALCint *attrlist);
+typedef mal_ALCboolean     (MAL_AL_APIENTRY * MAL_LPALCMAKECONTEXTCURRENT) (mal_ALCcontext *context);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCPROCESSCONTEXT)     (mal_ALCcontext *context);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCSUSPENDCONTEXT)     (mal_ALCcontext *context);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCDESTROYCONTEXT)     (mal_ALCcontext *context);
+typedef mal_ALCcontext*    (MAL_AL_APIENTRY * MAL_LPALCGETCURRENTCONTEXT)  (void);
+typedef mal_ALCdevice*     (MAL_AL_APIENTRY * MAL_LPALCGETCONTEXTSDEVICE)  (mal_ALCcontext *context);
+typedef mal_ALCdevice*     (MAL_AL_APIENTRY * MAL_LPALCOPENDEVICE)         (const mal_ALCchar *devicename);
+typedef mal_ALCboolean     (MAL_AL_APIENTRY * MAL_LPALCCLOSEDEVICE)        (mal_ALCdevice *device);
+typedef mal_ALCenum        (MAL_AL_APIENTRY * MAL_LPALCGETERROR)           (mal_ALCdevice *device);
+typedef mal_ALCboolean     (MAL_AL_APIENTRY * MAL_LPALCISEXTENSIONPRESENT) (mal_ALCdevice *device, const mal_ALCchar *extname);
+typedef void*              (MAL_AL_APIENTRY * MAL_LPALCGETPROCADDRESS)     (mal_ALCdevice *device, const mal_ALCchar *funcname);
+typedef mal_ALCenum        (MAL_AL_APIENTRY * MAL_LPALCGETENUMVALUE)       (mal_ALCdevice *device, const mal_ALCchar *enumname);
+typedef const mal_ALCchar* (MAL_AL_APIENTRY * MAL_LPALCGETSTRING)          (mal_ALCdevice *device, mal_ALCenum param);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCGETINTEGERV)        (mal_ALCdevice *device, mal_ALCenum param, mal_ALCsizei size, mal_ALCint *values);
+typedef mal_ALCdevice*     (MAL_AL_APIENTRY * MAL_LPALCCAPTUREOPENDEVICE)  (const mal_ALCchar *devicename, mal_ALCuint frequency, mal_ALCenum format, mal_ALCsizei buffersize);
+typedef mal_ALCboolean     (MAL_AL_APIENTRY * MAL_LPALCCAPTURECLOSEDEVICE) (mal_ALCdevice *device);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCCAPTURESTART)       (mal_ALCdevice *device);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCCAPTURESTOP)        (mal_ALCdevice *device);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALCCAPTURESAMPLES)     (mal_ALCdevice *device, mal_ALCvoid *buffer, mal_ALCsizei samples);
+// Function pointer types for the al* entry points (global state, sources and buffers), declared with MAL_AL_APIENTRY.
+typedef void               (MAL_AL_APIENTRY * MAL_LPALENABLE)              (mal_ALenum capability);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALDISABLE)             (mal_ALenum capability);
+typedef mal_ALboolean      (MAL_AL_APIENTRY * MAL_LPALISENABLED)           (mal_ALenum capability);
+typedef const mal_ALchar*  (MAL_AL_APIENTRY * MAL_LPALGETSTRING)           (mal_ALenum param);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBOOLEANV)         (mal_ALenum param, mal_ALboolean *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETINTEGERV)         (mal_ALenum param, mal_ALint *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETFLOATV)           (mal_ALenum param, mal_ALfloat *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETDOUBLEV)          (mal_ALenum param, mal_ALdouble *values);
+typedef mal_ALboolean      (MAL_AL_APIENTRY * MAL_LPALGETBOOLEAN)          (mal_ALenum param);
+typedef mal_ALint          (MAL_AL_APIENTRY * MAL_LPALGETINTEGER)          (mal_ALenum param);
+typedef mal_ALfloat        (MAL_AL_APIENTRY * MAL_LPALGETFLOAT)            (mal_ALenum param);
+typedef mal_ALdouble       (MAL_AL_APIENTRY * MAL_LPALGETDOUBLE)           (mal_ALenum param);
+typedef mal_ALenum         (MAL_AL_APIENTRY * MAL_LPALGETERROR)            (void);
+typedef mal_ALboolean      (MAL_AL_APIENTRY * MAL_LPALISEXTENSIONPRESENT)  (const mal_ALchar *extname);
+typedef void*              (MAL_AL_APIENTRY * MAL_LPALGETPROCADDRESS)      (const mal_ALchar *fname);
+typedef mal_ALenum         (MAL_AL_APIENTRY * MAL_LPALGETENUMVALUE)        (const mal_ALchar *ename);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGENSOURCES)          (mal_ALsizei n, mal_ALuint *sources);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALDELETESOURCES)       (mal_ALsizei n, const mal_ALuint *sources);
+typedef mal_ALboolean      (MAL_AL_APIENTRY * MAL_LPALISSOURCE)            (mal_ALuint source);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEF)             (mal_ALuint source, mal_ALenum param, mal_ALfloat value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCE3F)            (mal_ALuint source, mal_ALenum param, mal_ALfloat value1, mal_ALfloat value2, mal_ALfloat value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEFV)            (mal_ALuint source, mal_ALenum param, const mal_ALfloat *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEI)             (mal_ALuint source, mal_ALenum param, mal_ALint value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCE3I)            (mal_ALuint source, mal_ALenum param, mal_ALint value1, mal_ALint value2, mal_ALint value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEIV)            (mal_ALuint source, mal_ALenum param, const mal_ALint *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETSOURCEF)          (mal_ALuint source, mal_ALenum param, mal_ALfloat *value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETSOURCE3F)         (mal_ALuint source, mal_ALenum param, mal_ALfloat *value1, mal_ALfloat *value2, mal_ALfloat *value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETSOURCEFV)         (mal_ALuint source, mal_ALenum param, mal_ALfloat *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETSOURCEI)          (mal_ALuint source, mal_ALenum param, mal_ALint *value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETSOURCE3I)         (mal_ALuint source, mal_ALenum param, mal_ALint *value1, mal_ALint *value2, mal_ALint *value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETSOURCEIV)         (mal_ALuint source, mal_ALenum param, mal_ALint *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEPLAYV)         (mal_ALsizei n, const mal_ALuint *sources);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCESTOPV)         (mal_ALsizei n, const mal_ALuint *sources);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEREWINDV)       (mal_ALsizei n, const mal_ALuint *sources);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEPAUSEV)        (mal_ALsizei n, const mal_ALuint *sources);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEPLAY)          (mal_ALuint source);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCESTOP)          (mal_ALuint source);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEREWIND)        (mal_ALuint source);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEPAUSE)         (mal_ALuint source);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEQUEUEBUFFERS)  (mal_ALuint source, mal_ALsizei nb, const mal_ALuint *buffers);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALSOURCEUNQUEUEBUFFERS)(mal_ALuint source, mal_ALsizei nb, mal_ALuint *buffers);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGENBUFFERS)          (mal_ALsizei n, mal_ALuint *buffers);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALDELETEBUFFERS)       (mal_ALsizei n, const mal_ALuint *buffers);
+typedef mal_ALboolean      (MAL_AL_APIENTRY * MAL_LPALISBUFFER)            (mal_ALuint buffer);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFERDATA)          (mal_ALuint buffer, mal_ALenum format, const mal_ALvoid *data, mal_ALsizei size, mal_ALsizei freq);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFERF)             (mal_ALuint buffer, mal_ALenum param, mal_ALfloat value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFER3F)            (mal_ALuint buffer, mal_ALenum param, mal_ALfloat value1, mal_ALfloat value2, mal_ALfloat value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFERFV)            (mal_ALuint buffer, mal_ALenum param, const mal_ALfloat *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFERI)             (mal_ALuint buffer, mal_ALenum param, mal_ALint value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFER3I)            (mal_ALuint buffer, mal_ALenum param, mal_ALint value1, mal_ALint value2, mal_ALint value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALBUFFERIV)            (mal_ALuint buffer, mal_ALenum param, const mal_ALint *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBUFFERF)          (mal_ALuint buffer, mal_ALenum param, mal_ALfloat *value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBUFFER3F)         (mal_ALuint buffer, mal_ALenum param, mal_ALfloat *value1, mal_ALfloat *value2, mal_ALfloat *value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBUFFERFV)         (mal_ALuint buffer, mal_ALenum param, mal_ALfloat *values);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBUFFERI)          (mal_ALuint buffer, mal_ALenum param, mal_ALint *value);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBUFFER3I)         (mal_ALuint buffer, mal_ALenum param, mal_ALint *value1, mal_ALint *value2, mal_ALint *value3);
+typedef void               (MAL_AL_APIENTRY * MAL_LPALGETBUFFERIV)         (mal_ALuint buffer, mal_ALenum param, mal_ALint *values);
+
+mal_result mal_context_init__openal(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+
+#ifndef MAL_NO_RUNTIME_LINKING
+    const char* libName = NULL;
+#ifdef MAL_WIN32
+    libName = "OpenAL32.dll";
+#endif
+#if defined(MAL_UNIX) && !defined(MAL_APPLE)
+    libName = "libopenal.so";
+#endif
+#ifdef MAL_APPLE
+    libName = "OpenAL.framework/OpenAL";
+#endif
+    if (libName == NULL) {
+        return MAL_NO_BACKEND;  // Don't know what the library name is called.
+    }
+
+
+    pContext->openal.hOpenAL = mal_dlopen(libName);
+
+#ifdef MAL_WIN32
+    // Special case for Win32 - try "soft_oal.dll" for OpenAL-Soft drop-ins.
+    if (pContext->openal.hOpenAL == NULL) {
+        pContext->openal.hOpenAL = mal_dlopen("soft_oal.dll");
+    }
+#endif
+
+    if (pContext->openal.hOpenAL == NULL) {
+        return MAL_FAILED_TO_INIT_BACKEND;
+    }
+
+    pContext->openal.alcCreateContext       = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCreateContext");
+    pContext->openal.alcMakeContextCurrent  = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcMakeContextCurrent");
+    pContext->openal.alcProcessContext      = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcProcessContext");
+    pContext->openal.alcSuspendContext      = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcSuspendContext");
+    pContext->openal.alcDestroyContext      = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcDestroyContext");
+    pContext->openal.alcGetCurrentContext   = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetCurrentContext");
+    pContext->openal.alcGetContextsDevice   = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetContextsDevice");
+    pContext->openal.alcOpenDevice          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcOpenDevice");
+    pContext->openal.alcCloseDevice         = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCloseDevice");
+    pContext->openal.alcGetError            = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetError");
+    pContext->openal.alcIsExtensionPresent  = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcIsExtensionPresent");
+    pContext->openal.alcGetProcAddress      = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetProcAddress");
+    pContext->openal.alcGetEnumValue        = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetEnumValue");
+    pContext->openal.alcGetString           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetString");
+    pContext->openal.alcGetIntegerv         = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcGetIntegerv");
+    pContext->openal.alcCaptureOpenDevice   = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCaptureOpenDevice");
+    pContext->openal.alcCaptureCloseDevice  = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCaptureCloseDevice");
+    pContext->openal.alcCaptureStart        = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCaptureStart");
+    pContext->openal.alcCaptureStop         = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCaptureStop");
+    pContext->openal.alcCaptureSamples      = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alcCaptureSamples");
+
+    pContext->openal.alEnable               = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alEnable");
+    pContext->openal.alDisable              = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alDisable");
+    pContext->openal.alIsEnabled            = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alIsEnabled");
+    pContext->openal.alGetString            = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetString");
+    pContext->openal.alGetBooleanv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBooleanv");
+    pContext->openal.alGetIntegerv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetIntegerv");
+    pContext->openal.alGetFloatv            = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetFloatv");
+    pContext->openal.alGetDoublev           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetDoublev");
+    pContext->openal.alGetBoolean           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBoolean");
+    pContext->openal.alGetInteger           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetInteger");
+    pContext->openal.alGetFloat             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetFloat");
+    pContext->openal.alGetDouble            = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetDouble");
+    pContext->openal.alGetError             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetError");
+    pContext->openal.alIsExtensionPresent   = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alIsExtensionPresent");
+    pContext->openal.alGetProcAddress       = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetProcAddress");
+    pContext->openal.alGetEnumValue         = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetEnumValue");
+    pContext->openal.alGenSources           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGenSources");
+    pContext->openal.alDeleteSources        = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alDeleteSources");
+    pContext->openal.alIsSource             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alIsSource");
+    pContext->openal.alSourcef              = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcef");
+    pContext->openal.alSource3f             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSource3f");
+    pContext->openal.alSourcefv             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcefv");
+    pContext->openal.alSourcei              = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcei");
+    pContext->openal.alSource3i             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSource3i");
+    pContext->openal.alSourceiv             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceiv");
+    pContext->openal.alGetSourcef           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetSourcef");
+    pContext->openal.alGetSource3f          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetSource3f");
+    pContext->openal.alGetSourcefv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetSourcefv");
+    pContext->openal.alGetSourcei           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetSourcei");
+    pContext->openal.alGetSource3i          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetSource3i");
+    pContext->openal.alGetSourceiv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetSourceiv");
+    pContext->openal.alSourcePlayv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcePlayv");
+    pContext->openal.alSourceStopv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceStopv");
+    pContext->openal.alSourceRewindv        = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceRewindv");
+    pContext->openal.alSourcePausev         = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcePausev");
+    pContext->openal.alSourcePlay           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcePlay");
+    pContext->openal.alSourceStop           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceStop");
+    pContext->openal.alSourceRewind         = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceRewind");
+    pContext->openal.alSourcePause          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourcePause");
+    pContext->openal.alSourceQueueBuffers   = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceQueueBuffers");
+    pContext->openal.alSourceUnqueueBuffers = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alSourceUnqueueBuffers");
+    pContext->openal.alGenBuffers           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGenBuffers");
+    pContext->openal.alDeleteBuffers        = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alDeleteBuffers");
+    pContext->openal.alIsBuffer             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alIsBuffer");
+    pContext->openal.alBufferData           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBufferData");
+    pContext->openal.alBufferf              = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBufferf");
+    pContext->openal.alBuffer3f             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBuffer3f");
+    pContext->openal.alBufferfv             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBufferfv");
+    pContext->openal.alBufferi              = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBufferi");
+    pContext->openal.alBuffer3i             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBuffer3i");
+    pContext->openal.alBufferiv             = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alBufferiv");
+    pContext->openal.alGetBufferf           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBufferf");
+    pContext->openal.alGetBuffer3f          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBuffer3f");
+    pContext->openal.alGetBufferfv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBufferfv");
+    pContext->openal.alGetBufferi           = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBufferi");
+    pContext->openal.alGetBuffer3i          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBuffer3i");
+    pContext->openal.alGetBufferiv          = (mal_proc)mal_dlsym(pContext->openal.hOpenAL, "alGetBufferiv");
+#else
+    pContext->openal.alcCreateContext       = (mal_proc)alcCreateContext;
+    pContext->openal.alcMakeContextCurrent  = (mal_proc)alcMakeContextCurrent;
+    pContext->openal.alcProcessContext      = (mal_proc)alcProcessContext;
+    pContext->openal.alcSuspendContext      = (mal_proc)alcSuspendContext;
+    pContext->openal.alcDestroyContext      = (mal_proc)alcDestroyContext;
+    pContext->openal.alcGetCurrentContext   = (mal_proc)alcGetCurrentContext;
+    pContext->openal.alcGetContextsDevice   = (mal_proc)alcGetContextsDevice;
+    pContext->openal.alcOpenDevice          = (mal_proc)alcOpenDevice;
+    pContext->openal.alcCloseDevice         = (mal_proc)alcCloseDevice;
+    pContext->openal.alcGetError            = (mal_proc)alcGetError;
+    pContext->openal.alcIsExtensionPresent  = (mal_proc)alcIsExtensionPresent;
+    pContext->openal.alcGetProcAddress      = (mal_proc)alcGetProcAddress;
+    pContext->openal.alcGetEnumValue        = (mal_proc)alcGetEnumValue;
+    pContext->openal.alcGetString           = (mal_proc)alcGetString;
+    pContext->openal.alcGetIntegerv         = (mal_proc)alcGetIntegerv;
+    pContext->openal.alcCaptureOpenDevice   = (mal_proc)alcCaptureOpenDevice;
+    pContext->openal.alcCaptureCloseDevice  = (mal_proc)alcCaptureCloseDevice;
+    pContext->openal.alcCaptureStart        = (mal_proc)alcCaptureStart;
+    pContext->openal.alcCaptureStop         = (mal_proc)alcCaptureStop;
+    pContext->openal.alcCaptureSamples      = (mal_proc)alcCaptureSamples;
+
+    pContext->openal.alEnable               = (mal_proc)alEnable;
+    pContext->openal.alDisable              = (mal_proc)alDisable;
+    pContext->openal.alIsEnabled            = (mal_proc)alIsEnabled;
+    pContext->openal.alGetString            = (mal_proc)alGetString;
+    pContext->openal.alGetBooleanv          = (mal_proc)alGetBooleanv;
+    pContext->openal.alGetIntegerv          = (mal_proc)alGetIntegerv;
+    pContext->openal.alGetFloatv            = (mal_proc)alGetFloatv;
+    pContext->openal.alGetDoublev           = (mal_proc)alGetDoublev;
+    pContext->openal.alGetBoolean           = (mal_proc)alGetBoolean;
+    pContext->openal.alGetInteger           = (mal_proc)alGetInteger;
+    pContext->openal.alGetFloat             = (mal_proc)alGetFloat;
+    pContext->openal.alGetDouble            = (mal_proc)alGetDouble;
+    pContext->openal.alGetError             = (mal_proc)alGetError;
+    pContext->openal.alIsExtensionPresent   = (mal_proc)alIsExtensionPresent;
+    pContext->openal.alGetProcAddress       = (mal_proc)alGetProcAddress;
+    pContext->openal.alGetEnumValue         = (mal_proc)alGetEnumValue;
+    pContext->openal.alGenSources           = (mal_proc)alGenSources;
+    pContext->openal.alDeleteSources        = (mal_proc)alDeleteSources;
+    pContext->openal.alIsSource             = (mal_proc)alIsSource;
+    pContext->openal.alSourcef              = (mal_proc)alSourcef;
+    pContext->openal.alSource3f             = (mal_proc)alSource3f;
+    pContext->openal.alSourcefv             = (mal_proc)alSourcefv;
+    pContext->openal.alSourcei              = (mal_proc)alSourcei;
+    pContext->openal.alSource3i             = (mal_proc)alSource3i;
+    pContext->openal.alSourceiv             = (mal_proc)alSourceiv;
+    pContext->openal.alGetSourcef           = (mal_proc)alGetSourcef;
+    pContext->openal.alGetSource3f          = (mal_proc)alGetSource3f;
+    pContext->openal.alGetSourcefv          = (mal_proc)alGetSourcefv;
+    pContext->openal.alGetSourcei           = (mal_proc)alGetSourcei;
+    pContext->openal.alGetSource3i          = (mal_proc)alGetSource3i;
+    pContext->openal.alGetSourceiv          = (mal_proc)alGetSourceiv;
+    pContext->openal.alSourcePlayv          = (mal_proc)alSourcePlayv;
+    pContext->openal.alSourceStopv          = (mal_proc)alSourceStopv;
+    pContext->openal.alSourceRewindv        = (mal_proc)alSourceRewindv;
+    pContext->openal.alSourcePausev         = (mal_proc)alSourcePausev;
+    pContext->openal.alSourcePlay           = (mal_proc)alSourcePlay;
+    pContext->openal.alSourceStop           = (mal_proc)alSourceStop;
+    pContext->openal.alSourceRewind         = (mal_proc)alSourceRewind;
+    pContext->openal.alSourcePause          = (mal_proc)alSourcePause;
+    pContext->openal.alSourceQueueBuffers   = (mal_proc)alSourceQueueBuffers;
+    pContext->openal.alSourceUnqueueBuffers = (mal_proc)alSourceUnqueueBuffers;
+    pContext->openal.alGenBuffers           = (mal_proc)alGenBuffers;
+    pContext->openal.alDeleteBuffers        = (mal_proc)alDeleteBuffers;
+    pContext->openal.alIsBuffer             = (mal_proc)alIsBuffer;
+    pContext->openal.alBufferData           = (mal_proc)alBufferData;
+    pContext->openal.alBufferf              = (mal_proc)alBufferf;
+    pContext->openal.alBuffer3f             = (mal_proc)alBuffer3f;
+    pContext->openal.alBufferfv             = (mal_proc)alBufferfv;
+    pContext->openal.alBufferi              = (mal_proc)alBufferi;
+    pContext->openal.alBuffer3i             = (mal_proc)alBuffer3i;
+    pContext->openal.alBufferiv             = (mal_proc)alBufferiv;
+    pContext->openal.alGetBufferf           = (mal_proc)alGetBufferf;
+    pContext->openal.alGetBuffer3f          = (mal_proc)alGetBuffer3f;
+    pContext->openal.alGetBufferfv          = (mal_proc)alGetBufferfv;
+    pContext->openal.alGetBufferi           = (mal_proc)alGetBufferi;
+    pContext->openal.alGetBuffer3i          = (mal_proc)alGetBuffer3i;
+    pContext->openal.alGetBufferiv          = (mal_proc)alGetBufferiv;
+#endif
+
+    // We depend on the ALC_ENUMERATION_EXT extension for enumeration. If this is not supported we fall back to default devices.
+    pContext->openal.isEnumerationSupported = ((MAL_LPALCISEXTENSIONPRESENT)pContext->openal.alcIsExtensionPresent)(NULL, "ALC_ENUMERATION_EXT");
+    pContext->openal.isFloat32Supported = ((MAL_LPALISEXTENSIONPRESENT)pContext->openal.alIsExtensionPresent)("AL_EXT_float32");
+    pContext->openal.isMCFormatsSupported = ((MAL_LPALISEXTENSIONPRESENT)pContext->openal.alIsExtensionPresent)("AL_EXT_MCFORMATS");
+
+    return MAL_SUCCESS;
+}
+
+// Uninitializes the OpenAL portion of a context.
+//
+// With run-time linking the library handle stored in pContext->openal.hOpenAL is closed. With compile-time
+// linking (MAL_NO_RUNTIME_LINKING) there is nothing to release.
+//
+// Always returns MAL_SUCCESS.
+mal_result mal_context_uninit__openal(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_openal);
+
+#if !defined(MAL_NO_RUNTIME_LINKING)
+    // Run-time linking: release the handle to the OpenAL shared library.
+    mal_dlclose(pContext->openal.hOpenAL);
+#endif
+
+    return MAL_SUCCESS;
+}
+
+// Enumerates the OpenAL playback or capture devices.
+//
+// pContext - The context whose OpenAL function pointers are used.
+// type     - mal_device_type_playback to list playback devices, anything else lists capture devices.
+// pCount   - On entry, the capacity of pInfo in elements. On return, the number of entries written to pInfo
+//            or, when pInfo is NULL, the total number of devices found.
+// pInfo    - Optional output array. Pass NULL to only count the devices.
+//
+// Returns MAL_NO_DEVICE if OpenAL does not return a device list, MAL_SUCCESS otherwise.
+mal_result mal_enumerate_devices__openal(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    mal_uint32 infoSize = *pCount;
+    *pCount = 0;
+
+    if (pContext->openal.isEnumerationSupported) {
+        const mal_ALCchar* pDeviceNames = ((MAL_LPALCGETSTRING)pContext->openal.alcGetString)(NULL, (type == mal_device_type_playback) ? MAL_ALC_DEVICE_SPECIFIER : MAL_ALC_CAPTURE_DEVICE_SPECIFIER);
+        if (pDeviceNames == NULL) {
+            return MAL_NO_DEVICE;
+        }
+
+        // Each device is stored in pDeviceNames, separated by a null-terminator. The string itself is double-null-terminated.
+        const mal_ALCchar* pNextDeviceName = pDeviceNames;
+        while (pNextDeviceName[0] != '\0') {
+            if (pInfo != NULL) {
+                // Only write while the caller's array still has room. Devices beyond the capacity are dropped.
+                if (infoSize > 0) {
+                    mal_strncpy_s(pInfo->id.openal, sizeof(pInfo->id.openal), (const char*)pNextDeviceName, (size_t)-1);
+                    mal_strncpy_s(pInfo->name,      sizeof(pInfo->name),      (const char*)pNextDeviceName, (size_t)-1);
+
+                    pInfo += 1;
+                    infoSize -= 1;
+                    *pCount += 1;
+                }
+            } else {
+                *pCount += 1;
+            }
+
+            // Move to the next device name.
+            while (*pNextDeviceName != '\0') {
+                pNextDeviceName += 1;
+            }
+
+            // Skip past the null terminator.
+            pNextDeviceName += 1;
+        }
+    } else {
+        // Enumeration is not supported. Report a single default device.
+        if (pInfo != NULL) {
+            if (infoSize > 0) {
+                // Use the OpenAL member of the ID union (not the SDL one) and leave it as an empty string.
+                // NOTE(review): mal_device_init__openal() passes this string to alcOpenDevice(); confirm the
+                // OpenAL implementation in use treats an empty name as the default device.
+                pInfo->id.openal[0] = '\0';
+
+                if (type == mal_device_type_playback) {
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Playback Device", (size_t)-1);
+                } else {
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Capture Device", (size_t)-1);
+                }
+
+                pInfo += 1;
+                infoSize -= 1;
+                *pCount += 1;
+            }
+        } else {
+            *pCount += 1;
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Releases the OpenAL objects owned by a device: the ALC context (if one was created), the ALC device and
+// the intermediary buffer.
+void mal_device_uninit__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    // A context is only created for playback devices, so capture devices have nothing to destroy here.
+    // Skipping the calls avoids handing NULL to alcDestroyContext() and avoids resetting the calling
+    // thread's current context on behalf of a device that never owned one.
+    if (pDevice->openal.pContextALC != NULL) {
+        ((MAL_LPALCMAKECONTEXTCURRENT)pDevice->pContext->openal.alcMakeContextCurrent)(NULL);
+        ((MAL_LPALCDESTROYCONTEXT)pDevice->pContext->openal.alcDestroyContext)((mal_ALCcontext*)pDevice->openal.pContextALC);
+    }
+
+    // Playback and capture devices are opened with different functions and must be closed with the matching one.
+    if (pDevice->type == mal_device_type_playback) {
+        ((MAL_LPALCCLOSEDEVICE)pDevice->pContext->openal.alcCloseDevice)((mal_ALCdevice*)pDevice->openal.pDeviceALC);
+    } else {
+        ((MAL_LPALCCAPTURECLOSEDEVICE)pDevice->pContext->openal.alcCaptureCloseDevice)((mal_ALCdevice*)pDevice->openal.pDeviceALC);
+    }
+
+    mal_free(pDevice->openal.pIntermediaryBuffer);
+}
+
+// Initializes a device on the OpenAL backend.
+//
+// pContext  - The context providing the OpenAL function pointers and extension flags.
+// type      - mal_device_type_playback opens a playback device, anything else opens a capture device.
+// pDeviceID - The device to open, or NULL to pass NULL as the device name to OpenAL.
+// pConfig   - Requested format, channel count and sample rate.
+// pDevice   - The device being initialized. Its periods and bufferSizeInFrames may be adjusted, and its
+//             internal format/channels/sample rate/channel map and OpenAL state are filled in.
+//
+// Returns MAL_SUCCESS on success. Failures are reported through mal_context_post_error() with
+// MAL_FORMAT_NOT_SUPPORTED, MAL_FAILED_TO_INIT_BACKEND or MAL_OUT_OF_MEMORY.
+mal_result mal_device_init__openal(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    // The local buffersAL array below holds MAL_MAX_PERIODS_OPENAL entries, so the period count is clamped to that.
+    if (pDevice->periods > MAL_MAX_PERIODS_OPENAL) {
+        pDevice->periods = MAL_MAX_PERIODS_OPENAL;
+    }
+
+    // OpenAL has bad latency in my testing :(
+    if (pDevice->usingDefaultBufferSize) {
+        pDevice->bufferSizeInFrames *= 4;
+    }
+
+    mal_ALCsizei bufferSizeInSamplesAL = pDevice->bufferSizeInFrames;
+    mal_ALCuint frequencyAL = pConfig->sampleRate;
+
+    mal_uint32 channelsAL = 0;
+
+    // Only mono and stereo are selected here. TODO: Check for the AL_EXT_MCFORMATS extension and use one of those formats for quad, 5.1, etc.
+    //
+    // Format mapping: f32 is used only when the AL_EXT_float32 extension was detected, otherwise it falls back
+    // to 16-bit. s32 and s24 are also mapped to 16-bit. Any channel count other than 1 is treated as stereo.
+    mal_ALCenum formatAL = 0;
+    if (pConfig->channels == 1) {
+        // Mono.
+        channelsAL = 1;
+        if (pConfig->format == mal_format_f32) {
+            if (pContext->openal.isFloat32Supported) {
+                formatAL = MAL_AL_FORMAT_MONO_FLOAT32;
+            } else {
+                formatAL = MAL_AL_FORMAT_MONO16;
+            }
+        } else if (pConfig->format == mal_format_s32) {
+            formatAL = MAL_AL_FORMAT_MONO16;
+        } else if (pConfig->format == mal_format_s24) {
+            formatAL = MAL_AL_FORMAT_MONO16;
+        } else if (pConfig->format == mal_format_s16) {
+            formatAL = MAL_AL_FORMAT_MONO16;
+        } else if (pConfig->format == mal_format_u8) {
+            formatAL = MAL_AL_FORMAT_MONO8;
+        }
+    } else {
+        // Stereo.
+        channelsAL = 2;
+        if (pConfig->format == mal_format_f32) {
+            if (pContext->openal.isFloat32Supported) {
+                formatAL = MAL_AL_FORMAT_STEREO_FLOAT32;
+            } else {
+                formatAL = MAL_AL_FORMAT_STEREO16;
+            }
+        } else if (pConfig->format == mal_format_s32) {
+            formatAL = MAL_AL_FORMAT_STEREO16;
+        } else if (pConfig->format == mal_format_s24) {
+            formatAL = MAL_AL_FORMAT_STEREO16;
+        } else if (pConfig->format == mal_format_s16) {
+            formatAL = MAL_AL_FORMAT_STEREO16;
+        } else if (pConfig->format == mal_format_u8) {
+            formatAL = MAL_AL_FORMAT_STEREO8;
+        }
+    }
+
+    // formatAL is still 0 if pConfig->format matched none of the formats handled above.
+    if (formatAL == 0) {
+        return mal_context_post_error(pContext, NULL, "[OpenAL] Format not supported.", MAL_FORMAT_NOT_SUPPORTED);
+    }
+
+    // NOTE(review): this value is only used as the buffer size argument of alcCaptureOpenDevice() below.
+    // Confirm whether OpenAL expects that size in sample frames or in individual samples.
+    bufferSizeInSamplesAL *= channelsAL;
+
+
+    // OpenAL feels a bit unintuitive to me... The global object is a device, and it would appear that each device can have
+    // many contexts...
+    mal_ALCdevice* pDeviceALC = NULL;
+    if (type == mal_device_type_playback) {
+        pDeviceALC = ((MAL_LPALCOPENDEVICE)pContext->openal.alcOpenDevice)((pDeviceID == NULL) ? NULL : pDeviceID->openal);
+    } else {
+        pDeviceALC = ((MAL_LPALCCAPTUREOPENDEVICE)pContext->openal.alcCaptureOpenDevice)((pDeviceID == NULL) ? NULL : pDeviceID->openal, frequencyAL, formatAL, bufferSizeInSamplesAL);
+    }
+
+    if (pDeviceALC == NULL) {
+        return mal_context_post_error(pContext, NULL, "[OpenAL] Failed to open device.", MAL_FAILED_TO_INIT_BACKEND);
+    }
+
+    // A context is only required for playback.
+    // NOTE(review): this branch tests pDevice->type whereas the device was opened based on the "type"
+    // parameter; this assumes the caller has already set pDevice->type (and pDevice->pContext) to match.
+    mal_ALCcontext* pContextALC = NULL;
+    if (pDevice->type == mal_device_type_playback) {
+        pContextALC = ((MAL_LPALCCREATECONTEXT)pContext->openal.alcCreateContext)(pDeviceALC, NULL);
+        if (pContextALC == NULL) {
+            ((MAL_LPALCCLOSEDEVICE)pDevice->pContext->openal.alcCloseDevice)(pDeviceALC);
+            return mal_context_post_error(pContext, NULL, "[OpenAL] Failed to open OpenAL context.", MAL_FAILED_TO_INIT_BACKEND);
+        }
+
+        ((MAL_LPALCMAKECONTEXTCURRENT)pDevice->pContext->openal.alcMakeContextCurrent)(pContextALC);
+
+        mal_ALuint sourceAL;
+        ((MAL_LPALGENSOURCES)pDevice->pContext->openal.alGenSources)(1, &sourceAL);
+        pDevice->openal.sourceAL = sourceAL;
+
+        // We create the buffers, but only fill and queue them when the device is started.
+        mal_ALuint buffersAL[MAL_MAX_PERIODS_OPENAL];
+        ((MAL_LPALGENBUFFERS)pDevice->pContext->openal.alGenBuffers)(pDevice->periods, buffersAL);
+        for (mal_uint32 i = 0; i < pDevice->periods; ++i) {
+            pDevice->openal.buffersAL[i] = buffersAL[i];
+        }
+    }
+
+    pDevice->internalChannels = channelsAL;
+    pDevice->internalSampleRate = frequencyAL;
+
+    // Deriving the internal format from the OpenAL format is straightforward. Only the mono and stereo
+    // formats can be selected above; the multi-channel cases are listed for completeness.
+    switch (formatAL)
+    {
+        case MAL_AL_FORMAT_MONO8:
+        case MAL_AL_FORMAT_STEREO8:
+        case MAL_AL_FORMAT_REAR8:
+        case MAL_AL_FORMAT_QUAD8:
+        case MAL_AL_FORMAT_51CHN8:
+        case MAL_AL_FORMAT_61CHN8:
+        case MAL_AL_FORMAT_71CHN8:
+        {
+            pDevice->internalFormat = mal_format_u8;
+        } break;
+
+        case MAL_AL_FORMAT_MONO16:
+        case MAL_AL_FORMAT_STEREO16:
+        case MAL_AL_FORMAT_REAR16:
+        case MAL_AL_FORMAT_QUAD16:
+        case MAL_AL_FORMAT_51CHN16:
+        case MAL_AL_FORMAT_61CHN16:
+        case MAL_AL_FORMAT_71CHN16:
+        {
+            pDevice->internalFormat = mal_format_s16;
+        } break;
+
+        case MAL_AL_FORMAT_REAR32:
+        case MAL_AL_FORMAT_QUAD32:
+        case MAL_AL_FORMAT_51CHN32:
+        case MAL_AL_FORMAT_61CHN32:
+        case MAL_AL_FORMAT_71CHN32:
+        {
+            pDevice->internalFormat = mal_format_s32;
+        } break;
+
+        case MAL_AL_FORMAT_MONO_FLOAT32:
+        case MAL_AL_FORMAT_STEREO_FLOAT32:
+        {
+            pDevice->internalFormat = mal_format_f32;
+        } break;
+    }
+
+    // From what I can tell, the ordering of channels is fixed for OpenAL.
+    switch (formatAL)
+    {
+        case MAL_AL_FORMAT_MONO8:
+        case MAL_AL_FORMAT_MONO16:
+        case MAL_AL_FORMAT_MONO_FLOAT32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_FRONT_CENTER;
+        } break;
+
+        case MAL_AL_FORMAT_STEREO8:
+        case MAL_AL_FORMAT_STEREO16:
+        case MAL_AL_FORMAT_STEREO_FLOAT32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            pDevice->internalChannelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+        } break;
+
+        case MAL_AL_FORMAT_REAR8:
+        case MAL_AL_FORMAT_REAR16:
+        case MAL_AL_FORMAT_REAR32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_BACK_LEFT;
+            pDevice->internalChannelMap[1] = MAL_CHANNEL_BACK_RIGHT;
+        } break;
+
+        case MAL_AL_FORMAT_QUAD8:
+        case MAL_AL_FORMAT_QUAD16:
+        case MAL_AL_FORMAT_QUAD32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            pDevice->internalChannelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+            pDevice->internalChannelMap[2] = MAL_CHANNEL_BACK_LEFT;
+            pDevice->internalChannelMap[3] = MAL_CHANNEL_BACK_RIGHT;
+        } break;
+
+        case MAL_AL_FORMAT_51CHN8:
+        case MAL_AL_FORMAT_51CHN16:
+        case MAL_AL_FORMAT_51CHN32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            pDevice->internalChannelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+            pDevice->internalChannelMap[2] = MAL_CHANNEL_FRONT_CENTER;
+            pDevice->internalChannelMap[3] = MAL_CHANNEL_LFE;
+            pDevice->internalChannelMap[4] = MAL_CHANNEL_BACK_LEFT;
+            pDevice->internalChannelMap[5] = MAL_CHANNEL_BACK_RIGHT;
+        } break;
+
+        case MAL_AL_FORMAT_61CHN8:
+        case MAL_AL_FORMAT_61CHN16:
+        case MAL_AL_FORMAT_61CHN32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            pDevice->internalChannelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+            pDevice->internalChannelMap[2] = MAL_CHANNEL_FRONT_CENTER;
+            pDevice->internalChannelMap[3] = MAL_CHANNEL_LFE;
+            pDevice->internalChannelMap[4] = MAL_CHANNEL_BACK_CENTER;
+            pDevice->internalChannelMap[5] = MAL_CHANNEL_SIDE_LEFT;
+            pDevice->internalChannelMap[6] = MAL_CHANNEL_SIDE_RIGHT;
+        } break;
+
+        case MAL_AL_FORMAT_71CHN8:
+        case MAL_AL_FORMAT_71CHN16:
+        case MAL_AL_FORMAT_71CHN32:
+        {
+            pDevice->internalChannelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+            pDevice->internalChannelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+            pDevice->internalChannelMap[2] = MAL_CHANNEL_FRONT_CENTER;
+            pDevice->internalChannelMap[3] = MAL_CHANNEL_LFE;
+            pDevice->internalChannelMap[4] = MAL_CHANNEL_BACK_LEFT;
+            pDevice->internalChannelMap[5] = MAL_CHANNEL_BACK_RIGHT;
+            pDevice->internalChannelMap[6] = MAL_CHANNEL_SIDE_LEFT;
+            pDevice->internalChannelMap[7] = MAL_CHANNEL_SIDE_RIGHT;
+        } break;
+
+        default: break;
+    }
+
+    // The OpenAL handles are stored before the allocation below so that mal_device_uninit__openal() can
+    // release them if the allocation fails.
+    pDevice->openal.pDeviceALC = pDeviceALC;
+    pDevice->openal.pContextALC = pContextALC;
+    pDevice->openal.formatAL = formatAL;
+    pDevice->openal.subBufferSizeInFrames = pDevice->bufferSizeInFrames / pDevice->periods;
+    // The intermediary buffer holds exactly one sub-buffer (period) worth of frames in the internal format.
+    pDevice->openal.pIntermediaryBuffer = (mal_uint8*)mal_malloc(pDevice->openal.subBufferSizeInFrames * channelsAL * mal_get_sample_size_in_bytes(pDevice->internalFormat));
+    if (pDevice->openal.pIntermediaryBuffer == NULL) {
+        mal_device_uninit__openal(pDevice);
+        return mal_context_post_error(pContext, NULL, "[OpenAL] Failed to allocate memory for intermediary buffer.", MAL_OUT_OF_MEMORY);
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Starts the device.
+//
+// Capture devices simply start capturing. Playback devices fill every period buffer with data read from
+// the client, queue all of them on the source, and only then start playing the source.
+//
+// Always returns MAL_SUCCESS.
+static mal_result mal_device__start_backend__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_context* pContext = pDevice->pContext;
+
+    if (pDevice->type != mal_device_type_playback) {
+        // Capture.
+        ((MAL_LPALCCAPTURESTART)pContext->openal.alcCaptureStart)((mal_ALCdevice*)pDevice->openal.pDeviceALC);
+        return MAL_SUCCESS;
+    }
+
+    // Playback. Refilling in the main loop starts again from the first buffer.
+    pDevice->openal.iNextBuffer = 0;
+
+    ((MAL_LPALCMAKECONTEXTCURRENT)pContext->openal.alcMakeContextCurrent)((mal_ALCcontext*)pDevice->openal.pContextALC);
+
+    mal_uint32 iPeriod = 0;
+    while (iPeriod < pDevice->periods) {
+        // Pull one period of audio from the client into the staging buffer...
+        mal_device__read_frames_from_client(pDevice, pDevice->openal.subBufferSizeInFrames, pDevice->openal.pIntermediaryBuffer);
+
+        // ...then upload it to the OpenAL buffer for this period and queue that buffer on the source.
+        mal_ALuint periodBufferAL = pDevice->openal.buffersAL[iPeriod];
+        ((MAL_LPALBUFFERDATA)pContext->openal.alBufferData)(periodBufferAL, pDevice->openal.formatAL, pDevice->openal.pIntermediaryBuffer, pDevice->openal.subBufferSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat), pDevice->internalSampleRate);
+        ((MAL_LPALSOURCEQUEUEBUFFERS)pContext->openal.alSourceQueueBuffers)(pDevice->openal.sourceAL, 1, &periodBufferAL);
+
+        iPeriod += 1;
+    }
+
+    // Every buffer is filled and queued, so the source can now be started.
+    ((MAL_LPALSOURCEPLAY)pContext->openal.alSourcePlay)(pDevice->openal.sourceAL);
+
+    return MAL_SUCCESS;
+}
+
+// Stops the device: capture devices stop capturing, playback devices stop their source.
+//
+// Always returns MAL_SUCCESS.
+static mal_result mal_device__stop_backend__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_context* pContext = pDevice->pContext;
+
+    if (pDevice->type != mal_device_type_playback) {
+        ((MAL_LPALCCAPTURESTOP)pContext->openal.alcCaptureStop)((mal_ALCdevice*)pDevice->openal.pDeviceALC);
+    } else {
+        // The source belongs to this device's context, so that context is made current first.
+        ((MAL_LPALCMAKECONTEXTCURRENT)pContext->openal.alcMakeContextCurrent)((mal_ALCcontext*)pDevice->openal.pContextALC);
+        ((MAL_LPALSOURCESTOP)pContext->openal.alSourceStop)(pDevice->openal.sourceAL);
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Requests that the OpenAL main loop terminate.
+//
+// This only raises the breakFromMainLoop flag; mal_device__main_loop__openal() and
+// mal_device__wait_for_frames__openal() poll that flag and exit their loops once it is set.
+//
+// Always returns MAL_SUCCESS.
+static mal_result mal_device__break_main_loop__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->openal.breakFromMainLoop = MAL_TRUE;
+    return MAL_SUCCESS;
+}
+
+// Returns the number of frames that can currently be processed without waiting.
+//
+// Playback: the number of buffers the source has finished with, multiplied by the sub-buffer size.
+// Capture:  the value reported for MAL_ALC_CAPTURE_SAMPLES divided by the device's channel count.
+static mal_uint32 mal_device__get_available_frames__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_context* pContext = pDevice->pContext;
+
+    if (pDevice->type != mal_device_type_playback) {
+        mal_ALint capturedSampleCount = 0;
+        ((MAL_LPALCGETINTEGERV)pContext->openal.alcGetIntegerv)((mal_ALCdevice*)pDevice->openal.pDeviceALC, MAL_ALC_CAPTURE_SAMPLES, 1, &capturedSampleCount);
+
+        return capturedSampleCount / pDevice->channels;
+    }
+
+    // The source is queried through this device's context, so make it current first.
+    ((MAL_LPALCMAKECONTEXTCURRENT)pContext->openal.alcMakeContextCurrent)((mal_ALCcontext*)pDevice->openal.pContextALC);
+
+    mal_ALint finishedBufferCount = 0;
+    ((MAL_LPALGETSOURCEI)pContext->openal.alGetSourcei)(pDevice->openal.sourceAL, MAL_AL_BUFFERS_PROCESSED, &finishedBufferCount);
+
+    return finishedBufferCount * pDevice->openal.subBufferSizeInFrames;
+}
+
+// Polls until at least one frame is available or a break from the main loop has been requested.
+//
+// Returns the number of available frames. If the wait was interrupted by a break request, playback
+// devices return 0 while capture devices return whatever is available at that moment.
+static mal_uint32 mal_device__wait_for_frames__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    for (;;) {
+        if (pDevice->openal.breakFromMainLoop) {
+            break;
+        }
+
+        mal_uint32 frameCount = mal_device__get_available_frames__openal(pDevice);
+        if (frameCount > 0) {
+            return frameCount;
+        }
+
+        // Nothing available yet; sleep briefly before polling again.
+        mal_sleep(1);
+    }
+
+    // We only get here if the loop was terminated. For playback we just drop whatever is pending.
+    if (pDevice->type == mal_device_type_playback) {
+        return 0;
+    }
+
+    // When capturing we want to return whatever is available.
+    return mal_device__get_available_frames__openal(pDevice);
+}
+
+// Runs the OpenAL worker loop until mal_device__break_main_loop__openal() is called.
+//
+// Playback: each processed buffer is unqueued, refilled with data read from the client and queued again.
+//           The source is restarted if it is found not to be playing.
+// Capture:  captured samples are read into the intermediary buffer and sent to the client in chunks of at
+//           most one sub-buffer.
+//
+// Returns MAL_SUCCESS when the loop terminates.
+static mal_result mal_device__main_loop__openal(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    pDevice->openal.breakFromMainLoop = MAL_FALSE;
+    while (!pDevice->openal.breakFromMainLoop) {
+        mal_uint32 framesAvailable = mal_device__wait_for_frames__openal(pDevice);
+        if (framesAvailable == 0) {
+            continue;
+        }
+
+        // If it's a playback device, don't bother grabbing more data if the device is being stopped.
+        // Stopping is a normal way for the loop to end, so report it with a result code (MAL_SUCCESS)
+        // rather than the boolean MAL_FALSE.
+        if (pDevice->openal.breakFromMainLoop && pDevice->type == mal_device_type_playback) {
+            return MAL_SUCCESS;
+        }
+
+        if (pDevice->type == mal_device_type_playback) {
+            while (framesAvailable > 0) {
+                mal_uint32 framesToRead = (framesAvailable > pDevice->openal.subBufferSizeInFrames) ? pDevice->openal.subBufferSizeInFrames : framesAvailable;
+
+                // Buffers are recycled in the same round-robin order in which they were queued.
+                mal_ALuint bufferAL = pDevice->openal.buffersAL[pDevice->openal.iNextBuffer];
+                pDevice->openal.iNextBuffer = (pDevice->openal.iNextBuffer + 1) % pDevice->periods;
+
+                mal_device__read_frames_from_client(pDevice, framesToRead, pDevice->openal.pIntermediaryBuffer);
+
+                ((MAL_LPALCMAKECONTEXTCURRENT)pDevice->pContext->openal.alcMakeContextCurrent)((mal_ALCcontext*)pDevice->openal.pContextALC);
+                ((MAL_LPALSOURCEUNQUEUEBUFFERS)pDevice->pContext->openal.alSourceUnqueueBuffers)(pDevice->openal.sourceAL, 1, &bufferAL);
+                ((MAL_LPALBUFFERDATA)pDevice->pContext->openal.alBufferData)(bufferAL, pDevice->openal.formatAL, pDevice->openal.pIntermediaryBuffer, pDevice->openal.subBufferSizeInFrames * pDevice->internalChannels * mal_get_sample_size_in_bytes(pDevice->internalFormat), pDevice->internalSampleRate);
+                ((MAL_LPALSOURCEQUEUEBUFFERS)pDevice->pContext->openal.alSourceQueueBuffers)(pDevice->openal.sourceAL, 1, &bufferAL);
+
+                framesAvailable -= framesToRead;
+            }
+
+
+            // There's a chance the source has stopped playing due to there not being any buffers queued. Make sure it's restarted.
+            mal_ALenum state;
+            ((MAL_LPALGETSOURCEI)pDevice->pContext->openal.alGetSourcei)(pDevice->openal.sourceAL, MAL_AL_SOURCE_STATE, &state);
+
+            if (state != MAL_AL_PLAYING) {
+                ((MAL_LPALSOURCEPLAY)pDevice->pContext->openal.alSourcePlay)(pDevice->openal.sourceAL);
+            }
+        } else {
+            while (framesAvailable > 0) {
+                // The intermediary buffer only holds one sub-buffer, so deliver the data in chunks of that size.
+                mal_uint32 framesToSend = (framesAvailable > pDevice->openal.subBufferSizeInFrames) ? pDevice->openal.subBufferSizeInFrames : framesAvailable;
+                ((MAL_LPALCCAPTURESAMPLES)pDevice->pContext->openal.alcCaptureSamples)((mal_ALCdevice*)pDevice->openal.pDeviceALC, pDevice->openal.pIntermediaryBuffer, framesToSend);
+
+                mal_device__send_frames_to_client(pDevice, framesToSend, pDevice->openal.pIntermediaryBuffer);
+                framesAvailable -= framesToSend;
+            }
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+#endif  // OpenAL
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// SDL Backend
+//
+///////////////////////////////////////////////////////////////////////////////
+#ifdef MAL_HAS_SDL
+
+//#define MAL_USE_SDL_1
+
+// Local copies of the SDL constants used by this backend, so that the SDL headers are not required when
+// linking at run time.
+// NOTE(review): the values are presumably identical to SDL_INIT_AUDIO, AUDIO_U8/S16/S32/F32 and the
+// SDL_AUDIO_ALLOW_* flags - confirm against the SDL headers.
+#define MAL_SDL_INIT_AUDIO                      0x00000010
+#define MAL_AUDIO_U8                            0x0008
+#define MAL_AUDIO_S16                           0x8010
+#define MAL_AUDIO_S32                           0x8020
+#define MAL_AUDIO_F32                           0x8120
+#define MAL_SDL_AUDIO_ALLOW_FREQUENCY_CHANGE    0x00000001
+#define MAL_SDL_AUDIO_ALLOW_FORMAT_CHANGE       0x00000002
+#define MAL_SDL_AUDIO_ALLOW_CHANNELS_CHANGE     0x00000004
+#define MAL_SDL_AUDIO_ALLOW_ANY_CHANGE          (MAL_SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | MAL_SDL_AUDIO_ALLOW_FORMAT_CHANGE | MAL_SDL_AUDIO_ALLOW_CHANNELS_CHANGE)
+
+// If we are linking at compile time we'll just #include SDL.h. Otherwise we can just redeclare some stuff to avoid the
+// need for development packages to be installed.
+#ifdef MAL_NO_RUNTIME_LINKING
+    // Tell SDL.h that the application provides its own main().
+    #define SDL_MAIN_HANDLED
+    #ifdef MAL_EMSCRIPTEN
+        #include <SDL/SDL.h>
+
+        // For now just use SDL 1.2 with Emscripten. This avoids the need for "-s USE_SDL=2" at compile time.
+        #ifndef MAL_USE_SDL_1
+        #define MAL_USE_SDL_1
+        #endif
+    #else
+        #include <SDL2/SDL.h>
+    #endif
+
+    // Compile-time linking: the MAL_SDL_* names are aliases of the real SDL types.
+    typedef SDL_AudioCallback   MAL_SDL_AudioCallback;
+    typedef SDL_AudioSpec       MAL_SDL_AudioSpec;
+    typedef SDL_AudioFormat     MAL_SDL_AudioFormat;
+    typedef SDL_AudioDeviceID   MAL_SDL_AudioDeviceID;
+#else
+    // Run-time linking: the SDL types are redeclared locally.
+    typedef void (* MAL_SDL_AudioCallback)(void* userdata, mal_uint8* stream, int len);
+    typedef mal_uint16 MAL_SDL_AudioFormat;
+    typedef mal_uint32 MAL_SDL_AudioDeviceID;
+
+    // NOTE(review): this is passed directly to SDL_OpenAudio()/SDL_OpenAudioDevice(), so its layout is
+    // assumed to match SDL's own SDL_AudioSpec - confirm against the SDL headers.
+    typedef struct MAL_SDL_AudioSpec
+    {
+        int freq;
+        MAL_SDL_AudioFormat format;
+        mal_uint8 channels;
+        mal_uint8 silence;
+        mal_uint16 samples;
+        mal_uint16 padding;
+        mal_uint32 size;
+        MAL_SDL_AudioCallback callback;
+        void* userdata;
+    } MAL_SDL_AudioSpec;
+#endif
+
+// Function pointer types for the SDL functions used by this backend. The pointers stored in the context are
+// cast to these types at each call site.
+typedef int                   (* MAL_PFN_SDL_InitSubSystem)(mal_uint32 flags);
+typedef void                  (* MAL_PFN_SDL_QuitSubSystem)(mal_uint32 flags);
+typedef int                   (* MAL_PFN_SDL_GetNumAudioDevices)(int iscapture);
+typedef const char*           (* MAL_PFN_SDL_GetAudioDeviceName)(int index, int iscapture);
+typedef void                  (* MAL_PFN_SDL_CloseAudio)(void);
+typedef void                  (* MAL_PFN_SDL_CloseAudioDevice)(MAL_SDL_AudioDeviceID dev);
+typedef int                   (* MAL_PFN_SDL_OpenAudio)(MAL_SDL_AudioSpec* desired, MAL_SDL_AudioSpec* obtained);
+typedef MAL_SDL_AudioDeviceID (* MAL_PFN_SDL_OpenAudioDevice)(const char* device, int iscapture, const MAL_SDL_AudioSpec* desired, MAL_SDL_AudioSpec* obtained, int allowed_changes);
+typedef void                  (* MAL_PFN_SDL_PauseAudio)(int pause_on);
+typedef void                  (* MAL_PFN_SDL_PauseAudioDevice)(MAL_SDL_AudioDeviceID dev, int pause_on);
+
+// Converts a mini_al sample format to the corresponding SDL audio format.
+//
+// Returns 0 when there is no mapping (mal_format_unknown or an unrecognized value). s24 has no SDL
+// equivalent and is mapped to the closest match, 32-bit signed.
+MAL_SDL_AudioFormat mal_format_to_sdl(mal_format format)
+{
+    switch (format)
+    {
+    case mal_format_unknown: return 0;
+    case mal_format_u8:      return MAL_AUDIO_U8;
+    case mal_format_s16:     return MAL_AUDIO_S16;
+    case mal_format_s24:     return MAL_AUDIO_S32;  // Closest match.
+    case mal_format_s32:     return MAL_AUDIO_S32;
+    case mal_format_f32:     return MAL_AUDIO_F32;  // Keeps the mapping symmetric with mal_format_from_sdl().
+    default:                 return 0;
+    }
+}
+
+// Converts an SDL audio format to the corresponding mini_al sample format.
+//
+// Returns mal_format_unknown for any SDL format other than U8, S16, S32 and F32.
+mal_format mal_format_from_sdl(MAL_SDL_AudioFormat format)
+{
+    if (format == MAL_AUDIO_U8) {
+        return mal_format_u8;
+    }
+    if (format == MAL_AUDIO_S16) {
+        return mal_format_s16;
+    }
+    if (format == MAL_AUDIO_S32) {
+        return mal_format_s32;
+    }
+    if (format == MAL_AUDIO_F32) {
+        return mal_format_f32;
+    }
+
+    return mal_format_unknown;
+}
+
+
+// Initializes the SDL backend for a context.
+//
+// With run-time linking the SDL shared library is located and its audio functions are looked up by name.
+// With compile-time linking (MAL_NO_RUNTIME_LINKING) the functions are referenced directly. The SDL audio
+// subsystem is then initialized.
+//
+// Returns MAL_SUCCESS on success, MAL_NO_BACKEND if the SDL library or one of the functions required by
+// both SDL 1 and SDL 2 cannot be found, and MAL_ERROR if SDL fails to initialize its audio subsystem.
+mal_result mal_context_init__sdl(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+
+#ifndef MAL_NO_RUNTIME_LINKING
+    // Run-time linking.
+    const char* libNames[] = {
+#if defined(MAL_WIN32)
+        "SDL2.dll",
+        "SDL.dll"
+#elif defined(MAL_APPLE)
+        "libSDL2-2.0.0.dylib",  // Can any Mac users out there confirm these library names?
+        "libSDL-1.2.0.dylib"
+#else
+        "libSDL2-2.0.so.0",
+        "libSDL-1.2.so.0"
+#endif
+    };
+
+    // The SDL 2 name is listed first so that it is preferred over SDL 1.2.
+    for (size_t i = 0; i < mal_countof(libNames); ++i) {
+        pContext->sdl.hSDL = mal_dlopen(libNames[i]);
+        if (pContext->sdl.hSDL != NULL) {
+            break;
+        }
+    }
+
+    if (pContext->sdl.hSDL == NULL) {
+        return MAL_NO_BACKEND;  // Couldn't find SDL2.dll, etc. Most likely it's not installed.
+    }
+
+    pContext->sdl.SDL_InitSubSystem      = mal_dlsym(pContext->sdl.hSDL, "SDL_InitSubSystem");
+    pContext->sdl.SDL_QuitSubSystem      = mal_dlsym(pContext->sdl.hSDL, "SDL_QuitSubSystem");
+    pContext->sdl.SDL_CloseAudio         = mal_dlsym(pContext->sdl.hSDL, "SDL_CloseAudio");
+    pContext->sdl.SDL_OpenAudio          = mal_dlsym(pContext->sdl.hSDL, "SDL_OpenAudio");
+    pContext->sdl.SDL_PauseAudio         = mal_dlsym(pContext->sdl.hSDL, "SDL_PauseAudio");
+#ifndef MAL_USE_SDL_1
+    pContext->sdl.SDL_GetNumAudioDevices = mal_dlsym(pContext->sdl.hSDL, "SDL_GetNumAudioDevices");
+    pContext->sdl.SDL_GetAudioDeviceName = mal_dlsym(pContext->sdl.hSDL, "SDL_GetAudioDeviceName");
+    pContext->sdl.SDL_CloseAudioDevice   = mal_dlsym(pContext->sdl.hSDL, "SDL_CloseAudioDevice");
+    pContext->sdl.SDL_OpenAudioDevice    = mal_dlsym(pContext->sdl.hSDL, "SDL_OpenAudioDevice");
+    pContext->sdl.SDL_PauseAudioDevice   = mal_dlsym(pContext->sdl.hSDL, "SDL_PauseAudioDevice");
+#endif
+
+    // These functions are called unconditionally by this backend. If any of them is missing the library is
+    // unusable, so release it and report that the backend is unavailable instead of calling through NULL.
+    if (pContext->sdl.SDL_InitSubSystem == NULL ||
+        pContext->sdl.SDL_QuitSubSystem == NULL ||
+        pContext->sdl.SDL_CloseAudio    == NULL ||
+        pContext->sdl.SDL_OpenAudio     == NULL ||
+        pContext->sdl.SDL_PauseAudio    == NULL) {
+        mal_dlclose(pContext->sdl.hSDL);
+        pContext->sdl.hSDL = NULL;
+        return MAL_NO_BACKEND;
+    }
+#else
+    // Compile-time linking.
+    pContext->sdl.SDL_InitSubSystem      = (mal_proc)SDL_InitSubSystem;
+    pContext->sdl.SDL_QuitSubSystem      = (mal_proc)SDL_QuitSubSystem;
+    pContext->sdl.SDL_CloseAudio         = (mal_proc)SDL_CloseAudio;
+    pContext->sdl.SDL_OpenAudio          = (mal_proc)SDL_OpenAudio;
+    pContext->sdl.SDL_PauseAudio         = (mal_proc)SDL_PauseAudio;
+#ifndef MAL_USE_SDL_1
+    pContext->sdl.SDL_GetNumAudioDevices = (mal_proc)SDL_GetNumAudioDevices;
+    pContext->sdl.SDL_GetAudioDeviceName = (mal_proc)SDL_GetAudioDeviceName;
+    pContext->sdl.SDL_CloseAudioDevice   = (mal_proc)SDL_CloseAudioDevice;
+    pContext->sdl.SDL_OpenAudioDevice    = (mal_proc)SDL_OpenAudioDevice;
+    pContext->sdl.SDL_PauseAudioDevice   = (mal_proc)SDL_PauseAudioDevice;
+#endif
+#endif
+
+    // We need to determine whether or not we are using SDL2 or SDL1. We can know this by looking at whether or not certain
+    // function pointers are NULL.
+    // NOTE(review): when MAL_USE_SDL_1 is defined these pointers are never assigned above, so this relies on
+    // the context having been zero-initialized by the caller - confirm.
+    if (pContext->sdl.SDL_GetNumAudioDevices == NULL ||
+        pContext->sdl.SDL_GetAudioDeviceName == NULL ||
+        pContext->sdl.SDL_CloseAudioDevice   == NULL ||
+        pContext->sdl.SDL_OpenAudioDevice    == NULL ||
+        pContext->sdl.SDL_PauseAudioDevice   == NULL) {
+        pContext->sdl.usingSDL1 = MAL_TRUE;
+    }
+
+    int resultSDL = ((MAL_PFN_SDL_InitSubSystem)pContext->sdl.SDL_InitSubSystem)(MAL_SDL_INIT_AUDIO);
+    if (resultSDL != 0) {
+#ifndef MAL_NO_RUNTIME_LINKING
+        // Don't leak the library handle when initialization fails.
+        mal_dlclose(pContext->sdl.hSDL);
+        pContext->sdl.hSDL = NULL;
+#endif
+        return MAL_ERROR;
+    }
+
+    return MAL_SUCCESS;
+}
+
+mal_result mal_context_uninit__sdl(mal_context* pContext)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pContext->backend == mal_backend_sdl);
+
+    ((MAL_PFN_SDL_QuitSubSystem)pContext->sdl.SDL_QuitSubSystem)(MAL_SDL_INIT_AUDIO);
+    return MAL_SUCCESS;
+}
+
+// Enumerates the SDL playback or capture devices.
+//
+// pContext - The context whose SDL function pointers are used.
+// type     - mal_device_type_playback to list playback devices, anything else lists capture devices.
+// pCount   - On entry, the capacity of pInfo in elements. On return, the number of entries written to pInfo
+//            or, when pInfo is NULL, the total number of devices found.
+// pInfo    - Optional output array. Pass NULL to only count the devices.
+//
+// With SDL 1 a single default device is reported. Always returns MAL_SUCCESS.
+mal_result mal_enumerate_devices__sdl(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    (void)pContext;   // Not referenced when MAL_USE_SDL_1 is defined.
+
+    mal_uint32 infoSize = *pCount;
+    *pCount = 0;
+
+#ifndef MAL_USE_SDL_1
+    if (!pContext->sdl.usingSDL1) {
+        int deviceCount = ((MAL_PFN_SDL_GetNumAudioDevices)pContext->sdl.SDL_GetNumAudioDevices)((type == mal_device_type_playback) ? 0 : 1);
+        for (int i = 0; i < deviceCount; ++i) {
+            if (pInfo != NULL) {
+                // Only write while the caller's array still has room. The remaining capacity must be
+                // decremented for every entry written, otherwise pInfo runs past the end of the array
+                // when there are more devices than the caller made room for.
+                if (infoSize > 0) {
+                    pInfo->id.sdl = i;
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), ((MAL_PFN_SDL_GetAudioDeviceName)pContext->sdl.SDL_GetAudioDeviceName)(i, (type == mal_device_type_playback) ? 0 : 1), (size_t)-1);
+
+                    pInfo += 1;
+                    infoSize -= 1;
+                    *pCount += 1;
+                }
+            } else {
+                *pCount += 1;
+            }
+        }
+    } else
+#endif
+    {
+        if (pInfo != NULL) {
+            if (infoSize > 0) {
+                // SDL1 uses default devices.
+                if (type == mal_device_type_playback) {
+                    pInfo->id.sdl = 0;
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Playback Device", (size_t)-1);
+                } else {
+                    pInfo->id.sdl = 0;
+                    mal_strncpy_s(pInfo->name, sizeof(pInfo->name), "Default Capture Device", (size_t)-1);
+                }
+
+                pInfo += 1;
+                infoSize -= 1;
+                *pCount += 1;
+            }
+        } else {
+            *pCount += 1;
+        }
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Closes the SDL device that backs pDevice.
+//
+// With SDL2 the device is closed by its stored device ID; otherwise the SDL1-style SDL_CloseAudio()
+// entry point, which takes no arguments, is used.
+void mal_device_uninit__sdl(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_context* pContext = pDevice->pContext;
+
+#ifndef MAL_USE_SDL_1
+    if (!pContext->sdl.usingSDL1) {
+        MAL_PFN_SDL_CloseAudioDevice pfnCloseAudioDevice = (MAL_PFN_SDL_CloseAudioDevice)pContext->sdl.SDL_CloseAudioDevice;
+        pfnCloseAudioDevice(pDevice->sdl.deviceID);
+        return;
+    }
+#endif
+
+    ((MAL_PFN_SDL_CloseAudio)pContext->sdl.SDL_CloseAudio)();
+}
+
+
+// Audio callback handed to SDL through the audio spec (see mal_device_init__sdl()).
+//
+// pUserData is the mal_device that owns the stream. The byte count supplied by SDL is converted to
+// a frame count using the device's internal sample format and channel count. For playback devices
+// the buffer is filled from the client; for any other device type the buffer is delivered to the
+// client.
+static void mal_audio_callback__sdl(void* pUserData, mal_uint8* pBuffer, int bufferSizeInBytes)
+{
+    mal_device* pDevice = (mal_device*)pUserData;
+    mal_assert(pDevice != NULL);
+
+    // bytes -> samples -> frames.
+    mal_uint32 sampleCount = (mal_uint32)bufferSizeInBytes / mal_get_sample_size_in_bytes(pDevice->internalFormat);
+    mal_uint32 frameCount  = sampleCount / pDevice->internalChannels;
+
+    if (pDevice->type != mal_device_type_playback) {
+        mal_device__send_frames_to_client(pDevice, frameCount, pBuffer);
+        return;
+    }
+
+    mal_device__read_frames_from_client(pDevice, frameCount, pBuffer);
+}
+
+// Opens an SDL audio device for pDevice and records the format SDL actually granted.
+//
+// pContext, pConfig and pDevice must be non-NULL (asserted). pDeviceID may be NULL, in which case
+// no device name is passed to SDL2; it is ignored entirely on the SDL1 path.
+//
+// Returns MAL_SUCCESS on success, MAL_NO_DEVICE when a non-playback device is requested on the SDL1
+// path, or the result of mal_post_error() with MAL_FAILED_TO_OPEN_BACKEND_DEVICE when SDL fails to
+// open the device. On success the device's internal format, channel count, sample rate and buffer
+// size are taken from the spec SDL returned, and the period count is set to 1.
+mal_result mal_device_init__sdl(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pConfig != NULL);
+    mal_assert(pDevice != NULL);
+
+    // SDL wants a power-of-2 buffer size, and the spec field that carries it is only 16 bits wide.
+    // Anything above 32768 is clamped so that the narrowing cast below cannot overflow.
+    mal_uint32 bufferSize = (pConfig->bufferSizeInFrames > 32768) ? 32768 : mal_next_power_of_2(pConfig->bufferSizeInFrames);
+    mal_assert(bufferSize <= 32768);
+
+
+    MAL_SDL_AudioSpec requestedSpec;
+    MAL_SDL_AudioSpec grantedSpec;
+    mal_zero_memory(&requestedSpec, sizeof(requestedSpec));
+    requestedSpec.freq     = (int)pConfig->sampleRate;
+    requestedSpec.format   = mal_format_to_sdl(pConfig->format);
+    requestedSpec.channels = (mal_uint8)pConfig->channels;
+    requestedSpec.samples  = (mal_uint16)bufferSize;
+    requestedSpec.callback = mal_audio_callback__sdl;
+    requestedSpec.userdata = pDevice;
+
+    // A format of 0 means there is no mapping between the mini_al format and SDL. Use f32 in that case.
+    if (requestedSpec.format == 0) {
+        requestedSpec.format = MAL_AUDIO_F32;
+    }
+
+#ifndef MAL_USE_SDL_1
+    if (!pDevice->pContext->sdl.usingSDL1) {
+        const int isCapture = (type == mal_device_type_playback) ? 0 : 1;
+
+        // A NULL name is passed to SDL when the caller did not ask for a specific device.
+        const char* pDeviceName = (pDeviceID != NULL)
+            ? ((MAL_PFN_SDL_GetAudioDeviceName)pDevice->pContext->sdl.SDL_GetAudioDeviceName)(pDeviceID->sdl, isCapture)
+            : NULL;
+
+        pDevice->sdl.deviceID = ((MAL_PFN_SDL_OpenAudioDevice)pDevice->pContext->sdl.SDL_OpenAudioDevice)(pDeviceName, isCapture, &requestedSpec, &grantedSpec, MAL_SDL_AUDIO_ALLOW_ANY_CHANGE);
+        if (pDevice->sdl.deviceID == 0) {
+            // On this path a device ID of 0 is treated as failure.
+            return mal_post_error(pDevice, "Failed to open SDL device.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+    } else
+#endif
+    {
+        // SDL1 always opens the default device, so the requested ID is irrelevant.
+        (void)pDeviceID;
+
+        // SDL1 only supports playback as far as I can tell.
+        if (type != mal_device_type_playback) {
+            return MAL_NO_DEVICE;
+        }
+
+        // SDL1 has no floating point formats. Use s16 in place of f32.
+        if (requestedSpec.format == MAL_AUDIO_F32) {
+            requestedSpec.format = MAL_AUDIO_S16;
+        }
+
+        // On this path the open call's return value is stored in deviceID and anything other than 0
+        // is treated as failure.
+        pDevice->sdl.deviceID = ((MAL_PFN_SDL_OpenAudio)pDevice->pContext->sdl.SDL_OpenAudio)(&requestedSpec, &grantedSpec);
+        if (pDevice->sdl.deviceID != 0) {
+            return mal_post_error(pDevice, "Failed to open SDL device.", MAL_FAILED_TO_OPEN_BACKEND_DEVICE);
+        }
+    }
+
+    // Whatever SDL granted becomes the device's internal format.
+    pDevice->internalFormat     = mal_format_from_sdl(grantedSpec.format);
+    pDevice->internalChannels   = grantedSpec.channels;
+    pDevice->internalSampleRate = (mal_uint32)grantedSpec.freq;
+    pDevice->bufferSizeInFrames = grantedSpec.samples;
+    pDevice->periods            = 1;    // SDL doesn't seem to tell us what the period count is. Just set this 1.
+
+#if 0
+    printf("=== SDL CONFIG ===\n");
+    printf("REQUESTED -> RECEIVED\n");
+    printf("    FORMAT:                 %s -> %s\n", mal_get_format_name(pConfig->format), mal_get_format_name(pDevice->internalFormat));
+    printf("    CHANNELS:               %d -> %d\n", requestedSpec.channels, grantedSpec.channels);
+    printf("    SAMPLE RATE:            %d -> %d\n", requestedSpec.freq, grantedSpec.freq);
+    printf("    BUFFER SIZE IN SAMPLES: %d -> %d\n", requestedSpec.samples, grantedSpec.samples);
+#endif
+
+    return MAL_SUCCESS;
+}
+
+// Starts the SDL device by un-pausing it (pause flag 0). Always returns MAL_SUCCESS.
+static mal_result mal_device__start_backend__sdl(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_context* pContext = pDevice->pContext;
+
+#ifndef MAL_USE_SDL_1
+    if (!pContext->sdl.usingSDL1) {
+        // SDL2 addresses the device by the ID stored when it was opened.
+        MAL_PFN_SDL_PauseAudioDevice pfnPauseAudioDevice = (MAL_PFN_SDL_PauseAudioDevice)pContext->sdl.SDL_PauseAudioDevice;
+        pfnPauseAudioDevice(pDevice->sdl.deviceID, 0);
+        return MAL_SUCCESS;
+    }
+#endif
+
+    // The SDL1-style entry point takes only the pause flag.
+    ((MAL_PFN_SDL_PauseAudio)pContext->sdl.SDL_PauseAudio)(0);
+    return MAL_SUCCESS;
+}
+
+// Stops the SDL device by pausing it (pause flag 1). Always returns MAL_SUCCESS.
+static mal_result mal_device__stop_backend__sdl(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    mal_context* pContext = pDevice->pContext;
+
+#ifndef MAL_USE_SDL_1
+    if (!pContext->sdl.usingSDL1) {
+        // SDL2 addresses the device by the ID stored when it was opened.
+        MAL_PFN_SDL_PauseAudioDevice pfnPauseAudioDevice = (MAL_PFN_SDL_PauseAudioDevice)pContext->sdl.SDL_PauseAudioDevice;
+        pfnPauseAudioDevice(pDevice->sdl.deviceID, 1);
+        return MAL_SUCCESS;
+    }
+#endif
+
+    // The SDL1-style entry point takes only the pause flag.
+    ((MAL_PFN_SDL_PauseAudio)pContext->sdl.SDL_PauseAudio)(1);
+    return MAL_SUCCESS;
+}
+#endif  // SDL
+
+
+
+
+// Checks that no value appears more than once in the first `channels` entries of channelMap.
+//
+// Returns MAL_TRUE when all entries are distinct and MAL_FALSE as soon as a duplicate pair is
+// found. `channels` must be greater than 0 (asserted).
+mal_bool32 mal__is_channel_map_valid(const mal_channel* channelMap, mal_uint32 channels)
+{
+    mal_assert(channels > 0);
+
+    // Compare every entry against all of the entries that precede it.
+    for (mal_uint32 iLater = 1; iLater < channels; ++iLater) {
+        for (mal_uint32 iEarlier = 0; iEarlier < iLater; ++iEarlier) {
+            if (channelMap[iEarlier] == channelMap[iLater]) {
+                return MAL_FALSE;   // Same channel listed twice.
+            }
+        }
+    }
+
+    return MAL_TRUE;
+}
+
+
+// Forwards a start request to the backend-specific implementation for the device's context.
+//
+// Returns whatever the backend returns, or MAL_NO_BACKEND when the context's backend has no entry
+// here (either it was not compiled in or it is not handled by this dispatcher).
+static mal_result mal_device__start_backend(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    switch (pDevice->pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi: return mal_device__start_backend__wasapi(pDevice);
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound: return mal_device__start_backend__dsound(pDevice);
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:  return mal_device__start_backend__winmm(pDevice);
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:   return mal_device__start_backend__alsa(pDevice);
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:    return mal_device__start_backend__oss(pDevice);
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal: return mal_device__start_backend__openal(pDevice);
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:   return mal_device__start_backend__null(pDevice);
+    #endif
+
+        default: break;
+    }
+
+    return MAL_NO_BACKEND;
+}
+
+// Forwards a stop request to the backend-specific implementation for the device's context.
+//
+// Returns whatever the backend returns, or MAL_NO_BACKEND when the context's backend has no entry
+// here (either it was not compiled in or it is not handled by this dispatcher).
+static mal_result mal_device__stop_backend(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    switch (pDevice->pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi: return mal_device__stop_backend__wasapi(pDevice);
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound: return mal_device__stop_backend__dsound(pDevice);
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:  return mal_device__stop_backend__winmm(pDevice);
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:   return mal_device__stop_backend__alsa(pDevice);
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:    return mal_device__stop_backend__oss(pDevice);
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal: return mal_device__stop_backend__openal(pDevice);
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:   return mal_device__stop_backend__null(pDevice);
+    #endif
+
+        default: break;
+    }
+
+    return MAL_NO_BACKEND;
+}
+
+// Asks the backend-specific implementation to break out of its main loop (the loop entered by
+// mal_device__main_loop()).
+//
+// Returns whatever the backend returns, or MAL_NO_BACKEND when the context's backend has no entry
+// here (either it was not compiled in or it is not handled by this dispatcher).
+static mal_result mal_device__break_main_loop(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    switch (pDevice->pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi: return mal_device__break_main_loop__wasapi(pDevice);
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound: return mal_device__break_main_loop__dsound(pDevice);
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:  return mal_device__break_main_loop__winmm(pDevice);
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:   return mal_device__break_main_loop__alsa(pDevice);
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:    return mal_device__break_main_loop__oss(pDevice);
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal: return mal_device__break_main_loop__openal(pDevice);
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:   return mal_device__break_main_loop__null(pDevice);
+    #endif
+
+        default: break;
+    }
+
+    return MAL_NO_BACKEND;
+}
+
+// Runs the backend-specific main loop for the device's context. The worker thread calls this once
+// the device has been started.
+//
+// Returns whatever the backend returns, or MAL_NO_BACKEND when the context's backend has no entry
+// here (either it was not compiled in or it is not handled by this dispatcher).
+static mal_result mal_device__main_loop(mal_device* pDevice)
+{
+    mal_assert(pDevice != NULL);
+
+    switch (pDevice->pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi: return mal_device__main_loop__wasapi(pDevice);
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound: return mal_device__main_loop__dsound(pDevice);
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:  return mal_device__main_loop__winmm(pDevice);
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:   return mal_device__main_loop__alsa(pDevice);
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:    return mal_device__main_loop__oss(pDevice);
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal: return mal_device__main_loop__openal(pDevice);
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:   return mal_device__main_loop__null(pDevice);
+    #endif
+
+        default: break;
+    }
+
+    return MAL_NO_BACKEND;
+}
+
+// Entry point of the per-device worker thread. pData is the mal_device being serviced.
+//
+// Each pass of the loop:
+//   1. stops the backend and (except on the very first pass) fires the onStop callback,
+//   2. marks the device as stopped and signals stopEvent,
+//   3. blocks on wakeupEvent,
+//   4. exits if the device has been uninitialized in the meantime, otherwise starts the backend,
+//      publishes the outcome in workResult, signals startEvent and, on success, runs the backend
+//      main loop until it is broken.
+//
+// Always returns 0.
+mal_thread_result MAL_THREADCALL mal_worker_thread(void* pData)
+{
+    mal_device* pDevice = (mal_device*)pData;
+    mal_assert(pDevice != NULL);
+
+#ifdef MAL_WIN32
+    mal_CoInitializeEx(pDevice->pContext, NULL, 0); // 0 = COINIT_MULTITHREADED
+#endif
+
+    // The first pass through the loop happens right after initialization rather than after a real
+    // stop, so the onStop callback must not be fired for it.
+    mal_bool32 suppressStopCallback = MAL_TRUE;
+
+    for (;;) {
+        // Every pass begins with the device stopped - make that explicit at the backend level.
+        mal_device__stop_backend(pDevice);
+
+        if (suppressStopCallback) {
+            suppressStopCallback = MAL_FALSE;
+        } else {
+            mal_stop_proc onStop = pDevice->onStop;
+            if (onStop) {
+                onStop(pDevice);
+            }
+        }
+
+
+        // Publish the stopped state and release anybody waiting for the stop to complete.
+        mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+        mal_event_signal(&pDevice->stopEvent);
+
+        // Sleep until another thread asks us to do something.
+        mal_event_wait(&pDevice->wakeupEvent);
+
+        // Default result code.
+        pDevice->workResult = MAL_SUCCESS;
+
+        // A wakeup with the device uninitialized means the thread is being terminated.
+        if (mal_device__get_state(pDevice) == MAL_STATE_UNINITIALIZED) {
+            break;
+        }
+
+
+        // Otherwise the only valid reason for the wakeup is a start request. From here we wait for
+        // the device to either deliver data (recording) or request more data (playback).
+        mal_assert(mal_device__get_state(pDevice) == MAL_STATE_STARTING);
+
+        pDevice->workResult = mal_device__start_backend(pDevice);
+        if (pDevice->workResult == MAL_SUCCESS) {
+            // The thread that requested the start is blocked on startEvent until the device is
+            // really running, which is now.
+            mal_device__set_state(pDevice, MAL_STATE_STARTED);
+            mal_event_signal(&pDevice->startEvent);
+
+            // Stay in the main loop until it is broken with mal_device__break_main_loop().
+            mal_device__main_loop(pDevice);
+        } else {
+            // Starting failed. Unblock the requester (workResult carries the error) and go back to
+            // the top of the loop.
+            mal_event_signal(&pDevice->startEvent);
+        }
+    }
+
+    // Make sure we aren't continuously waiting on a stop event.
+    mal_event_signal(&pDevice->stopEvent);  // <-- Is this still needed?
+
+#ifdef MAL_WIN32
+    mal_CoUninitialize(pDevice->pContext);
+#endif
+
+    return (mal_thread_result)0;
+}
+
+
+// Reports whether the given device is initialized: MAL_FALSE for a NULL device, otherwise whether
+// its state is anything other than MAL_STATE_UNINITIALIZED.
+mal_bool32 mal_device__is_initialized(mal_device* pDevice)
+{
+    if (pDevice != NULL) {
+        return mal_device__get_state(pDevice) != MAL_STATE_UNINITIALIZED;
+    }
+
+    return MAL_FALSE;
+}
+
+
+#ifdef MAL_WIN32
+// Win32 counterpart of mal_context_init_backend_apis__win32(): calls mal_CoUninitialize() and then
+// closes the user32 and ole32 library handles stored on the context. Always returns MAL_SUCCESS.
+//
+// NOTE(review): the init function only opens these handles when MAL_WIN32_DESKTOP is defined, so on
+// other Win32 targets mal_dlclose() is presumably handed whatever the zero-initialized context holds
+// - confirm that mal_dlclose() tolerates that.
+mal_result mal_context_uninit_backend_apis__win32(mal_context* pContext)
+{
+    // COM is uninitialized first, before the ole32 handle is closed.
+    mal_CoUninitialize(pContext);
+    mal_dlclose(pContext->win32.hUser32DLL);
+    mal_dlclose(pContext->win32.hOle32DLL);
+
+    return MAL_SUCCESS;
+}
+
+// Loads the Win32 system libraries and entry points the library needs and initializes COM.
+//
+// On desktop builds (MAL_WIN32_DESKTOP) ole32.dll and user32.dll are opened at runtime and the
+// required entry points are looked up and stored on the context. On all Win32 builds COM is then
+// initialized through mal_CoInitializeEx().
+//
+// Returns MAL_SUCCESS on success or MAL_FAILED_TO_INIT_BACKEND when either library cannot be
+// opened. No library handle is left open on failure.
+mal_result mal_context_init_backend_apis__win32(mal_context* pContext)
+{
+#ifdef MAL_WIN32_DESKTOP
+    // Ole32.dll
+    pContext->win32.hOle32DLL = mal_dlopen("ole32.dll");
+    if (pContext->win32.hOle32DLL == NULL) {
+        return MAL_FAILED_TO_INIT_BACKEND;
+    }
+
+    pContext->win32.CoInitializeEx   = (mal_proc)mal_dlsym(pContext->win32.hOle32DLL, "CoInitializeEx");
+    pContext->win32.CoUninitialize   = (mal_proc)mal_dlsym(pContext->win32.hOle32DLL, "CoUninitialize");
+    pContext->win32.CoCreateInstance = (mal_proc)mal_dlsym(pContext->win32.hOle32DLL, "CoCreateInstance");
+    pContext->win32.CoTaskMemFree    = (mal_proc)mal_dlsym(pContext->win32.hOle32DLL, "CoTaskMemFree");
+    pContext->win32.PropVariantClear = (mal_proc)mal_dlsym(pContext->win32.hOle32DLL, "PropVariantClear");
+
+
+    // User32.dll
+    pContext->win32.hUser32DLL = mal_dlopen("user32.dll");
+    if (pContext->win32.hUser32DLL == NULL) {
+        // ole32.dll was already opened above. Close it again so the handle is not leaked when
+        // initialization is abandoned half way through.
+        mal_dlclose(pContext->win32.hOle32DLL);
+        pContext->win32.hOle32DLL = NULL;
+        return MAL_FAILED_TO_INIT_BACKEND;
+    }
+
+    pContext->win32.GetForegroundWindow = (mal_proc)mal_dlsym(pContext->win32.hUser32DLL, "GetForegroundWindow");
+    pContext->win32.GetDesktopWindow    = (mal_proc)mal_dlsym(pContext->win32.hUser32DLL, "GetDesktopWindow");
+#endif
+
+    mal_CoInitializeEx(pContext, NULL, 0);  // 0 = COINIT_MULTITHREADED
+    return MAL_SUCCESS;
+}
+#else
+mal_result mal_context_uninit_backend_apis__nix(mal_context* pContext)
+{
+    mal_dlclose(pContext->posix.pthreadSO);
+
+    return MAL_SUCCESS;
+}
+
+// Fills in the pthread entry points stored on the context.
+//
+// With runtime linking (the default) the pthread library is opened by trying a list of well-known
+// file names in order and each entry point is looked up by name. With MAL_NO_RUNTIME_LINKING the
+// entry points are taken directly from the pthread functions the program is linked against.
+//
+// Returns MAL_SUCCESS, or MAL_FAILED_TO_INIT_BACKEND when none of the candidate libraries can be
+// opened.
+mal_result mal_context_init_backend_apis__nix(mal_context* pContext)
+{
+    // pthread
+#if !defined(MAL_NO_RUNTIME_LINKING)
+    const char* candidateNames[] = {
+        "libpthread.so",
+        "libpthread.so.0",
+        "libpthread.dylib"
+    };
+    const size_t candidateCount = sizeof(candidateNames) / sizeof(candidateNames[0]);
+
+    // Try each candidate in turn and keep the first one that opens.
+    pContext->posix.pthreadSO = NULL;
+    for (size_t iCandidate = 0; iCandidate < candidateCount && pContext->posix.pthreadSO == NULL; ++iCandidate) {
+        pContext->posix.pthreadSO = mal_dlopen(candidateNames[iCandidate]);
+    }
+
+    if (pContext->posix.pthreadSO == NULL) {
+        return MAL_FAILED_TO_INIT_BACKEND;
+    }
+
+    // Thread management.
+    pContext->posix.pthread_create        = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_create");
+    pContext->posix.pthread_join          = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_join");
+
+    // Mutexes.
+    pContext->posix.pthread_mutex_init    = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_mutex_init");
+    pContext->posix.pthread_mutex_destroy = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_mutex_destroy");
+    pContext->posix.pthread_mutex_lock    = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_mutex_lock");
+    pContext->posix.pthread_mutex_unlock  = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_mutex_unlock");
+
+    // Condition variables.
+    pContext->posix.pthread_cond_init     = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_cond_init");
+    pContext->posix.pthread_cond_destroy  = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_cond_destroy");
+    pContext->posix.pthread_cond_wait     = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_cond_wait");
+    pContext->posix.pthread_cond_signal   = (mal_proc)mal_dlsym(pContext->posix.pthreadSO, "pthread_cond_signal");
+#else
+    // Thread management.
+    pContext->posix.pthread_create        = (mal_proc)pthread_create;
+    pContext->posix.pthread_join          = (mal_proc)pthread_join;
+
+    // Mutexes.
+    pContext->posix.pthread_mutex_init    = (mal_proc)pthread_mutex_init;
+    pContext->posix.pthread_mutex_destroy = (mal_proc)pthread_mutex_destroy;
+    pContext->posix.pthread_mutex_lock    = (mal_proc)pthread_mutex_lock;
+    pContext->posix.pthread_mutex_unlock  = (mal_proc)pthread_mutex_unlock;
+
+    // Condition variables.
+    pContext->posix.pthread_cond_init     = (mal_proc)pthread_cond_init;
+    pContext->posix.pthread_cond_destroy  = (mal_proc)pthread_cond_destroy;
+    pContext->posix.pthread_cond_wait     = (mal_proc)pthread_cond_wait;
+    pContext->posix.pthread_cond_signal   = (mal_proc)pthread_cond_signal;
+#endif
+
+    return MAL_SUCCESS;
+}
+#endif
+
+// Initializes the platform support APIs for the context by delegating to the Win32 or the POSIX
+// implementation, depending on MAL_WIN32, and returns that implementation's result.
+mal_result mal_context_init_backend_apis(mal_context* pContext)
+{
+#ifdef MAL_WIN32
+    return mal_context_init_backend_apis__win32(pContext);
+#else
+    return mal_context_init_backend_apis__nix(pContext);
+#endif
+}
+
+// Releases the platform support APIs for the context by delegating to the Win32 or the POSIX
+// implementation, depending on MAL_WIN32, and returns that implementation's result.
+mal_result mal_context_uninit_backend_apis(mal_context* pContext)
+{
+#ifdef MAL_WIN32
+    return mal_context_uninit_backend_apis__win32(pContext);
+#else
+    return mal_context_uninit_backend_apis__nix(pContext);
+#endif
+}
+
+// Initializes a context by trying a list of backends in priority order and keeping the first one
+// that initializes successfully.
+//
+//   backends     - Backends to try, in order. Pass NULL to use the built-in default order.
+//   backendCount - Number of entries in `backends`. Ignored when `backends` is NULL.
+//   pConfig      - Optional context configuration. When NULL a default configuration is used.
+//   pContext     - Receives the initialized context. Zeroed before anything else happens.
+//
+// Returns MAL_SUCCESS when a backend was initialized (pContext->backend identifies it),
+// MAL_INVALID_ARGS when pContext is NULL, the error from mal_context_init_backend_apis() when the
+// platform APIs cannot be set up, or MAL_NO_BACKEND when none of the requested backends could be
+// initialized. Backends that were not compiled in are skipped.
+mal_result mal_context_init(mal_backend backends[], mal_uint32 backendCount, const mal_context_config* pConfig, mal_context* pContext)
+{
+    if (pContext == NULL) return MAL_INVALID_ARGS;
+    mal_zero_object(pContext);
+
+    // Always make sure the config is set first to ensure properties are available as soon as possible.
+    if (pConfig != NULL) {
+        pContext->config = *pConfig;
+    } else {
+        pContext->config = mal_context_config_init(NULL);
+    }
+
+    // Backend APIs need to be initialized first. This is where external libraries will be loaded and linked.
+    mal_result result = mal_context_init_backend_apis(pContext);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+
+    static mal_backend defaultBackends[] = {
+        mal_backend_wasapi,
+        mal_backend_dsound,
+        mal_backend_winmm,
+        mal_backend_alsa,
+        mal_backend_oss,
+        mal_backend_opensl,
+        mal_backend_openal,
+        mal_backend_sdl,
+        mal_backend_null
+    };
+
+    if (backends == NULL) {
+        backends = defaultBackends;
+        backendCount = sizeof(defaultBackends) / sizeof(defaultBackends[0]);
+    }
+
+    mal_assert(backends != NULL);
+
+    for (mal_uint32 iBackend = 0; iBackend < backendCount; ++iBackend) {
+        mal_backend backend = backends[iBackend];
+
+        // Stays MAL_NO_BACKEND when the backend is not compiled into this build.
+        result = MAL_NO_BACKEND;
+        switch (backend) {
+        #ifdef MAL_HAS_WASAPI
+            case mal_backend_wasapi:
+            {
+                result = mal_context_init__wasapi(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_DSOUND
+            case mal_backend_dsound:
+            {
+                result = mal_context_init__dsound(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_WINMM
+            case mal_backend_winmm:
+            {
+                result = mal_context_init__winmm(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_ALSA
+            case mal_backend_alsa:
+            {
+                result = mal_context_init__alsa(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_OSS
+            case mal_backend_oss:
+            {
+                result = mal_context_init__oss(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_OPENSL
+            case mal_backend_opensl:
+            {
+                result = mal_context_init__opensl(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_OPENAL
+            case mal_backend_openal:
+            {
+                result = mal_context_init__openal(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_SDL
+            case mal_backend_sdl:
+            {
+                result = mal_context_init__sdl(pContext);
+            } break;
+        #endif
+        #ifdef MAL_HAS_NULL
+            case mal_backend_null:
+            {
+                result = mal_context_init__null(pContext);
+            } break;
+        #endif
+
+            default: break;
+        }
+
+        // If this iteration was successful, return.
+        if (result == MAL_SUCCESS) {
+            pContext->backend = backend;
+            return result;
+        }
+    }
+
+    // No backend could be initialized. Release what mal_context_init_backend_apis() acquired before
+    // wiping the context, otherwise the only references to those resources are lost.
+    mal_context_uninit_backend_apis(pContext);
+
+    mal_zero_object(pContext);  // Safety.
+    return MAL_NO_BACKEND;
+}
+
+// Uninitializes a context that was set up with mal_context_init().
+//
+// The backend recorded in pContext->backend is shut down first, and then the platform support APIs
+// loaded by mal_context_init_backend_apis() are released. Previously every backend case returned
+// straight out of the switch, so the platform APIs were never released for a valid context.
+//
+// Returns MAL_INVALID_ARGS when pContext is NULL, the backend's uninit result when the backend is
+// recognized, or MAL_NO_BACKEND (after asserting) when it is not.
+mal_result mal_context_uninit(mal_context* pContext)
+{
+    if (pContext == NULL) return MAL_INVALID_ARGS;
+
+    mal_result result = MAL_NO_BACKEND;
+    switch (pContext->backend) {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi:
+        {
+            result = mal_context_uninit__wasapi(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound:
+        {
+            result = mal_context_uninit__dsound(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:
+        {
+            result = mal_context_uninit__winmm(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:
+        {
+            result = mal_context_uninit__alsa(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:
+        {
+            result = mal_context_uninit__oss(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OPENSL
+        case mal_backend_opensl:
+        {
+            result = mal_context_uninit__opensl(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal:
+        {
+            result = mal_context_uninit__openal(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_SDL
+        case mal_backend_sdl:
+        {
+            result = mal_context_uninit__sdl(pContext);
+        } break;
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:
+        {
+            result = mal_context_uninit__null(pContext);
+        } break;
+    #endif
+
+        default:
+        {
+            // An initialized context always has a backend that is compiled in.
+            mal_assert(MAL_FALSE);
+        } break;
+    }
+
+    // The backend is shut down first, then the platform APIs are released, on every path.
+    mal_context_uninit_backend_apis(pContext);
+
+    return result;
+}
+
+
+// Enumerates the devices of the given type using the context's backend.
+//
+//   pContext - The context to enumerate with. Must not be NULL.
+//   type     - Which kind of device to enumerate.
+//   pCount   - In/out. On entry, the capacity (in elements) of pInfo; the backend updates it to the
+//              number of devices it reported. Must not be NULL.
+//   pInfo    - Optional output array. When non-NULL its first *pCount entries are zeroed before the
+//              backend is called.
+//
+// Returns the backend's result, the result of mal_post_error() with MAL_INVALID_ARGS when pCount
+// or pContext is NULL, or MAL_NO_BACKEND (after asserting) when the context's backend is not
+// compiled in.
+mal_result mal_enumerate_devices(mal_context* pContext, mal_device_type type, mal_uint32* pCount, mal_device_info* pInfo)
+{
+    if (pCount == NULL) return mal_post_error(NULL, "mal_enumerate_devices() called with invalid arguments (pCount == 0).", MAL_INVALID_ARGS);
+
+    // The context is dereferenced below to select the backend, so reject NULL up front instead of crashing.
+    if (pContext == NULL) return mal_post_error(NULL, "mal_enumerate_devices() called with invalid arguments (pContext == NULL).", MAL_INVALID_ARGS);
+
+    // The output buffer needs to be initialized to zero.
+    if (pInfo != NULL) {
+        mal_zero_memory(pInfo, (*pCount) * sizeof(*pInfo));
+    }
+
+    switch (pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi:
+        {
+            return mal_enumerate_devices__wasapi(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound:
+        {
+            return mal_enumerate_devices__dsound(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:
+        {
+            return mal_enumerate_devices__winmm(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:
+        {
+            return mal_enumerate_devices__alsa(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:
+        {
+            return mal_enumerate_devices__oss(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OPENSL
+        case mal_backend_opensl:
+        {
+            return mal_enumerate_devices__opensl(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal:
+        {
+            return mal_enumerate_devices__openal(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_SDL
+        case mal_backend_sdl:
+        {
+            return mal_enumerate_devices__sdl(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:
+        {
+            return mal_enumerate_devices__null(pContext, type, pCount, pInfo);
+        } break;
+    #endif
+
+        default: break;
+    }
+
+    // Only reachable when the context's backend is not one that is compiled in.
+    mal_assert(MAL_FALSE);
+    return MAL_NO_BACKEND;
+}
+
+mal_result mal_device_init(mal_context* pContext, mal_device_type type, mal_device_id* pDeviceID, const mal_device_config* pConfig, void* pUserData, mal_device* pDevice)
+{
+    if (pDevice == NULL) {
+        return mal_post_error(pDevice, "mal_device_init() called with invalid arguments (pDevice == NULL).",  MAL_INVALID_ARGS);
+    }
+    if (pConfig == NULL) {
+        return mal_post_error(pDevice, "mal_device_init() called with invalid arguments (pConfig == NULL).",  MAL_INVALID_ARGS);
+    }
+
+    // Make a copy of the config to ensure we don't override the caller's object.
+    mal_device_config config = *pConfig;
+
+    mal_zero_object(pDevice);
+    pDevice->pContext = pContext;
+
+    // Set the user data and log callback ASAP to ensure it is available for the entire initialization process.
+    pDevice->pUserData = pUserData;
+    pDevice->onStop = config.onStopCallback;
+    pDevice->onSend = config.onSendCallback;
+    pDevice->onRecv = config.onRecvCallback;
+
+    if (((size_t)pDevice % sizeof(pDevice)) != 0) {
+        if (pContext->config.onLog) {
+            pContext->config.onLog(pContext, pDevice, "WARNING: mal_device_init() called for a device that is not properly aligned. Thread safety is not supported.");
+        }
+    }
+
+
+    if (pContext == NULL) {
+        return mal_post_error(pDevice, "mal_device_init() called with invalid arguments (pContext == NULL).", MAL_INVALID_ARGS);
+    }
+
+
+    // Basic config validation.
+    if (config.channels == 0) {
+        return mal_post_error(pDevice, "mal_device_init() called with an invalid config. Channel count must be greater than 0.", MAL_INVALID_DEVICE_CONFIG);
+    }
+    if (config.channels > MAL_MAX_CHANNELS) {
+        return mal_post_error(pDevice, "mal_device_init() called with an invalid config. Channel count cannot exceed 18.", MAL_INVALID_DEVICE_CONFIG);
+    }
+
+    if (config.sampleRate == 0) {
+        return mal_post_error(pDevice, "mal_device_init() called with an invalid config. Sample rate must be greater than 0.", MAL_INVALID_DEVICE_CONFIG);
+    }
+
+    if (!mal__is_channel_map_valid(pConfig->channelMap, pConfig->channels)) {
+        return mal_post_error(pDevice, "mal_device_init() called with invalid arguments. Channel map is invalid.", MAL_INVALID_DEVICE_CONFIG);
+    }
+
+
+    // Default buffer size and periods.
+    if (config.bufferSizeInFrames == 0) {
+        config.bufferSizeInFrames = (config.sampleRate/1000) * MAL_DEFAULT_BUFFER_SIZE_IN_MILLISECONDS;
+        pDevice->usingDefaultBufferSize = MAL_TRUE;
+    }
+    if (config.periods == 0) {
+        config.periods = MAL_DEFAULT_PERIODS;
+        pDevice->usingDefaultPeriods = MAL_TRUE;
+    }
+
+    pDevice->type = type;
+    pDevice->format = config.format;
+    pDevice->channels = config.channels;
+    mal_copy_memory(config.channelMap, config.channelMap, sizeof(config.channelMap[0]) * config.channels);
+    pDevice->sampleRate = config.sampleRate;
+    pDevice->bufferSizeInFrames = config.bufferSizeInFrames;
+    pDevice->periods = config.periods;
+
+    // The internal format, channel count and sample rate can be modified by the backend.
+    pDevice->internalFormat = pDevice->format;
+    pDevice->internalChannels = pDevice->channels;
+    pDevice->internalSampleRate = pDevice->sampleRate;
+    mal_copy_memory(pDevice->internalChannelMap, pDevice->channelMap, sizeof(pDevice->channelMap));
+
+    if (mal_mutex_init(pContext, &pDevice->lock) != MAL_SUCCESS) {
+        return mal_post_error(pDevice, "Failed to create mutex.", MAL_FAILED_TO_CREATE_MUTEX);
+    }
+
+    // When the device is started, the worker thread is the one that does the actual startup of the backend device. We
+    // use a semaphore to wait for the background thread to finish the work. The same applies for stopping the device.
+    //
+    // Each of these semaphores is released internally by the worker thread when the work is completed. The start
+    // semaphore is also used to wake up the worker thread.
+    if (mal_event_init(pContext, &pDevice->wakeupEvent) != MAL_SUCCESS) {
+        mal_mutex_uninit(&pDevice->lock);
+        return mal_post_error(pDevice, "Failed to create worker thread wakeup event.", MAL_FAILED_TO_CREATE_EVENT);
+    }
+    if (mal_event_init(pContext, &pDevice->startEvent) != MAL_SUCCESS) {
+        mal_event_uninit(&pDevice->wakeupEvent);
+        mal_mutex_uninit(&pDevice->lock);
+        return mal_post_error(pDevice, "Failed to create worker thread start event.", MAL_FAILED_TO_CREATE_EVENT);
+    }
+    if (mal_event_init(pContext, &pDevice->stopEvent) != MAL_SUCCESS) {
+        mal_event_uninit(&pDevice->startEvent);
+        mal_event_uninit(&pDevice->wakeupEvent);
+        mal_mutex_uninit(&pDevice->lock);
+        return mal_post_error(pDevice, "Failed to create worker thread stop event.", MAL_FAILED_TO_CREATE_EVENT);
+    }
+
+
+    mal_result result = MAL_NO_BACKEND;
+    switch (pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi:
+        {
+            result = mal_device_init__wasapi(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound:
+        {
+            result = mal_device_init__dsound(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:
+        {
+            result = mal_device_init__winmm(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:
+        {
+            result = mal_device_init__alsa(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:
+        {
+            result = mal_device_init__oss(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OPENSL
+        case mal_backend_opensl:
+        {
+            result = mal_device_init__opensl(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal:
+        {
+            result = mal_device_init__openal(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_SDL
+        case mal_backend_sdl:
+        {
+            result = mal_device_init__sdl(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:
+        {
+            result = mal_device_init__null(pContext, type, pDeviceID, &config, pDevice);
+        } break;
+    #endif
+
+        default: break;
+    }
+
+    if (result != MAL_SUCCESS) {
+        return MAL_NO_BACKEND;  // The error message will have been posted with mal_post_error() by the source of the error so don't bother calling it here.
+    }
+
+
+    // If the backend did not fill out a name for the device, try a generic method.
+    if (pDevice->name[0] == '\0') {
+        if (mal_context__try_get_device_name_by_id(pContext, type, pDeviceID, pDevice->name, sizeof(pDevice->name)) != MAL_SUCCESS) {
+            // We failed to get the device name, so fall back to some generic names.
+            if (pDeviceID == NULL) {
+                if (type == mal_device_type_playback) {
+                    mal_strncpy_s(pDevice->name, sizeof(pDevice->name), "Default Playback Device", (size_t)-1);
+                } else {
+                    mal_strncpy_s(pDevice->name, sizeof(pDevice->name), "Default Capture Device", (size_t)-1);
+                }
+            } else {
+                if (type == mal_device_type_playback) {
+                    mal_strncpy_s(pDevice->name, sizeof(pDevice->name), "Playback Device", (size_t)-1);
+                } else {
+                    mal_strncpy_s(pDevice->name, sizeof(pDevice->name), "Capture Device", (size_t)-1);
+                }
+            }
+        }
+    }
+
+
+    // We need a DSP object which is where samples are moved through in order to convert them to the
+    // format required by the backend.
+    mal_dsp_config dspConfig;
+    dspConfig.cacheSizeInFrames = pDevice->bufferSizeInFrames;
+    if (type == mal_device_type_playback) {
+        dspConfig.formatIn      = pDevice->format;
+        dspConfig.channelsIn    = pDevice->channels;
+        dspConfig.sampleRateIn  = pDevice->sampleRate;
+        mal_copy_memory(dspConfig.channelMapIn, pDevice->channelMap, sizeof(dspConfig.channelMapIn));
+        dspConfig.formatOut     = pDevice->internalFormat;
+        dspConfig.channelsOut   = pDevice->internalChannels;
+        dspConfig.sampleRateOut = pDevice->internalSampleRate;
+        mal_copy_memory(dspConfig.channelMapOut, pDevice->internalChannelMap, sizeof(dspConfig.channelMapOut));
+        mal_dsp_init(&dspConfig, mal_device__on_read_from_client, pDevice, &pDevice->dsp);
+    } else {
+        dspConfig.formatIn      = pDevice->internalFormat;
+        dspConfig.channelsIn    = pDevice->internalChannels;
+        dspConfig.sampleRateIn  = pDevice->internalSampleRate;
+        mal_copy_memory(dspConfig.channelMapIn, pDevice->internalChannelMap, sizeof(dspConfig.channelMapIn));
+        dspConfig.formatOut     = pDevice->format;
+        dspConfig.channelsOut   = pDevice->channels;
+        dspConfig.sampleRateOut = pDevice->sampleRate;
+        mal_copy_memory(dspConfig.channelMapOut, pDevice->channelMap, sizeof(dspConfig.channelMapOut));
+        mal_dsp_init(&dspConfig, mal_device__on_read_from_device, pDevice, &pDevice->dsp);
+    }
+
+
+
+
+    // Some backends don't require the worker thread.
+    if (pContext->backend != mal_backend_opensl && pContext->backend != mal_backend_sdl) {
+        // The worker thread.
+        if (mal_thread_create(pContext, &pDevice->thread, mal_worker_thread, pDevice) != MAL_SUCCESS) {
+            mal_device_uninit(pDevice);
+            return mal_post_error(pDevice, "Failed to create worker thread.", MAL_FAILED_TO_CREATE_THREAD);
+        }
+
+        // Wait for the worker thread to put the device into its stopped state for real.
+        mal_event_wait(&pDevice->stopEvent);
+    } else {
+        mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+    }
+
+    mal_assert(mal_device__get_state(pDevice) == MAL_STATE_STOPPED);
+    return MAL_SUCCESS;
+}
+
+// Tears down a device. Does nothing when mal_device__is_initialized() reports that the device is
+// not initialized.
+//
+// Sequence: stop the device if it is started, flag it as uninitialized, wake and join the worker
+// thread (backends that have one), release the events and the lock, run the backend-specific
+// uninit routine and finally zero the whole mal_device object.
+void mal_device_uninit(mal_device* pDevice)
+{
+    if (!mal_device__is_initialized(pDevice)) {
+        return;
+    }
+
+    // Stop explicitly before tearing anything down, retrying for as long as the device reports
+    // that it is busy.
+    if (mal_device_is_started(pDevice)) {
+        for (;;) {
+            if (mal_device_stop(pDevice) != MAL_DEVICE_BUSY) {
+                break;
+            }
+            mal_sleep(1);
+        }
+    }
+
+    // The worker thread returns once the device is in the uninitialized state.
+    mal_device__set_state(pDevice, MAL_STATE_UNINITIALIZED);
+
+    // OpenSL and SDL devices have no worker thread to wake up and join.
+    if (!(pDevice->pContext->backend == mal_backend_opensl || pDevice->pContext->backend == mal_backend_sdl)) {
+        mal_event_signal(&pDevice->wakeupEvent);
+        mal_thread_wait(&pDevice->thread);
+    }
+
+    // Synchronization primitives, released in reverse order of their creation.
+    mal_event_uninit(&pDevice->stopEvent);
+    mal_event_uninit(&pDevice->startEvent);
+    mal_event_uninit(&pDevice->wakeupEvent);
+    mal_mutex_uninit(&pDevice->lock);
+
+    // Backend-specific teardown. Only backends that were compiled in have a case.
+    switch (pDevice->pContext->backend)
+    {
+    #ifdef MAL_HAS_WASAPI
+        case mal_backend_wasapi: mal_device_uninit__wasapi(pDevice); break;
+    #endif
+    #ifdef MAL_HAS_DSOUND
+        case mal_backend_dsound: mal_device_uninit__dsound(pDevice); break;
+    #endif
+    #ifdef MAL_HAS_WINMM
+        case mal_backend_winmm:  mal_device_uninit__winmm(pDevice);  break;
+    #endif
+    #ifdef MAL_HAS_ALSA
+        case mal_backend_alsa:   mal_device_uninit__alsa(pDevice);   break;
+    #endif
+    #ifdef MAL_HAS_OSS
+        case mal_backend_oss:    mal_device_uninit__oss(pDevice);    break;
+    #endif
+    #ifdef MAL_HAS_OPENSL
+        case mal_backend_opensl: mal_device_uninit__opensl(pDevice); break;
+    #endif
+    #ifdef MAL_HAS_OPENAL
+        case mal_backend_openal: mal_device_uninit__openal(pDevice); break;
+    #endif
+    #ifdef MAL_HAS_SDL
+        case mal_backend_sdl:    mal_device_uninit__sdl(pDevice);    break;
+    #endif
+    #ifdef MAL_HAS_NULL
+        case mal_backend_null:   mal_device_uninit__null(pDevice);   break;
+    #endif
+
+        default: break;
+    }
+
+    mal_zero_object(pDevice);
+}
+
+// Installs proc as the device's receive callback (pDevice->onRecv) through
+// mal_atomic_exchange_ptr(). A NULL device is silently ignored.
+void mal_device_set_recv_callback(mal_device* pDevice, mal_recv_proc proc)
+{
+    if (pDevice != NULL) {
+        mal_atomic_exchange_ptr(&pDevice->onRecv, proc);
+    }
+}
+
+// Installs proc as the device's send callback (pDevice->onSend) through
+// mal_atomic_exchange_ptr(). A NULL device is silently ignored.
+void mal_device_set_send_callback(mal_device* pDevice, mal_send_proc proc)
+{
+    if (pDevice != NULL) {
+        mal_atomic_exchange_ptr(&pDevice->onSend, proc);
+    }
+}
+
+// Installs proc as the device's stop callback (pDevice->onStop) through
+// mal_atomic_exchange_ptr(). A NULL device is silently ignored.
+void mal_device_set_stop_callback(mal_device* pDevice, mal_stop_proc proc)
+{
+    if (pDevice != NULL) {
+        mal_atomic_exchange_ptr(&pDevice->onStop, proc);
+    }
+}
+
+// Starts the device and does not return until the start attempt has finished.
+//
+// The following conditions are reported through mal_post_error():
+//   MAL_INVALID_ARGS             pDevice is NULL.
+//   MAL_DEVICE_NOT_INITIALIZED   The device is in the uninitialized state.
+//   MAL_DEVICE_ALREADY_STARTING  The device is already in the starting state.
+//   MAL_DEVICE_ALREADY_STARTED   The device is already started.
+//   MAL_DEVICE_BUSY              The device is in any other state than stopped.
+//
+// Otherwise the return value is the outcome of starting the backend: the backend's own result
+// for OpenSL and SDL, or pDevice->workResult as left by the worker thread for every other backend.
+mal_result mal_device_start(mal_device* pDevice)
+{
+    if (pDevice == NULL) return mal_post_error(pDevice, "mal_device_start() called with invalid arguments (pDevice == NULL).", MAL_INVALID_ARGS);
+    if (mal_device__get_state(pDevice) == MAL_STATE_UNINITIALIZED) return mal_post_error(pDevice, "mal_device_start() called for an uninitialized device.", MAL_DEVICE_NOT_INITIALIZED);
+
+    mal_result result = MAL_ERROR;
+    mal_mutex_lock(&pDevice->lock);
+    {
+        // Be a bit more descriptive if the device is already started or is already in the process of starting. This is likely
+        // a bug with the application.
+        if (mal_device__get_state(pDevice) == MAL_STATE_STARTING) {
+            mal_mutex_unlock(&pDevice->lock);
+            return mal_post_error(pDevice, "mal_device_start() called while another thread is already starting it.", MAL_DEVICE_ALREADY_STARTING);
+        }
+        if (mal_device__get_state(pDevice) == MAL_STATE_STARTED) {
+            mal_mutex_unlock(&pDevice->lock);
+            return mal_post_error(pDevice, "mal_device_start() called for a device that's already started.", MAL_DEVICE_ALREADY_STARTED);
+        }
+
+        // The device needs to be in a stopped state. If it's not, we just let the caller know the device is busy.
+        if (mal_device__get_state(pDevice) != MAL_STATE_STOPPED) {
+            mal_mutex_unlock(&pDevice->lock);
+            return mal_post_error(pDevice, "mal_device_start() called while another thread is in the process of stopping it.", MAL_DEVICE_BUSY);
+        }
+
+        mal_device__set_state(pDevice, MAL_STATE_STARTING);
+
+        // Asynchronous backends need to be handled differently. There is no worker thread to move the
+        // device out of the starting state for them, so this function has to do it for both outcomes:
+        // started on success, back to stopped on failure. Leaving a failed device in the starting
+        // state would make every later start report "already starting" and every stop report "busy".
+#ifdef MAL_HAS_OPENSL
+        if (pDevice->pContext->backend == mal_backend_opensl) {
+            result = mal_device__start_backend__opensl(pDevice);
+            if (result == MAL_SUCCESS) {
+                mal_device__set_state(pDevice, MAL_STATE_STARTED);
+            } else {
+                mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+            }
+        } else
+#endif
+#ifdef MAL_HAS_SDL
+        if (pDevice->pContext->backend == mal_backend_sdl) {
+            result = mal_device__start_backend__sdl(pDevice);
+            if (result == MAL_SUCCESS) {
+                mal_device__set_state(pDevice, MAL_STATE_STARTED);
+            } else {
+                mal_device__set_state(pDevice, MAL_STATE_STOPPED);
+            }
+        } else
+#endif
+        // Synchronous backends.
+        {
+            mal_event_signal(&pDevice->wakeupEvent);
+
+            // Wait for the worker thread to finish starting the device. Note that the worker thread will be the one
+            // who puts the device into the started state. Don't call mal_device__set_state() here.
+            mal_event_wait(&pDevice->startEvent);
+            result = pDevice->workResult;
+        }
+    }
+    mal_mutex_unlock(&pDevice->lock);
+
+    return result;
+}
+
+// Stops the device and does not return until the stop request has been carried out.
+//
+// The following conditions are reported through mal_post_error():
+//   MAL_INVALID_ARGS             pDevice is NULL.
+//   MAL_DEVICE_NOT_INITIALIZED   The device is in the uninitialized state.
+//   MAL_DEVICE_ALREADY_STOPPING  The device is already in the stopping state.
+//   MAL_DEVICE_ALREADY_STOPPED   The device is already stopped.
+//   MAL_DEVICE_BUSY              The device is in any other state than started.
+//
+// Returns MAL_SUCCESS once the stop request has gone through.
+mal_result mal_device_stop(mal_device* pDevice)
+{
+    if (pDevice == NULL) return mal_post_error(pDevice, "mal_device_stop() called with invalid arguments (pDevice == NULL).", MAL_INVALID_ARGS);
+    if (mal_device__get_state(pDevice) == MAL_STATE_UNINITIALIZED) return mal_post_error(pDevice, "mal_device_stop() called for an uninitialized device.", MAL_DEVICE_NOT_INITIALIZED);
+
+    mal_result result = MAL_ERROR;
+    mal_mutex_lock(&pDevice->lock);
+    {
+        // Be a bit more descriptive if the device is already stopped or is already in the process of stopping. This is likely
+        // a bug with the application.
+        if (mal_device__get_state(pDevice) == MAL_STATE_STOPPING) {
+            mal_mutex_unlock(&pDevice->lock);
+            return mal_post_error(pDevice, "mal_device_stop() called while another thread is already stopping it.", MAL_DEVICE_ALREADY_STOPPING);
+        }
+        if (mal_device__get_state(pDevice) == MAL_STATE_STOPPED) {
+            mal_mutex_unlock(&pDevice->lock);
+            return mal_post_error(pDevice, "mal_device_stop() called for a device that's already stopped.", MAL_DEVICE_ALREADY_STOPPED);
+        }
+
+        // The device needs to be in a started state. If it's not, we just let the caller know the device is busy.
+        if (mal_device__get_state(pDevice) != MAL_STATE_STARTED) {
+            mal_mutex_unlock(&pDevice->lock);
+            return mal_post_error(pDevice, "mal_device_stop() called while another thread is in the process of starting it.", MAL_DEVICE_BUSY);
+        }
+
+        mal_device__set_state(pDevice, MAL_STATE_STOPPING);
+
+        // There's no need to wake up the thread like we do when starting.
+
+        // Asynchronous backends need to be handled differently. The result has to be set on these
+        // paths as well; otherwise a stop on OpenSL or SDL would always hand MAL_ERROR back to the
+        // caller, even though the backend was asked to stop exactly like on the synchronous path.
+        // NOTE(review): the backend stop routines are invoked without looking at what they return,
+        // as before. If they return mal_result, propagating that value would be more precise.
+#ifdef MAL_HAS_OPENSL
+        if (pDevice->pContext->backend == mal_backend_opensl) {
+            mal_device__stop_backend__opensl(pDevice);
+            result = MAL_SUCCESS;
+        } else
+#endif
+#ifdef MAL_HAS_SDL
+        if (pDevice->pContext->backend == mal_backend_sdl) {
+            mal_device__stop_backend__sdl(pDevice);
+            result = MAL_SUCCESS;
+        } else
+#endif
+        // Synchronous backends.
+        {
+            // When we get here the worker thread is likely in a wait state while waiting for the backend device to deliver or request
+            // audio data. We need to force these to return as quickly as possible.
+            mal_device__break_main_loop(pDevice);
+
+            // We need to wait for the worker thread to become available for work before returning. Note that the worker thread will be
+            // the one who puts the device into the stopped state. Don't call mal_device__set_state() here.
+            mal_event_wait(&pDevice->stopEvent);
+            result = MAL_SUCCESS;
+        }
+    }
+    mal_mutex_unlock(&pDevice->lock);
+
+    return result;
+}
+
+// Reports whether the device is currently in the started state. A NULL device counts as not
+// started.
+mal_bool32 mal_device_is_started(mal_device* pDevice)
+{
+    mal_bool32 isStarted = MAL_FALSE;
+
+    if (pDevice != NULL) {
+        isStarted = (mal_device__get_state(pDevice) == MAL_STATE_STARTED);
+    }
+
+    return isStarted;
+}
+
+// Returns the device's buffer size in bytes: frames in the buffer times channels times the byte
+// size of one sample in the device's format. Returns 0 for a NULL device.
+mal_uint32 mal_device_get_buffer_size_in_bytes(mal_device* pDevice)
+{
+    if (pDevice == NULL) {
+        return 0;
+    }
+
+    mal_uint32 bytesPerSample = mal_get_sample_size_in_bytes(pDevice->format);
+    return pDevice->bufferSizeInFrames * pDevice->channels * bytesPerSample;
+}
+
+// Returns the size in bytes of a single sample in the given format.
+//
+// The unknown format yields 0, as does any value that is not covered by the table below, so an
+// invalid format can never index past the end of the table.
+mal_uint32 mal_get_sample_size_in_bytes(mal_format format)
+{
+    // Indexed by the numeric value of the format. Static and const so the table is built once
+    // rather than on every call.
+    static const mal_uint32 sizes[] = {
+        0,  // unknown
+        1,  // u8
+        2,  // s16
+        3,  // s24
+        4,  // s32
+        4,  // f32
+    };
+
+    // The cast to unsigned also sends a negative value (possible when the enum is signed) down the
+    // out-of-range path.
+    if ((mal_uint32)format >= (sizeof(sizes) / sizeof(sizes[0]))) {
+        return 0;
+    }
+
+    return sizes[format];
+}
+
+// Builds a context config with every field zeroed except for the log callback, which is set to
+// onLog.
+mal_context_config mal_context_config_init(mal_log_proc onLog)
+{
+    mal_context_config result;
+
+    mal_zero_object(&result);
+    result.onLog = onLog;
+
+    return result;
+}
+
+// Builds a device config from the given format, channel count, sample rate and callbacks. All
+// other fields are zeroed.
+//
+// A default channel map is filled in for 1, 2, 3, 4, 5, 6 and 8 channels:
+//   1: front-center
+//   2: front-left, front-right
+//   3: front-left, front-right, LFE
+//   4: front-left, front-right, back-left, back-right
+//   5: front-left, front-right, back-left, back-right, LFE
+//   6: front-left, front-right, front-center, LFE, back-left, back-right
+//   8: the 6 channel layout followed by side-left, side-right
+// Any other channel count leaves the map blank, which will use the same mapping as the device's
+// native mapping.
+mal_device_config mal_device_config_init(mal_format format, mal_uint32 channels, mal_uint32 sampleRate, mal_recv_proc onRecvCallback, mal_send_proc onSendCallback)
+{
+    mal_device_config config;
+    mal_zero_object(&config);
+
+    config.format         = format;
+    config.channels       = channels;
+    config.sampleRate     = sampleRate;
+    config.onRecvCallback = onRecvCallback;
+    config.onSendCallback = onSendCallback;
+
+    if (channels == 1) {
+        config.channelMap[0] = MAL_CHANNEL_FRONT_CENTER;
+    } else if ((channels >= 2 && channels <= 6) || channels == 8) {
+        // Every remaining preset starts with the stereo pair.
+        config.channelMap[0] = MAL_CHANNEL_FRONT_LEFT;
+        config.channelMap[1] = MAL_CHANNEL_FRONT_RIGHT;
+
+        if (channels == 3) {
+            config.channelMap[2] = MAL_CHANNEL_LFE;
+        } else if (channels == 4 || channels == 5) {
+            config.channelMap[2] = MAL_CHANNEL_BACK_LEFT;
+            config.channelMap[3] = MAL_CHANNEL_BACK_RIGHT;
+            if (channels == 5) {
+                config.channelMap[4] = MAL_CHANNEL_LFE;
+            }
+        } else if (channels == 6 || channels == 8) {
+            config.channelMap[2] = MAL_CHANNEL_FRONT_CENTER;
+            config.channelMap[3] = MAL_CHANNEL_LFE;
+            config.channelMap[4] = MAL_CHANNEL_BACK_LEFT;
+            config.channelMap[5] = MAL_CHANNEL_BACK_RIGHT;
+            if (channels == 8) {
+                config.channelMap[6] = MAL_CHANNEL_SIDE_LEFT;
+                config.channelMap[7] = MAL_CHANNEL_SIDE_RIGHT;
+            }
+        }
+    }
+
+    return config;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// SRC
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Binds a cache to its owning SRC object and marks it as empty (no cached frames, read cursor at
+// the first frame). Both pointers must be non-NULL.
+void mal_src_cache_init(mal_src* pSRC, mal_src_cache* pCache)
+{
+    mal_assert(pSRC != NULL);
+    mal_assert(pCache != NULL);
+
+    pCache->iNextFrame       = 0;
+    pCache->cachedFrameCount = 0;
+    pCache->pSRC             = pSRC;
+}
+
+// Reads up to frameCount frames of f32 samples out of the cache into pFramesOut, refilling the
+// cache from the owning SRC's onRead callback whenever it runs dry.
+//
+// pCache      The cache to read from. Its pSRC and that SRC's onRead callback must be set.
+// frameCount  Number of frames wanted. Must be greater than 0.
+// pFramesOut  Destination for the f32 samples, laid out frame after frame with
+//             pSRC->config.channels samples per frame.
+//
+// Returns the number of frames written to pFramesOut. This is less than frameCount when onRead
+// delivers no more frames.
+mal_uint32 mal_src_cache_read_frames(mal_src_cache* pCache, mal_uint32 frameCount, float* pFramesOut)
+{
+    mal_assert(pCache != NULL);
+    mal_assert(pCache->pSRC != NULL);
+    mal_assert(pCache->pSRC->onRead != NULL);
+    mal_assert(frameCount > 0);
+    mal_assert(pFramesOut != NULL);
+
+    mal_uint32 channels = pCache->pSRC->config.channels;
+
+    mal_uint32 totalFramesRead = 0;
+    while (frameCount > 0) {
+        // If there's anything in memory go ahead and copy that over first.
+        mal_uint32 framesRemainingInMemory = pCache->cachedFrameCount - pCache->iNextFrame;
+        mal_uint32 framesToReadFromMemory = frameCount;
+        if (framesToReadFromMemory > framesRemainingInMemory) {
+            framesToReadFromMemory = framesRemainingInMemory;
+        }
+
+        // This copies zero bytes when the cache is empty, which is the case on the first pass after
+        // mal_src_cache_init().
+        mal_copy_memory(pFramesOut, pCache->pCachedFrames + pCache->iNextFrame*channels, framesToReadFromMemory * channels * sizeof(float));
+        pCache->iNextFrame += framesToReadFromMemory;
+
+        totalFramesRead += framesToReadFromMemory;
+        frameCount -= framesToReadFromMemory;
+        if (frameCount == 0) {
+            break;
+        }
+
+
+        // At this point there are still more frames to read from the client, so we'll need to reload the cache with fresh data.
+        mal_assert(frameCount > 0);
+        pFramesOut += framesToReadFromMemory * channels;
+
+        // The cache has been fully consumed, so restart it from the beginning.
+        pCache->iNextFrame = 0;
+        pCache->cachedFrameCount = 0;
+        if (pCache->pSRC->config.formatIn == mal_format_f32) {
+            // No need for a conversion - read straight into the cache.
+            // The request is limited by both the cache array's capacity and the configured cache size.
+            mal_uint32 framesToReadFromClient = mal_countof(pCache->pCachedFrames) / pCache->pSRC->config.channels;
+            if (framesToReadFromClient > pCache->pSRC->config.cacheSizeInFrames) {
+                framesToReadFromClient = pCache->pSRC->config.cacheSizeInFrames;
+            }
+
+            pCache->cachedFrameCount = pCache->pSRC->onRead(pCache->pSRC, framesToReadFromClient, pCache->pCachedFrames, pCache->pSRC->pUserData);
+        } else {
+            // A format conversion is required which means we need to use an intermediary buffer.
+            // The intermediary buffer has the same byte size as the cache array. The request is limited to
+            // what fits in both buffers for their respective formats, and to the configured cache size.
+            mal_uint8 pIntermediaryBuffer[sizeof(pCache->pCachedFrames)];
+            mal_uint32 framesToReadFromClient = mal_min(mal_buffer_frame_capacity(pIntermediaryBuffer, channels, pCache->pSRC->config.formatIn), mal_buffer_frame_capacity(pCache->pCachedFrames, channels, mal_format_f32));
+            if (framesToReadFromClient > pCache->pSRC->config.cacheSizeInFrames) {
+                framesToReadFromClient = pCache->pSRC->config.cacheSizeInFrames;
+            }
+
+            pCache->cachedFrameCount = pCache->pSRC->onRead(pCache->pSRC, framesToReadFromClient, pIntermediaryBuffer, pCache->pSRC->pUserData);
+
+            // Convert to f32.
+            mal_pcm_convert(pCache->pCachedFrames, mal_format_f32, pIntermediaryBuffer, pCache->pSRC->config.formatIn, pCache->cachedFrameCount * channels);
+        }
+
+
+        // Get out of this loop if nothing was able to be retrieved.
+        if (pCache->cachedFrameCount == 0) {
+            break;
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+mal_uint32 mal_src_read_frames_passthrough(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush);
+mal_uint32 mal_src_read_frames_linear(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush);
+
+// Initializes a sample rate converter.
+//
+// pConfig    Configuration to copy into the SRC. Its channel count must be between 1 and
+//            MAL_MAX_CHANNELS.
+// onRead     Callback the SRC pulls input frames from. Must not be NULL.
+// pUserData  Stored in the SRC next to the callback.
+// pSRC       The object to initialize. It is zeroed before the other arguments are validated.
+//
+// Returns MAL_INVALID_ARGS when an argument is rejected, MAL_SUCCESS otherwise. A cache size of 0
+// or one above MAL_SRC_CACHE_SIZE_IN_FRAMES is replaced with MAL_SRC_CACHE_SIZE_IN_FRAMES.
+mal_result mal_src_init(mal_src_config* pConfig, mal_src_read_proc onRead, void* pUserData, mal_src* pSRC)
+{
+    if (pSRC == NULL) {
+        return MAL_INVALID_ARGS;
+    }
+    mal_zero_object(pSRC);
+
+    if (pConfig == NULL || onRead == NULL) {
+        return MAL_INVALID_ARGS;
+    }
+    if (pConfig->channels == 0 || pConfig->channels > MAL_MAX_CHANNELS) {
+        return MAL_INVALID_ARGS;
+    }
+
+    pSRC->pUserData = pUserData;
+    pSRC->onRead    = onRead;
+    pSRC->config    = *pConfig;
+
+    // Clamp the cache size into the supported range, treating 0 as "use the maximum".
+    if (pSRC->config.cacheSizeInFrames == 0 || pSRC->config.cacheSizeInFrames > MAL_SRC_CACHE_SIZE_IN_FRAMES) {
+        pSRC->config.cacheSizeInFrames = MAL_SRC_CACHE_SIZE_IN_FRAMES;
+    }
+
+    mal_src_cache_init(pSRC, &pSRC->cache);
+
+    return MAL_SUCCESS;
+}
+
+// Changes the output sample rate of an SRC. Returns MAL_INVALID_ARGS for a NULL SRC or a rate of
+// 0, and MAL_SUCCESS once the new rate has been stored in the SRC's config.
+mal_result mal_src_set_output_sample_rate(mal_src* pSRC, mal_uint32 sampleRateOut)
+{
+    // The SRC must exist and the sample rate must be > 0.
+    if (pSRC == NULL || sampleRateOut == 0) {
+        return MAL_INVALID_ARGS;
+    }
+
+    pSRC->config.sampleRateOut = sampleRateOut;
+
+    return MAL_SUCCESS;
+}
+
+// Reads frames from the SRC without flushing. Same as calling mal_src_read_frames_ex() with
+// MAL_FALSE for the flush argument.
+mal_uint32 mal_src_read_frames(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut)
+{
+    mal_uint32 framesRead = mal_src_read_frames_ex(pSRC, frameCount, pFramesOut, MAL_FALSE);
+    return framesRead;
+}
+
+// Reads up to frameCount frames from the SRC into pFramesOut, dispatching to the routine for the
+// configured algorithm. Passthrough is always used when the input and output sample rates are
+// equal, no matter which algorithm is configured.
+//
+// Returns the number of frames read, or 0 when pSRC or pFramesOut is NULL, frameCount is 0, or the
+// configured algorithm is not one of the handled ones.
+mal_uint32 mal_src_read_frames_ex(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush)
+{
+    if (pSRC == NULL) {
+        return 0;
+    }
+    if (frameCount == 0 || pFramesOut == NULL) {
+        return 0;
+    }
+
+    // Equal rates need no resampling, so they take the passthrough route as well.
+    if (pSRC->config.sampleRateIn == pSRC->config.sampleRateOut || pSRC->config.algorithm == mal_src_algorithm_none) {
+        return mal_src_read_frames_passthrough(pSRC, frameCount, pFramesOut, flush);
+    }
+
+    if (pSRC->config.algorithm == mal_src_algorithm_linear) {
+        return mal_src_read_frames_linear(pSRC, frameCount, pFramesOut, flush);
+    }
+
+    return 0;
+}
+
+// Reads frames from the client without changing the sample rate, converting the sample format
+// only when the input and output formats differ.
+//
+// pSRC        The SRC whose onRead callback supplies the input frames.
+// frameCount  Number of frames wanted. Must be greater than 0.
+// pFramesOut  Destination buffer, in the SRC's output format.
+// flush       Ignored; passthrough keeps no state that would need flushing.
+//
+// Returns the number of frames written to pFramesOut. Returns 0 when a conversion is required but
+// the input format has a sample size of 0 (the unknown format), because no frame capacity can be
+// derived for the staging buffer in that case.
+mal_uint32 mal_src_read_frames_passthrough(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush)
+{
+    mal_assert(pSRC != NULL);
+    mal_assert(frameCount > 0);
+    mal_assert(pFramesOut != NULL);
+
+    (void)flush;    // Passthrough need not care about flushing.
+
+    // Fast path. No need for data conversion - just pass right through.
+    if (pSRC->config.formatIn == pSRC->config.formatOut) {
+        return pSRC->onRead(pSRC, frameCount, pFramesOut, pSRC->pUserData);
+    }
+
+    // Slower path. Need to do a format conversion.
+    //
+    // Everything derived from the config is constant for the duration of the call, so it is computed
+    // once up front rather than on every pass of the loop.
+    mal_uint32 channels          = pSRC->config.channels;
+    mal_uint32 bytesPerSampleIn  = mal_get_sample_size_in_bytes(pSRC->config.formatIn);
+    mal_uint32 bytesPerSampleOut = mal_get_sample_size_in_bytes(pSRC->config.formatOut);
+
+    // Both values are divisors below. mal_src_init() rejects a channel count of 0 but does not
+    // validate the formats, so a zero sample size has to be caught here to avoid dividing by zero.
+    if (bytesPerSampleIn == 0 || channels == 0) {
+        return 0;
+    }
+
+    mal_uint8 pStagingBuffer[MAL_MAX_CHANNELS * 2048];
+    mal_uint32 stagingBufferSizeInFrames = sizeof(pStagingBuffer) / bytesPerSampleIn / channels;
+
+    mal_uint32 totalFramesRead = 0;
+    while (frameCount > 0) {
+        mal_uint32 framesToRead = stagingBufferSizeInFrames;
+        if (framesToRead > frameCount) {
+            framesToRead = frameCount;
+        }
+
+        mal_uint32 framesRead = pSRC->onRead(pSRC, framesToRead, pStagingBuffer, pSRC->pUserData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        mal_pcm_convert(pFramesOut, pSRC->config.formatOut, pStagingBuffer, pSRC->config.formatIn, framesRead * channels);
+
+        pFramesOut  = (mal_uint8*)pFramesOut + (framesRead * channels * bytesPerSampleOut);
+        frameCount -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+// Reads frames from the client and resamples them by blending between two neighbouring input
+// frames.
+//
+// State kept in pSRC between calls:
+//   bin                        Two f32 frames back to back: the previous input frame followed by
+//                              the next input frame.
+//   linear.alpha               Fractional position between those two frames, handed to
+//                              mal_blend_f32() (presumably a linear interpolation factor - confirm
+//                              against mal_blend_f32()).
+//   linear.isPrevFramesLoaded  Whether the previous/next slot currently holds a frame read from
+//   linear.isNextFramesLoaded  the cache.
+//
+// pSRC        The SRC to read through.
+// frameCount  Number of output frames wanted. Must be greater than 0.
+// pFramesOut  Destination buffer, in the SRC's output format.
+// flush       When non-zero, output continues after the input has run dry until the previous-frame
+//             slot has been invalidated as well.
+//
+// Returns the number of output frames written to pFramesOut.
+//
+// NOTE(review): the step factor divides by config.sampleRateOut, and mal_src_init() does not reject
+// an output rate of 0 - confirm that callers never configure one.
+mal_uint32 mal_src_read_frames_linear(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush)
+{
+    mal_assert(pSRC != NULL);
+    mal_assert(frameCount > 0);
+    mal_assert(pFramesOut != NULL);
+
+    // For linear SRC, the bin is only 2 frames: 1 prior, 1 future.
+
+    // Load the bin if necessary.
+    if (!pSRC->linear.isPrevFramesLoaded) {
+        mal_uint32 framesRead = mal_src_cache_read_frames(&pSRC->cache, 1, pSRC->bin);
+        if (framesRead == 0) {
+            return 0;
+        }
+        pSRC->linear.isPrevFramesLoaded = MAL_TRUE;
+    }
+    if (!pSRC->linear.isNextFramesLoaded) {
+        mal_uint32 framesRead = mal_src_cache_read_frames(&pSRC->cache, 1, pSRC->bin + pSRC->config.channels);
+        if (framesRead == 0) {
+            return 0;
+        }
+        pSRC->linear.isNextFramesLoaded = MAL_TRUE;
+    }
+
+    // How far the read position advances through the input for each output frame.
+    float factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut;
+
+    mal_uint32 totalFramesRead = 0;
+    while (frameCount > 0) {
+        // The bin is where the previous and next frames are located.
+        float* pPrevFrame = pSRC->bin;
+        float* pNextFrame = pSRC->bin + pSRC->config.channels;
+
+        // The output frame is computed before the bin is advanced. It is written to pFramesOut further down.
+        float pFrame[MAL_MAX_CHANNELS];
+        mal_blend_f32(pFrame, pPrevFrame, pNextFrame, pSRC->linear.alpha, pSRC->config.channels);
+
+        pSRC->linear.alpha += factor;
+
+        // The new alpha value is how we determine whether or not we need to read fresh frames.
+        // The whole part is the number of input frames to step over; the fractional part stays in alpha.
+        mal_uint32 framesToReadFromClient = (mal_uint32)pSRC->linear.alpha;
+        pSRC->linear.alpha = pSRC->linear.alpha - framesToReadFromClient;
+
+        for (mal_uint32 i = 0; i < framesToReadFromClient; ++i) {
+            // Shift the bin: the next frame becomes the previous frame.
+            for (mal_uint32 j = 0; j < pSRC->config.channels; ++j) {
+                pPrevFrame[j] = pNextFrame[j];
+            }
+
+            mal_uint32 framesRead = mal_src_cache_read_frames(&pSRC->cache, 1, pNextFrame);
+            if (framesRead == 0) {
+                // Out of input. The next slot is filled with zeros.
+                for (mal_uint32 j = 0; j < pSRC->config.channels; ++j) {
+                    pNextFrame[j] = 0;
+                }
+
+                // The first failed read invalidates the next slot. A failed read while the next slot is
+                // already invalid also invalidates the previous slot, but only when flushing.
+                if (pSRC->linear.isNextFramesLoaded) {
+                    pSRC->linear.isNextFramesLoaded = MAL_FALSE;
+                } else {
+                    if (flush) {
+                        pSRC->linear.isPrevFramesLoaded = MAL_FALSE;
+                    }
+                }
+
+                break;
+            }
+        }
+
+        mal_pcm_convert(pFramesOut, pSRC->config.formatOut, pFrame, mal_format_f32, 1 * pSRC->config.channels);
+
+        pFramesOut  = (mal_uint8*)pFramesOut + (1 * pSRC->config.channels * mal_get_sample_size_in_bytes(pSRC->config.formatOut));
+        frameCount -= 1;
+        totalFramesRead += 1;
+
+        // If there's no frames available we need to get out of this loop.
+        // Without flush this happens as soon as the next slot is invalid. With flush it happens once
+        // the previous slot is invalid as well.
+        if (!pSRC->linear.isNextFramesLoaded && (!flush || !pSRC->linear.isPrevFramesLoaded)) {
+            break;
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// FORMAT CONVERSION
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void mal_pcm_u8_to_s16(short* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_u8_to_s24(void* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_u8_to_s32(int* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_u8_to_f32(float* pOut, const unsigned char* pIn, unsigned int count);
+void mal_pcm_s16_to_u8(unsigned char* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s16_to_s24(void* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s16_to_s32(int* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s16_to_f32(float* pOut, const short* pIn, unsigned int count);
+void mal_pcm_s24_to_u8(unsigned char* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s24_to_s16(short* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s24_to_s32(int* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s24_to_f32(float* pOut, const void* pIn, unsigned int count);
+void mal_pcm_s32_to_u8(unsigned char* pOut, const int* pIn, unsigned int count);
+void mal_pcm_s32_to_s16(short* pOut, const int* pIn, unsigned int count);
+void mal_pcm_s32_to_s24(void* pOut, const int* pIn, unsigned int count);
+void mal_pcm_s32_to_f32(float* pOut, const int* pIn, unsigned int count);
+void mal_pcm_f32_to_u8(unsigned char* pOut, const float* pIn, unsigned int count);
+void mal_pcm_f32_to_s16(short* pOut, const float* pIn, unsigned int count);
+void mal_pcm_f32_to_s24(void* pOut, const float* pIn, unsigned int count);
+void mal_pcm_f32_to_s32(int* pOut, const float* pIn, unsigned int count);
+
+// Converts sampleCount samples from formatIn to formatOut.
+//
+// pOut         Destination buffer for the converted samples.
+// formatOut    Sample format to write.
+// pIn          Source samples.
+// formatIn     Sample format of the source samples.
+// sampleCount  Number of individual samples (not frames) to convert.
+//
+// When both formats are equal the samples are copied byte for byte. A combination that has no
+// conversion routine (either format is not one of u8, s16, s24, s32 or f32) leaves pOut untouched.
+void mal_pcm_convert(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, unsigned int sampleCount)
+{
+    // Identical formats need no conversion, only a copy.
+    if (formatIn == formatOut) {
+        mal_copy_memory(pOut, pIn, sampleCount * mal_get_sample_size_in_bytes(formatOut));
+        return;
+    }
+
+    if (formatIn == mal_format_u8) {
+        const unsigned char* pSrc = (const unsigned char*)pIn;
+        switch (formatOut)
+        {
+            case mal_format_s16: mal_pcm_u8_to_s16((short*)pOut, pSrc, sampleCount); break;
+            case mal_format_s24: mal_pcm_u8_to_s24(        pOut, pSrc, sampleCount); break;
+            case mal_format_s32: mal_pcm_u8_to_s32(  (int*)pOut, pSrc, sampleCount); break;
+            case mal_format_f32: mal_pcm_u8_to_f32((float*)pOut, pSrc, sampleCount); break;
+            default: break;
+        }
+    } else if (formatIn == mal_format_s16) {
+        const short* pSrc = (const short*)pIn;
+        switch (formatOut)
+        {
+            case mal_format_u8:  mal_pcm_s16_to_u8( (unsigned char*)pOut, pSrc, sampleCount); break;
+            case mal_format_s24: mal_pcm_s16_to_s24(                pOut, pSrc, sampleCount); break;
+            case mal_format_s32: mal_pcm_s16_to_s32(          (int*)pOut, pSrc, sampleCount); break;
+            case mal_format_f32: mal_pcm_s16_to_f32(        (float*)pOut, pSrc, sampleCount); break;
+            default: break;
+        }
+    } else if (formatIn == mal_format_s24) {
+        // The s24 routines take an untyped pointer, so the input is passed through as-is.
+        switch (formatOut)
+        {
+            case mal_format_u8:  mal_pcm_s24_to_u8( (unsigned char*)pOut, pIn, sampleCount); break;
+            case mal_format_s16: mal_pcm_s24_to_s16(        (short*)pOut, pIn, sampleCount); break;
+            case mal_format_s32: mal_pcm_s24_to_s32(          (int*)pOut, pIn, sampleCount); break;
+            case mal_format_f32: mal_pcm_s24_to_f32(        (float*)pOut, pIn, sampleCount); break;
+            default: break;
+        }
+    } else if (formatIn == mal_format_s32) {
+        const int* pSrc = (const int*)pIn;
+        switch (formatOut)
+        {
+            case mal_format_u8:  mal_pcm_s32_to_u8( (unsigned char*)pOut, pSrc, sampleCount); break;
+            case mal_format_s16: mal_pcm_s32_to_s16(        (short*)pOut, pSrc, sampleCount); break;
+            case mal_format_s24: mal_pcm_s32_to_s24(                pOut, pSrc, sampleCount); break;
+            case mal_format_f32: mal_pcm_s32_to_f32(        (float*)pOut, pSrc, sampleCount); break;
+            default: break;
+        }
+    } else if (formatIn == mal_format_f32) {
+        const float* pSrc = (const float*)pIn;
+        switch (formatOut)
+        {
+            case mal_format_u8:  mal_pcm_f32_to_u8( (unsigned char*)pOut, pSrc, sampleCount); break;
+            case mal_format_s16: mal_pcm_f32_to_s16(        (short*)pOut, pSrc, sampleCount); break;
+            case mal_format_s24: mal_pcm_f32_to_s24(                pOut, pSrc, sampleCount); break;
+            case mal_format_s32: mal_pcm_f32_to_s32(          (int*)pOut, pSrc, sampleCount); break;
+            default: break;
+        }
+    }
+}
+
+
+// Reorders the samples of one interleaved u8 frame in place. After the call, slot i holds the sample
+// that was in slot channelMap[i] before the call. No samples are moved when channels is 0 or greater
+// than 18.
+static void mal_rearrange_channels_u8(mal_uint8* pFrame, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS])
+{
+    // Read from a snapshot so that slots written earlier cannot disturb later lookups.
+    mal_uint8 snapshot[MAL_MAX_CHANNELS];
+    mal_copy_memory(snapshot, pFrame, sizeof(snapshot[0]) * channels);
+
+    if (channels > 18) {
+        return;
+    }
+
+    // Walk from the last slot down to the first.
+    for (mal_uint32 iSlot = channels; iSlot-- > 0; ) {
+        pFrame[iSlot] = snapshot[channelMap[iSlot]];
+    }
+}
+
+// Reorders the samples of one interleaved s16 frame in place. After the call, slot i holds the sample
+// that was in slot channelMap[i] before the call. No samples are moved when channels is 0 or greater
+// than 18.
+static void mal_rearrange_channels_s16(mal_int16* pFrame, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS])
+{
+    // Read from a snapshot so that slots written earlier cannot disturb later lookups.
+    mal_int16 snapshot[MAL_MAX_CHANNELS];
+    mal_copy_memory(snapshot, pFrame, sizeof(snapshot[0]) * channels);
+
+    if (channels > 18) {
+        return;
+    }
+
+    // Walk from the last slot down to the first.
+    for (mal_uint32 iSlot = channels; iSlot-- > 0; ) {
+        pFrame[iSlot] = snapshot[channelMap[iSlot]];
+    }
+}
+
+// Reorders the samples of one interleaved s32 frame in place. After the call, slot i holds the sample
+// that was in slot channelMap[i] before the call. No samples are moved when channels is 0 or greater
+// than 18.
+static void mal_rearrange_channels_s32(mal_int32* pFrame, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS])
+{
+    // Read from a snapshot so that slots written earlier cannot disturb later lookups.
+    mal_int32 snapshot[MAL_MAX_CHANNELS];
+    mal_copy_memory(snapshot, pFrame, sizeof(snapshot[0]) * channels);
+
+    if (channels > 18) {
+        return;
+    }
+
+    // Walk from the last slot down to the first.
+    for (mal_uint32 iSlot = channels; iSlot-- > 0; ) {
+        pFrame[iSlot] = snapshot[channelMap[iSlot]];
+    }
+}
+
+// Reorders the samples of one interleaved f32 frame in place. After the call, slot i holds the sample
+// that was in slot channelMap[i] before the call. No samples are moved when channels is 0 or greater
+// than 18.
+static void mal_rearrange_channels_f32(float* pFrame, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS])
+{
+    // Read from a snapshot so that slots written earlier cannot disturb later lookups.
+    float snapshot[MAL_MAX_CHANNELS];
+    mal_copy_memory(snapshot, pFrame, sizeof(snapshot[0]) * channels);
+
+    if (channels > 18) {
+        return;
+    }
+
+    // Walk from the last slot down to the first.
+    for (mal_uint32 iSlot = channels; iSlot-- > 0; ) {
+        pFrame[iSlot] = snapshot[channelMap[iSlot]];
+    }
+}
+
+// Format-agnostic in-place reorder of one interleaved frame. Samples are moved as opaque runs of
+// mal_get_sample_size_in_bytes(format) bytes: slot i receives the bytes that were in slot
+// channelMap[i] before the call. No samples are moved when channels is 0 or greater than 18.
+static void mal_rearrange_channels_generic(void* pFrame, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS], mal_format format)
+{
+    mal_uint32 bytesPerSample = mal_get_sample_size_in_bytes(format);
+
+    mal_uint8 snapshot[MAL_MAX_CHANNELS * 8];   // x8 to ensure it's large enough for all formats.
+    mal_copy_memory(snapshot, pFrame, bytesPerSample * channels);
+
+    if (channels > 18) {
+        return;
+    }
+
+    // Walk from the last slot down to the first, copying one sample's worth of bytes each time.
+    mal_uint8* pFrameBytes = (mal_uint8*)pFrame;
+    for (mal_uint32 iSlot = channels; iSlot-- > 0; ) {
+        mal_copy_memory(pFrameBytes + (iSlot * bytesPerSample), &snapshot[channelMap[iSlot] * bytesPerSample], bytesPerSample);
+    }
+}
+
+// Reorders one interleaved frame in place according to channelMap, picking the typed helper for
+// u8/s16/s32/f32 and the byte-wise helper for every other format.
+static void mal_rearrange_channels(void* pFrame, mal_uint32 channels, mal_uint8 channelMap[MAL_MAX_CHANNELS], mal_format format)
+{
+    if (format == mal_format_u8) {
+        mal_rearrange_channels_u8((mal_uint8*)pFrame, channels, channelMap);
+    } else if (format == mal_format_s16) {
+        mal_rearrange_channels_s16((mal_int16*)pFrame, channels, channelMap);
+    } else if (format == mal_format_s32) {
+        mal_rearrange_channels_s32((mal_int32*)pFrame, channels, channelMap);
+    } else if (format == mal_format_f32) {
+        mal_rearrange_channels_f32((float*)pFrame, channels, channelMap);
+    } else {
+        mal_rearrange_channels_generic(pFrame, channels, channelMap, format);
+    }
+}
+
+// Reduces the channel count of interleaved f32 frames (channelsOut < channelsIn).
+//
+// Basic mode keeps the first channelsOut channels of every frame and drops the rest. Blend mode
+// averages all input channels when the output is mono; every other blend case currently behaves
+// like basic mode. The channel maps are accepted but not used.
+static void mal_dsp_mix_channels__dec(float* pFramesOut, mal_uint32 channelsOut, const mal_uint8 channelMapOut[MAL_MAX_CHANNELS], const float* pFramesIn, mal_uint32 channelsIn, const mal_uint8 channelMapIn[MAL_MAX_CHANNELS], mal_uint32 frameCount, mal_channel_mix_mode mode)
+{
+    mal_assert(pFramesOut != NULL);
+    mal_assert(channelsOut > 0);
+    mal_assert(pFramesIn != NULL);
+    mal_assert(channelsIn > 0);
+    mal_assert(channelsOut < channelsIn);
+
+    (void)channelMapOut;
+    (void)channelMapIn;
+
+    if (mode != mal_channel_mix_mode_basic && channelsOut == 1) {
+        // Blend to mono: simple averaging of every input channel.
+        for (mal_uint32 iFrame = 0; iFrame < frameCount; ++iFrame) {
+            float sum = 0;
+
+            // Accumulate from the last channel down to the first. Counts above 18 contribute nothing.
+            if (channelsIn <= 18) {
+                for (mal_uint32 iChannel = channelsIn; iChannel-- > 0; ) {
+                    sum += pFramesIn[iFrame*channelsIn + iChannel];
+                }
+            }
+
+            pFramesOut[iFrame+0] = sum / channelsIn;
+        }
+
+        return;
+    }
+
+    // Basic mode, which is also what every non-mono blend falls back to (TODO: proper stereo blending
+    // based on spatial locality). Excess input channels are simply dropped.
+    if (channelsOut > 17) {
+        return; // Nothing is copied for output counts above 17.
+    }
+
+    for (mal_uint32 iFrame = 0; iFrame < frameCount; ++iFrame) {
+        // Copy from the last kept channel down to the first.
+        for (mal_uint32 iChannel = channelsOut; iChannel-- > 0; ) {
+            pFramesOut[iFrame*channelsOut + iChannel] = pFramesIn[iFrame*channelsIn + iChannel];
+        }
+    }
+}
+
+// Increases the channel count of interleaved f32 frames (channelsOut > channelsIn).
+//
+// Basic mode copies the channelsIn input channels into the first slots of each output frame and
+// writes silence to the remaining (channelsOut - channelsIn) slots. Blend mode replicates a mono
+// input across every output channel; every other blend case currently behaves like basic mode.
+// The channel maps are accepted but not used.
+static void mal_dsp_mix_channels__inc(float* pFramesOut, mal_uint32 channelsOut, const mal_uint8 channelMapOut[MAL_MAX_CHANNELS], const float* pFramesIn, mal_uint32 channelsIn, const mal_uint8 channelMapIn[MAL_MAX_CHANNELS], mal_uint32 frameCount, mal_channel_mix_mode mode)
+{
+    mal_assert(pFramesOut != NULL);
+    mal_assert(channelsOut > 0);
+    mal_assert(pFramesIn != NULL);
+    mal_assert(channelsIn > 0);
+    mal_assert(channelsOut > channelsIn);
+
+    (void)channelMapOut;
+    (void)channelMapIn;
+
+    if (mode != mal_channel_mix_mode_basic && channelsIn == 1) {
+        // Blend from mono: the single input sample is distributed to every output channel.
+        for (mal_uint32 iFrame = 0; iFrame < frameCount; ++iFrame) {
+            float sample = pFramesIn[iFrame*channelsIn + 0];
+            for (mal_uint32 iChannel = 0; iChannel < channelsOut; ++iChannel) {
+                pFramesOut[iFrame*channelsOut + iChannel] = sample;
+            }
+        }
+
+        return;
+    }
+
+    // Basic mode. This is also the fallback for blending stereo or wider input (TODO: implement an
+    // optimized stereo conversion based on spatial locality). The fallback has to stay on the
+    // channel-increasing path: the channel-decreasing routine assumes channelsOut < channelsIn and would
+    // read past the end of each input frame.
+    for (mal_uint32 iFrame = 0; iFrame < frameCount; ++iFrame) {
+        mal_uint32 iChannel;
+
+        // Existing channels are carried over unchanged.
+        for (iChannel = 0; iChannel < channelsIn; ++iChannel) {
+            pFramesOut[iFrame*channelsOut + iChannel] = pFramesIn[iFrame*channelsIn + iChannel];
+        }
+
+        // Zero out the extra channels, i.e. the slots that follow the copied ones.
+        for (iChannel = channelsIn; iChannel < channelsOut; ++iChannel) {
+            pFramesOut[iFrame*channelsOut + iChannel] = 0;
+        }
+    }
+}
+
+// Converts interleaved f32 frames between channel counts, routing to the channel-increasing routine
+// when channelsIn < channelsOut and to the channel-decreasing routine otherwise.
+static void mal_dsp_mix_channels(float* pFramesOut, mal_uint32 channelsOut, const mal_uint8 channelMapOut[MAL_MAX_CHANNELS], const float* pFramesIn, mal_uint32 channelsIn, const mal_uint8 channelMapIn[MAL_MAX_CHANNELS], mal_uint32 frameCount, mal_channel_mix_mode mode)
+{
+    if (channelsIn >= channelsOut) {
+        // Decreasing the channel count.
+        mal_dsp_mix_channels__dec(pFramesOut, channelsOut, channelMapOut, pFramesIn, channelsIn, channelMapIn, frameCount, mode);
+        return;
+    }
+
+    // Increasing the channel count.
+    mal_dsp_mix_channels__inc(pFramesOut, channelsOut, channelMapOut, pFramesIn, channelsIn, channelMapIn, frameCount, mode);
+}
+
+
+// Read callback installed on the DSP's sample rate converter. pUserData is the owning mal_dsp; the
+// request is forwarded to that DSP's own onRead callback and its frame count is returned.
+mal_uint32 mal_dsp__src_on_read(mal_src* pSRC, mal_uint32 frameCount, void* pFramesOut, void* pUserData)
+{
+    mal_dsp* pOwner = (mal_dsp*)pUserData;
+
+    (void)pSRC;
+    mal_assert(pOwner != NULL);
+
+    mal_uint32 framesRead = pOwner->onRead(pOwner, frameCount, pFramesOut, pOwner->pUserDataForOnRead);
+    return framesRead;
+}
+
+// Initializes a DSP pipeline that pulls frames from onRead (described by the "In" fields of pConfig)
+// and converts them to the format, channel count, sample rate and channel order given by the "Out"
+// fields.
+//
+// pConfig   - Conversion settings; copied into pDSP->config. Must not be NULL.
+// onRead    - Callback the pipeline pulls its input frames from.
+// pUserData - Stored and passed back to onRead on every call.
+// pDSP      - Object to initialize; it is zeroed first.
+//
+// Returns MAL_SUCCESS, MAL_INVALID_ARGS when pDSP or pConfig is NULL, or the error reported by
+// mal_src_init() when a sample rate converter is needed and cannot be created.
+mal_result mal_dsp_init(mal_dsp_config* pConfig, mal_dsp_read_proc onRead, void* pUserData, mal_dsp* pDSP)
+{
+    if (pDSP == NULL) return MAL_INVALID_ARGS;
+    mal_zero_object(pDSP);
+
+    // The config is dereferenced throughout the rest of this function.
+    if (pConfig == NULL) return MAL_INVALID_ARGS;
+
+    pDSP->config = *pConfig;
+    pDSP->onRead = onRead;
+    pDSP->pUserDataForOnRead = pUserData;
+
+    // A cache size of 0 (unspecified) or one that is too large falls back to the maximum.
+    if (pDSP->config.cacheSizeInFrames > MAL_SRC_CACHE_SIZE_IN_FRAMES || pDSP->config.cacheSizeInFrames == 0) {
+        pDSP->config.cacheSizeInFrames = MAL_SRC_CACHE_SIZE_IN_FRAMES;
+    }
+
+    if (pConfig->sampleRateIn != pConfig->sampleRateOut) {
+        pDSP->isSRCRequired = MAL_TRUE;
+
+        // The converter consumes the input format and always produces f32.
+        mal_src_config srcConfig;
+        srcConfig.sampleRateIn = pConfig->sampleRateIn;
+        srcConfig.sampleRateOut = pConfig->sampleRateOut;
+        srcConfig.formatIn = pConfig->formatIn;
+        srcConfig.formatOut = mal_format_f32;
+        srcConfig.channels = pConfig->channelsIn;
+        srcConfig.algorithm = mal_src_algorithm_linear;
+        srcConfig.cacheSizeInFrames = pDSP->config.cacheSizeInFrames;   // Clamped value, same as mal_dsp_set_output_sample_rate().
+        mal_result result = mal_src_init(&srcConfig, mal_dsp__src_on_read, pDSP, &pDSP->src);
+        if (result != MAL_SUCCESS) {
+            return result;
+        }
+    }
+
+
+
+    pDSP->isChannelMappingRequired = MAL_FALSE;
+    if (pConfig->channelMapIn[0] != MAL_CHANNEL_NONE && pConfig->channelMapOut[0] != MAL_CHANNEL_NONE) {    // <-- Channel mapping will be ignored if the first channel map is MAL_CHANNEL_NONE.
+        // When using channel mapping we need to figure out a shuffling table. The first thing to do is convert the input channel map
+        // so that it contains the same number of channels as the output channel count.
+        mal_uint32 iChannel;
+        mal_uint32 channelsMin = mal_min(pConfig->channelsIn, pConfig->channelsOut);
+        for (iChannel = 0; iChannel < channelsMin; ++iChannel) {
+            pDSP->channelMapInPostMix[iChannel] = pConfig->channelMapIn[iChannel];
+        }
+
+        // Any excess channels need to be filled with the relevant channels from the output channel map. Currently we're just filling
+        // each excess slot with the first output channel that is not yet present in the post-mix map. Slots [0, iChannel) hold the
+        // input channels followed by the excess channels assigned so far, so checking against them gives every excess slot a
+        // distinct channel instead of repeating the same one.
+        if (pConfig->channelsOut > pConfig->channelsIn) {
+            for (iChannel = pConfig->channelsIn; iChannel < pConfig->channelsOut; ++iChannel) {
+                mal_uint8 newChannel = MAL_CHANNEL_NONE;
+                for (mal_uint32 iChannelOut = 0; iChannelOut < pConfig->channelsOut; ++iChannelOut) {
+                    mal_bool32 exists = MAL_FALSE;
+                    for (mal_uint32 iAssigned = 0; iAssigned < iChannel; ++iAssigned) {
+                        if (pConfig->channelMapOut[iChannelOut] == pDSP->channelMapInPostMix[iAssigned]) {
+                            exists = MAL_TRUE;
+                            break;
+                        }
+                    }
+
+                    if (!exists) {
+                        newChannel = pConfig->channelMapOut[iChannelOut];
+                        break;
+                    }
+                }
+
+                pDSP->channelMapInPostMix[iChannel] = newChannel;
+            }
+        }
+
+        // We only need to do a channel mapping if the map after mixing is different to the final output map.
+        for (iChannel = 0; iChannel < pConfig->channelsOut; ++iChannel) {
+            if (pDSP->channelMapInPostMix[iChannel] != pConfig->channelMapOut[iChannel]) {
+                pDSP->isChannelMappingRequired = MAL_TRUE;
+                break;
+            }
+        }
+
+        // Now we need to create the shuffling table. For every position in the output map, find the post-mix slot(s) carrying the
+        // same channel and record the output position against that slot.
+        if (pDSP->isChannelMappingRequired) {
+            for (mal_uint32 iOutputPos = 0; iOutputPos < pConfig->channelsOut; ++iOutputPos) {
+                for (mal_uint32 iPostMixPos = 0; iPostMixPos < pConfig->channelsOut; ++iPostMixPos) {
+                    if (pDSP->channelMapInPostMix[iPostMixPos] == pConfig->channelMapOut[iOutputPos]) {
+                        pDSP->channelShuffleTable[iPostMixPos] = (mal_uint8)iOutputPos;
+                    }
+                }
+            }
+        }
+    }
+
+    // With nothing to convert, reads can go straight through to onRead.
+    if (pConfig->formatIn == pConfig->formatOut && pConfig->channelsIn == pConfig->channelsOut && pConfig->sampleRateIn == pConfig->sampleRateOut && !pDSP->isChannelMappingRequired) {
+        pDSP->isPassthrough = MAL_TRUE;
+    } else {
+        pDSP->isPassthrough = MAL_FALSE;
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Changes the output sample rate of an initialized DSP pipeline.
+//
+// An existing sample rate converter is adjusted rather than re-created; one is created on demand when
+// the pipeline did not previously need it. The passthrough flag is refreshed afterwards.
+//
+// Returns MAL_SUCCESS, MAL_INVALID_ARGS when pDSP is NULL or sampleRateOut is 0, or the error reported
+// by mal_src_init() when a new converter cannot be created.
+mal_result mal_dsp_set_output_sample_rate(mal_dsp* pDSP, mal_uint32 sampleRateOut)
+{
+    // The DSP must exist and the sample rate must be > 0.
+    if (pDSP == NULL || sampleRateOut == 0) {
+        return MAL_INVALID_ARGS;
+    }
+
+    pDSP->config.sampleRateOut = sampleRateOut;
+
+    mal_bool32 hadSRC = pDSP->isSRCRequired;
+    mal_bool32 ratesDiffer = (pDSP->config.sampleRateIn != pDSP->config.sampleRateOut);
+
+    if (!ratesDiffer) {
+        // Input and output rates now match, so no conversion is needed.
+        pDSP->isSRCRequired = MAL_FALSE;
+    } else if (hadSRC) {
+        // An SRC pipeline is already in place. Adjust it; do _not_ re-create it.
+        mal_src_set_output_sample_rate(&pDSP->src, sampleRateOut);
+    } else {
+        // No SRC pipeline yet, so a new one is needed.
+        pDSP->isSRCRequired = MAL_TRUE;
+
+        mal_src_config srcConfig;
+        srcConfig.sampleRateIn      = pDSP->config.sampleRateIn;
+        srcConfig.sampleRateOut     = pDSP->config.sampleRateOut;
+        srcConfig.formatIn          = pDSP->config.formatIn;
+        srcConfig.formatOut         = mal_format_f32;
+        srcConfig.channels          = pDSP->config.channelsIn;
+        srcConfig.algorithm         = mal_src_algorithm_linear;
+        srcConfig.cacheSizeInFrames = pDSP->config.cacheSizeInFrames;
+
+        mal_result initResult = mal_src_init(&srcConfig, mal_dsp__src_on_read, pDSP, &pDSP->src);
+        if (initResult != MAL_SUCCESS) {
+            return initResult;
+        }
+    }
+
+    // Update whether or not the pipeline is a passthrough.
+    mal_bool32 nothingToConvert =
+        pDSP->config.formatIn     == pDSP->config.formatOut     &&
+        pDSP->config.channelsIn   == pDSP->config.channelsOut   &&
+        pDSP->config.sampleRateIn == pDSP->config.sampleRateOut &&
+        !pDSP->isChannelMappingRequired;
+    pDSP->isPassthrough = nothingToConvert ? MAL_TRUE : MAL_FALSE;
+
+    return MAL_SUCCESS;
+}
+
+// Reads up to frameCount converted frames into pFramesOut. Equivalent to mal_dsp_read_frames_ex()
+// with flushing disabled; returns the number of frames that call reports.
+mal_uint32 mal_dsp_read_frames(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut)
+{
+    const mal_bool32 flush = MAL_FALSE;
+    return mal_dsp_read_frames_ex(pDSP, frameCount, pFramesOut, flush);
+}
+
+// Reads up to frameCount frames through the DSP pipeline into pFramesOut, applying (in this order)
+// sample rate conversion, channel mixing, channel mapping and the final format conversion.
+//
+// pDSP        - Pipeline to read from.
+// frameCount  - Maximum number of output frames to produce.
+// pFramesOut  - Destination for frames in the configured output format and channel count.
+// flush       - Forwarded to mal_src_read_frames_ex() when sample rate conversion is in use.
+//
+// Returns the number of frames written. This is 0 when pDSP or pFramesOut is NULL, and may be less
+// than frameCount when the source stops delivering frames.
+mal_uint32 mal_dsp_read_frames_ex(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, mal_bool32 flush)
+{
+    if (pDSP == NULL || pFramesOut == NULL) return 0;
+
+    // Fast path.
+    if (pDSP->isPassthrough) {
+        return pDSP->onRead(pDSP, frameCount, pFramesOut, pDSP->pUserDataForOnRead);
+    }
+
+
+    // Slower path - where the real work is done.
+    //
+    // Two scratch buffers are used in a ping-pong fashion: each stage that cannot work in place reads
+    // from pFrames[iFrames] and writes to the other buffer, then flips iFrames. pFramesFormat tracks
+    // the sample format currently held by each buffer.
+    mal_uint8 pFrames[2][MAL_MAX_CHANNELS * 512 * MAL_MAX_SAMPLE_SIZE_IN_BYTES];
+    mal_format pFramesFormat[2];
+    mal_uint32 iFrames = 0; // <-- Used as an index into pFrames and cycles between 0 and 1.
+
+    mal_uint32 totalFramesRead = 0;
+    while (frameCount > 0) {
+        iFrames = 0;
+
+        // Limit each iteration to what fits in a scratch buffer, sized for the larger of the two
+        // channel counts at the maximum sample size.
+        mal_uint32 framesToRead = mal_countof(pFrames[0]) / (mal_max(pDSP->config.channelsIn, pDSP->config.channelsOut) * MAL_MAX_SAMPLE_SIZE_IN_BYTES);
+        if (framesToRead > frameCount) {
+            framesToRead = frameCount;
+        }
+
+        // The initial filling of sample data depends on whether or not we are using SRC.
+        mal_uint32 framesRead = 0;
+        if (pDSP->isSRCRequired) {
+            framesRead = mal_src_read_frames_ex(&pDSP->src, framesToRead, pFrames[iFrames], flush);
+            pFramesFormat[iFrames] = pDSP->src.config.formatOut;  // Should always be f32.
+        } else {
+            framesRead = pDSP->onRead(pDSP, framesToRead, pFrames[iFrames], pDSP->pUserDataForOnRead);
+            pFramesFormat[iFrames] = pDSP->config.formatIn;
+        }
+
+        // The source has no more data; return what has been produced so far.
+        if (framesRead == 0) {
+            break;
+        }
+
+
+        // Channel mixing. The input format must be in f32 which may require a conversion.
+        if (pDSP->config.channelsIn != pDSP->config.channelsOut) {
+            // Only reached without SRC (SRC output is already f32), so the data is still in formatIn.
+            if (pFramesFormat[iFrames] != mal_format_f32) {
+                mal_pcm_convert(pFrames[(iFrames + 1) % 2], mal_format_f32, pFrames[iFrames], pDSP->config.formatIn, framesRead * pDSP->config.channelsIn);
+                iFrames = (iFrames + 1) % 2;
+                pFramesFormat[iFrames] = mal_format_f32;
+            }
+
+            mal_dsp_mix_channels((float*)(pFrames[(iFrames + 1) % 2]), pDSP->config.channelsOut, pDSP->config.channelMapOut, (const float*)(pFrames[iFrames]), pDSP->config.channelsIn, pDSP->config.channelMapIn, framesRead, mal_channel_mix_mode_blend);
+            iFrames = (iFrames + 1) % 2;
+            pFramesFormat[iFrames] = mal_format_f32;
+        }
+
+
+        // Channel mapping. Done in place, one frame at a time, on data that already has channelsOut channels.
+        if (pDSP->isChannelMappingRequired) {
+            for (mal_uint32 i = 0; i < framesRead; ++i) {
+                mal_rearrange_channels(pFrames[iFrames] + (i * pDSP->config.channelsOut * mal_get_sample_size_in_bytes(pFramesFormat[iFrames])), pDSP->config.channelsOut, pDSP->channelShuffleTable, pFramesFormat[iFrames]);
+            }
+        }
+
+
+        // Final conversion to output format.
+        mal_pcm_convert(pFramesOut, pDSP->config.formatOut, pFrames[iFrames], pFramesFormat[iFrames], framesRead * pDSP->config.channelsOut);
+
+        // Advance the output cursor past the frames just written.
+        pFramesOut  = (mal_uint8*)pFramesOut + (framesRead * pDSP->config.channelsOut * mal_get_sample_size_in_bytes(pDSP->config.formatOut));
+        frameCount -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+
+// Calculates how many output frames are needed to hold frameCountIn input frames after converting
+// from sampleRateIn to sampleRateOut. A fractional result is rounded up.
+//
+// Returns 0 when sampleRateIn is 0. Results too large for a 32-bit count saturate at 0xFFFFFFFF.
+mal_uint32 mal_calculate_frame_count_after_src(mal_uint32 sampleRateOut, mal_uint32 sampleRateIn, mal_uint32 frameCountIn)
+{
+    // A zero input rate would make the ratio infinite (or NaN), and converting such a value to an
+    // integer is undefined behaviour.
+    if (sampleRateIn == 0) {
+        return 0;
+    }
+
+    double srcRatio = (double)sampleRateOut / sampleRateIn;
+    double frameCountOutF = frameCountIn * srcRatio;
+
+    // Converting a value outside the 32-bit range is undefined as well, so saturate instead.
+    if (frameCountOutF >= 4294967295.0) {
+        return 0xFFFFFFFF;
+    }
+
+    mal_uint32 frameCountOut = (mal_uint32)frameCountOutF;
+
+    // If the output frame count is fractional, make sure we add an extra frame to ensure there's enough room for that last sample.
+    if ((frameCountOutF - frameCountOut) > 0.0) {
+        frameCountOut += 1;
+    }
+
+    return frameCountOut;
+}
+
+// Read cursor over a block of input frames. A pointer to this is passed as the user data of
+// mal_convert_frames__on_read(), which copies frames out of pDataIn and advances iNextFrame.
+typedef struct
+{
+    // Start of the input frames.
+    const void* pDataIn;
+    // Sample format of the input; with channelsIn it determines the size of one frame in bytes.
+    mal_format formatIn;
+    // Number of channels per input frame.
+    mal_uint32 channelsIn;
+    // Total number of frames available at pDataIn.
+    mal_uint32 totalFrameCount;
+    // Index of the next frame to hand out; never exceeds totalFrameCount.
+    mal_uint32 iNextFrame;
+} mal_convert_frames__data;
+
+// Read callback installed by mal_convert_frames(): copies up to frameCount raw input frames from the
+// caller's buffer into pFramesOut and returns how many were copied (fewer once the buffer runs out).
+mal_uint32 mal_convert_frames__on_read(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, void* pUserData)
+{
+    (void)pDSP;     // Unused here.
+    mal_convert_frames__data* pState = (mal_convert_frames__data*)pUserData;
+    mal_assert(pState != NULL);
+    mal_assert(pState->totalFrameCount >= pState->iNextFrame);
+
+    // Clamp the request to whatever is left in the source buffer.
+    mal_uint32 framesLeft = (pState->totalFrameCount - pState->iNextFrame);
+    mal_uint32 framesToCopy = (frameCount > framesLeft) ? framesLeft : frameCount;
+
+    mal_uint32 bytesPerFrame = mal_get_sample_size_in_bytes(pState->formatIn) * pState->channelsIn;
+    const mal_uint8* pSrc = (const mal_uint8*)pState->pDataIn + (bytesPerFrame * pState->iNextFrame);
+    mal_copy_memory(pFramesOut, pSrc, bytesPerFrame * framesToCopy);
+
+    pState->iNextFrame += framesToCopy;
+    return framesToCopy;
+}
+
+// One-shot bulk conversion of frameCountIn frames from pIn (formatIn/channelsIn/sampleRateIn) into pOut
+// (formatOut/channelsOut/sampleRateOut). Returns what mal_dsp_read_frames_ex() reports for the conversion.
+// If pOut is NULL nothing is converted and the required output frame count is returned instead.
+mal_uint32 mal_convert_frames(void* pOut, mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut, const void* pIn, mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_uint32 frameCountIn)
+{
+    if (frameCountIn == 0) {
+        return 0;
+    }
+
+    mal_uint32 framesNeeded = mal_calculate_frame_count_after_src(sampleRateOut, sampleRateIn, frameCountIn);
+    if (pOut == NULL) {
+        return framesNeeded;    // Size query only.
+    }
+
+    mal_convert_frames__data source;    // Cursor over the caller's input, consumed by the read callback.
+    source.pDataIn = pIn;
+    source.formatIn = formatIn;
+    source.channelsIn = channelsIn;
+    source.totalFrameCount = frameCountIn;
+    source.iNextFrame = 0;
+
+    mal_dsp_config dspConfig;
+    mal_zero_object(&dspConfig);        // Every member not set below stays zeroed.
+    dspConfig.formatIn     = formatIn;      dspConfig.formatOut     = formatOut;
+    dspConfig.channelsIn   = channelsIn;    dspConfig.channelsOut   = channelsOut;
+    dspConfig.sampleRateIn = sampleRateIn;  dspConfig.sampleRateOut = sampleRateOut;
+
+    mal_dsp converter;
+    if (mal_dsp_init(&dspConfig, mal_convert_frames__on_read, &source, &converter) != MAL_SUCCESS) {
+        return 0;   // Conversion could not be set up.
+    }
+
+    return mal_dsp_read_frames_ex(&converter, framesNeeded, pOut, MAL_TRUE);
+}
+
+// Builds a mal_dsp_config describing a conversion from the given input format/channels/rate to the given output ones.
+mal_dsp_config mal_dsp_config_init(mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut)
+{
+    mal_dsp_config result;
+    mal_zero_object(&result);           // Every member not set below stays zeroed.
+    result.formatIn      = formatIn;
+    result.formatOut     = formatOut;
+    result.channelsIn    = channelsIn;
+    result.channelsOut   = channelsOut;
+    result.sampleRateIn  = sampleRateIn;
+    result.sampleRateOut = sampleRateOut;
+    return result;
+}
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Miscellaneous Helpers
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Returns a human-readable name for a backend as a string literal (never NULL).
+// Values without an entry below yield "Unknown".
+const char* mal_get_backend_name(mal_backend backend)
+{
+    switch (backend) {
+        case mal_backend_null:   return "Null";
+        case mal_backend_wasapi: return "WASAPI";
+        case mal_backend_dsound: return "DirectSound";
+        case mal_backend_winmm:  return "WinMM";
+        case mal_backend_alsa:   return "ALSA";
+        case mal_backend_oss:    return "OSS";
+        case mal_backend_opensl: return "OpenSL|ES";
+        case mal_backend_openal: return "OpenAL";
+        case mal_backend_sdl:    return "SDL";
+
+        // Entries for mal_backend_pulse ("PulseAudio"), mal_backend_jack ("JACK") and
+        // mal_backend_coreaudio ("Core Audio") are intentionally not enabled in this revision.
+        default:                 return "Unknown";
+    }
+}
+
+// Returns a human-readable description of a sample format as a string literal; unlisted values yield "Invalid".
+const char* mal_get_format_name(mal_format format)
+{
+    switch (format) {
+        case mal_format_f32:     return "32-bit IEEE Floating Point";
+        case mal_format_s32:     return "32-bit Signed Integer";
+        case mal_format_s24:     return "24-bit Signed Integer (Tightly Packed)";
+        case mal_format_s16:     return "16-bit Signed Integer";
+        case mal_format_u8:      return "8-bit Unsigned Integer";
+        case mal_format_unknown: return "Unknown";
+        default:                 return "Invalid";
+    }
+}
+
+// For each of `channels` samples, stores mal_mix_f32(pInA[c], pInB[c], factor) into pOut[c].
+void mal_blend_f32(float* pOut, float* pInA, float* pInB, float factor, mal_uint32 channels)
+{
+    for (mal_uint32 iChannel = 0; iChannel < channels; iChannel += 1)
+        pOut[iChannel] = mal_mix_f32(pInA[iChannel], pInB[iChannel], factor);
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//
+//
+// AUTO-GENERATED
+//
+//
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// FORMAT CONVERSION
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Converts `count` unsigned 8-bit samples to signed 16-bit samples.
+void mal_pcm_u8_to_s16(short* pOut, const unsigned char* pIn, unsigned int count)
+{
+    // Re-centre around zero, then scale by 2^8. Multiplying rather than shifting keeps negative
+    // intermediates well-defined: left-shifting a negative int is undefined behaviour in C.
+    for (unsigned int i = 0; i < count; ++i) {
+        int centred = (int)pIn[i] - 128;
+        pOut[i] = (short)(centred * 256);
+    }
+}
+
+// Converts `count` unsigned 8-bit samples to tightly packed signed 24-bit samples (3 bytes each, low byte first).
+void mal_pcm_u8_to_s24(void* pOut, const unsigned char* pIn, unsigned int count)
+{
+    unsigned char* pDst = (unsigned char*)pOut;
+    for (unsigned int i = 0; i < count; ++i, pDst += 3) {
+        // The re-centred 8-bit value becomes the most significant byte and the two low bytes are zero. Writing
+        // it directly avoids "(x - 128) << 16", which left-shifts a negative int (undefined behaviour in C).
+        pDst[0] = pDst[1] = 0;
+        pDst[2] = (unsigned char)((int)pIn[i] - 128);
+    }
+}
+
+// Converts `count` unsigned 8-bit samples to signed 32-bit samples.
+void mal_pcm_u8_to_s32(int* pOut, const unsigned char* pIn, unsigned int count)
+{
+    // Re-centre around zero, then scale by 2^24. Multiplying rather than shifting keeps negative
+    // intermediates well-defined in C; every product (-2^31 .. 127 * 2^24) fits in a 32-bit int.
+    for (unsigned int i = 0; i < count; ++i) {
+        int centred = (int)pIn[i] - 128;
+        pOut[i] = centred * 16777216;
+    }
+}
+
+// Converts `count` unsigned 8-bit samples to 32-bit float: 0 maps to -1 and 255 to approximately +1.
+void mal_pcm_u8_to_f32(float* pOut, const unsigned char* pIn, unsigned int count)
+{
+    const float scale = 0.00784313725490196078f;    // 2/255
+    for (unsigned int i = 0; i < count; ++i) {
+        // Scale [0, 255] to [0, 2], then shift down to centre the range on zero.
+        float scaled = (int)pIn[i] * scale;
+        pOut[i] = scaled - 1;
+    }
+}
+
+// Converts `count` signed 16-bit samples to unsigned 8-bit by keeping the high byte of each sample
+// and moving it from the signed range [-128, 127] to [0, 255].
+void mal_pcm_s16_to_u8(unsigned char* pOut, const short* pIn, unsigned int count)
+{
+    const short* pEnd = pIn + count;
+    while (pIn != pEnd) {
+        int high = (int)(*pIn++) >> 8;      // Relies on ">>" of a negative int being an arithmetic shift.
+        *pOut++ = (unsigned char)(high + 128);
+    }
+}
+
+// Converts `count` signed 16-bit samples to tightly packed signed 24-bit samples (3 bytes each, low byte first).
+void mal_pcm_s16_to_s24(void* pOut, const short* pIn, unsigned int count)
+{
+    unsigned char* pDst = (unsigned char*)pOut;
+    for (unsigned int i = 0; i < count; ++i, pDst += 3) {
+        unsigned int bits = (unsigned int)pIn[i];   // Modular conversion: well-defined for negative samples, unlike "<< 8" on a negative int.
+        pDst[0] = 0; pDst[1] = (unsigned char)(bits & 0xFF); pDst[2] = (unsigned char)((bits >> 8) & 0xFF);
+    }
+}
+
+// Converts `count` signed 16-bit samples to signed 32-bit by scaling each sample by 2^16.
+void mal_pcm_s16_to_s32(int* pOut, const short* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i < count; ++i) {
+        // Multiply instead of "<< 16": left-shifting a negative int is undefined behaviour in C, while
+        // every product here (-2^31 .. 32767 * 2^16) fits in a 32-bit int.
+        pOut[i] = (int)pIn[i] * 65536;
+    }
+}
+
+// Converts `count` signed 16-bit samples to 32-bit float: -32768 maps to -1 and 32767 to approximately +1.
+void mal_pcm_s16_to_f32(float* pOut, const short* pIn, unsigned int count)
+{
+    const float scale = 0.00003051804379339284f;    // 2/65535
+    for (unsigned int i = 0; i < count; ++i) {
+        // Bias to [0, 65535], scale to [0, 2], then shift down to centre the range on zero.
+        float biased = (float)((int)pIn[i] + 32768);
+        float scaled = biased * scale;
+        pOut[i] = scaled - 1;
+    }
+}
+
+// Converts `count` tightly packed signed 24-bit samples (3 bytes each, low byte first) to unsigned 8-bit.
+void mal_pcm_s24_to_u8(unsigned char* pOut, const void* pIn, unsigned int count)
+{
+    const unsigned char* pSrc = (const unsigned char*)pIn;
+    for (unsigned int i = 0; i < count; ++i, pSrc += 3) {
+        unsigned int packed = ((unsigned int)pSrc[0] << 8) | ((unsigned int)pSrc[1] << 16) | ((unsigned int)pSrc[2] << 24);
+        int sample = (int)packed >> 8;      // Sign-extended 24-bit value (relies on arithmetic ">>").
+        pOut[i] = (unsigned char)((sample >> 16) + 128);
+    }
+}
+
+// Converts `count` tightly packed signed 24-bit samples (3 bytes each, low byte first) to signed 16-bit.
+void mal_pcm_s24_to_s16(short* pOut, const void* pIn, unsigned int count)
+{
+    const unsigned char* pSrc = (const unsigned char*)pIn;
+    for (unsigned int i = 0; i < count; ++i, pSrc += 3) {
+        unsigned int packed = ((unsigned int)pSrc[0] << 8) | ((unsigned int)pSrc[1] << 16) | ((unsigned int)pSrc[2] << 24);
+        pOut[i] = (short)(((int)packed >> 8) >> 8);     // Sign-extend to 24 bits, then drop the low byte.
+    }
+}
+
+// Converts `count` tightly packed signed 24-bit samples (3 bytes each, low byte first) to signed 32-bit (scaled by 2^8).
+void mal_pcm_s24_to_s32(int* pOut, const void* pIn, unsigned int count)
+{
+    const unsigned char* pSrc = (const unsigned char*)pIn;
+    for (unsigned int i = 0; i < count; ++i, pSrc += 3) {
+        // The three bytes placed in bits 8..31 already are the 32-bit result, so no signed value is ever left-shifted.
+        pOut[i] = (int)(((unsigned int)pSrc[0] << 8) | ((unsigned int)pSrc[1] << 16) | ((unsigned int)pSrc[2] << 24));
+    }
+}
+
+// Converts `count` tightly packed signed 24-bit samples (3 bytes each, low byte first) to 32-bit float.
+void mal_pcm_s24_to_f32(float* pOut, const void* pIn, unsigned int count)
+{
+    const unsigned char* pSrc = (const unsigned char*)pIn;
+    for (unsigned int i = 0; i < count; ++i, pSrc += 3) {
+        int sample = (int)(((unsigned int)pSrc[0] << 8) | ((unsigned int)pSrc[1] << 16) | ((unsigned int)pSrc[2] << 24)) >> 8;
+        float scaled = (float)(sample + 8388608);       // Bias the sign-extended sample to [0, 2^24 - 1].
+        scaled = scaled * 0.00000011920929665621f;      // Scale to [0, 2].
+        pOut[i] = scaled - 1;                           // Centre the range on zero.
+    }
+}
+
+// Converts `count` signed 32-bit samples to unsigned 8-bit, keeping only the most significant
+// byte of each sample.
+void mal_pcm_s32_to_u8(unsigned char* pOut, const int* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i != count; i += 1) {
+        int top = pIn[i] >> 24;                 // Signed top byte (relies on arithmetic ">>").
+        pOut[i] = (unsigned char)(top + 128);   // Move [-128, 127] up to [0, 255].
+    }
+}
+
+
+// Converts `count` signed 32-bit samples to signed 16-bit, keeping only the upper
+// 16 bits of each sample.
+void mal_pcm_s32_to_s16(short* pOut, const int* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i != count; i += 1) {
+        int upper = pIn[i] >> 16;       // Relies on ">>" of a negative int being an arithmetic shift.
+        pOut[i] = (short)upper;
+    }
+}
+
+// Converts `count` signed 32-bit samples to tightly packed signed 24-bit samples (3 bytes each, low byte first).
+void mal_pcm_s32_to_s24(void* pOut, const int* pIn, unsigned int count)
+{
+    unsigned char* pDst = (unsigned char*)pOut;
+    for (unsigned int i = 0; i < count; ++i, pDst += 3) {
+        int top24 = pIn[i] >> 8;    // Drop the least significant byte of the 32-bit sample.
+        pDst[0] = (unsigned char)(top24 & 0xFF); pDst[1] = (unsigned char)((top24 & 0xFF00) >> 8); pDst[2] = (unsigned char)((top24 & 0xFF0000) >> 16);
+    }
+}
+
+// Converts `count` signed 32-bit samples to 32-bit float in [-1, 1].
+void mal_pcm_s32_to_f32(float* pOut, const int* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i < count; ++i) {
+        // Bias into [0, 2^32) in double precision. The addition must not be done in int: for any positive
+        // sample "x + 2147483647" overflows a 32-bit int (undefined behaviour; when it wraps, the result
+        // comes out near -2 instead of inside (0, 1]).
+        double t = (double)pIn[i] + 2147483648.0;
+        // Scale by 2^-31 to reach [0, 2), then centre the range on zero.
+        t = t * 0.0000000004656612873077392578125;
+        pOut[i] = (float)(t - 1);
+    }
+}
+
+// Converts `count` 32-bit float samples to unsigned 8-bit. Input is clipped to [-1, 1] first,
+// so -1 (and below) maps to 0 and +1 (and above) maps to 255.
+void mal_pcm_f32_to_u8(unsigned char* pOut, const float* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i < count; ++i) {
+        float clipped = pIn[i];
+        if (clipped < -1) { clipped = -1; }
+        if (clipped >  1) { clipped =  1; }
+        clipped = clipped + 1;      // [-1, 1] -> [0, 2]
+        pOut[i] = (unsigned char)(int)(clipped * 127.5f);
+    }
+}
+
+// Converts `count` 32-bit float samples to signed 16-bit. Input is clipped to [-1, 1] first,
+// so -1 (and below) maps to -32768 and +1 (and above) maps to 32767.
+void mal_pcm_f32_to_s16(short* pOut, const float* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i < count; ++i) {
+        float clipped = pIn[i];
+        if (clipped < -1) { clipped = -1; }
+        if (clipped >  1) { clipped =  1; }
+        clipped = clipped + 1;      // [-1, 1] -> [0, 2]
+        int scaled = (int)(clipped * 32767.5f);     // [0, 65535]
+        pOut[i] = (short)(scaled - 32768);
+    }
+}
+
+// Converts `count` 32-bit float samples to tightly packed signed 24-bit samples (3 bytes each, low byte first).
+void mal_pcm_f32_to_s24(void* pOut, const float* pIn, unsigned int count)
+{
+    unsigned char* pDst = (unsigned char*)pOut;
+    for (unsigned int i = 0; i < count; ++i, pDst += 3) {
+        float clipped = pIn[i];     // Clipped to [-1, 1] by the two tests below.
+        if (clipped < -1) { clipped = -1; }
+        if (clipped >  1) { clipped =  1; }
+        clipped = clipped + 1;      // [-1, 1] -> [0, 2]
+        int s24 = (int)(clipped * 8388607.5f) - 8388608;
+        pDst[0] = (unsigned char)(s24 & 0xFF); pDst[1] = (unsigned char)((s24 & 0xFF00) >> 8); pDst[2] = (unsigned char)((s24 & 0xFF0000) >> 16);
+    }
+}
+
+// Converts `count` 32-bit float samples to signed 32-bit, clipping the input to [-1, 1] first.
+void mal_pcm_f32_to_s32(int* pOut, const float* pIn, unsigned int count)
+{
+    for (unsigned int i = 0; i < count; ++i) {
+        float clipped = pIn[i];
+        if (clipped < -1) { clipped = -1; }
+        if (clipped >  1) { clipped =  1; }
+        clipped = clipped + 1;      // [-1, 1] -> [0, 2]
+        // The scaled value spans [0, 4294967295], which does not fit in an int, so the intermediate is
+        // held in 64 bits and only narrowed once it has been moved back to a range centred on zero.
+        mal_int64 wide = (mal_int64)(clipped * 2147483647.5);
+        wide = wide - 2147483647;
+        pOut[i] = (int)(wide - 1);
+    }
+}
+
+#endif
+
+
+// REVISION HISTORY
+// ================
+//
+// v0.x - 2017-xx-xx
+//   - API CHANGE: Expose and improve mutex APIs. If you were using the mutex APIs before this version you'll
+//     need to update.
+//   - API CHANGE: SRC and DSP callbacks now take a pointer to a mal_src and mal_dsp object respectively.
+//   - API CHANGE: Improvements to event and thread APIs. These changes make these APIs more consistent.
+//   - Add support for SDL and Emscripten.
+//   - Simplify the build system further for when development packages for various backends are not installed.
+//     With this change, when the compiler supports __has_include, backends without the relevant development
+//     packages installed will be ignored. This fixes the build for old versions of MinGW.
+//   - Fixes to the Android build.
+//   - Add mal_convert_frames(). This is a high-level helper API for performing a one-time, bulk conversion of
+//     audio data to a different format.
+//   - Improvements to f32 -> u8/s16/s24/s32 conversion routines.
+//   - Fix a bug where the wrong value is returned from mal_device_start() for the OpenSL and SDL backends.
+//   - Fixes and improvements for Raspberry Pi.
+//   - Warning fixes.
+//
+// v0.5 - 2017-11-11
+//   - API CHANGE: The mal_context_init() function now takes a pointer to a mal_context_config object for
+//     configuring the context. This works in the same kind of way as the device config. The rationale for this
+//     change is to give applications better control over context-level properties, add support for backend-
+//     specific configurations, and support extensibility without breaking the API.
+//   - API CHANGE: The alsa.preferPlugHW device config variable has been removed since it's not really useful for
+//     anything anymore.
+//   - ALSA: By default, device enumeration will now only enumerate over unique card/device pairs. Applications
+//     can enable verbose device enumeration by setting the alsa.useVerboseDeviceEnumeration context config
+//     variable.
+//   - ALSA: When opening a device in shared mode (the default), the dmix/dsnoop plugin will be prioritized. If
+//     this fails it will fall back to the hw plugin. With this change the preferExclusiveMode config is now
+//     honored. Note that this does not happen when alsa.useVerboseDeviceEnumeration is set to true (see above)
+//     which is by design.
+//   - ALSA: Add support for excluding the "null" device using the alsa.excludeNullDevice context config variable.
+//   - ALSA: Fix a bug with channel mapping which causes an assertion to fail.
+//   - Fix errors with enumeration when pInfo is set to NULL.
+//   - OSS: Fix a bug when starting a device when the client sends 0 samples for the initial buffer fill.
+//
+// v0.4 - 2017-11-05
+//   - API CHANGE: The log callback is now per-context rather than per-device and is thus now passed to
+//     mal_context_init(). The rationale for this change is that it allows applications to capture diagnostic
+//     messages at the context level. Previously this was only available at the device level.
+//   - API CHANGE: The device config passed to mal_device_init() is now const.
+//   - Added support for OSS which enables support on BSD platforms.
+//   - Added support for WinMM (waveOut/waveIn).
+//   - Added support for UWP (Universal Windows Platform) applications. Currently C++ only.
+//   - Added support for exclusive mode for selected backends. Currently supported on WASAPI.
+//   - POSIX builds no longer require explicit linking to libpthread (-lpthread).
+//   - ALSA: Explicit linking to libasound (-lasound) is no longer required.
+//   - ALSA: Latency improvements.
+//   - ALSA: Use MMAP mode where available. This can be disabled with the alsa.noMMap config.
+//   - ALSA: Use "hw" devices instead of "plughw" devices by default. This can be disabled with the
+//     alsa.preferPlugHW config.
+//   - WASAPI is now the highest priority backend on Windows platforms.
+//   - Fixed an error with sample rate conversion which was causing crackling when capturing.
+//   - Improved error handling.
+//   - Improved compiler support.
+//   - Miscellaneous bug fixes.
+//
+// v0.3 - 2017-06-19
+//   - API CHANGE: Introduced the notion of a context. The context is the highest level object and is required for
+//     enumerating and creating devices. Now, applications must first create a context, and then use that to
+//     enumerate and create devices. The reason for this change is to ensure device enumeration and creation is
+//     tied to the same backend. In addition, some backends are better suited to this design.
+//   - API CHANGE: Removed the rewinding APIs because they're too inconsistent across the different backends, hard
+//     to test and maintain, and just generally unreliable.
+//   - Added helper APIs for initializing mal_device_config objects.
+//   - Null Backend: Fixed a crash when recording.
+//   - Fixed build for UWP.
+//   - Added support for f32 formats to the OpenSL|ES backend.
+//   - Added initial implementation of the WASAPI backend.
+//   - Added initial implementation of the OpenAL backend.
+//   - Added support for low quality linear sample rate conversion.
+//   - Added early support for basic channel mapping.
+//
+// v0.2 - 2016-10-28
+//   - API CHANGE: Add user data pointer as the last parameter to mal_device_init(). The rationale for this
+//     change is to ensure the logging callback has access to the user data during initialization.
+//   - API CHANGE: Have device configuration properties be passed to mal_device_init() via a structure. Rationale:
+//     1) The number of parameters is just getting too much.
+//     2) It makes it a bit easier to add new configuration properties in the future. In particular, there's a
+//        chance there will be support added for backend-specific properties.
+//   - Dropped support for f64, A-law and Mu-law formats since they just aren't common enough to justify the
+//     added maintenance cost.
+//   - DirectSound: Increased the default buffer size for capture devices.
+//   - Added initial implementation of the OpenSL|ES backend.
+//
+// v0.1 - 2016-10-21
+//   - Initial versioned release.
+
+
+/*
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+*/
diff --git a/raylib/gestures.h b/raylib/gestures.h
index 60d5172..68bdc11 100644
--- a/raylib/gestures.h
+++ b/raylib/gestures.h
@@ -140,17 +140,25 @@ float GetGesturePinchAngle(void);                       // Get gesture pinch ang
 
 #if defined(GESTURES_IMPLEMENTATION)
 
-#include <math.h>               // Required for: atan2(), sqrt()
-#include <stdint.h>             // Required for: uint64_t
-
 #if defined(_WIN32)
     // Functions required to query time on Windows
     int __stdcall QueryPerformanceCounter(unsigned long long int *lpPerformanceCount);
     int __stdcall QueryPerformanceFrequency(unsigned long long int *lpFrequency);
 #elif defined(__linux__)
-    //#define _POSIX_C_SOURCE 199309L // Required for CLOCK_MONOTONIC if compiled with c99 without gnu ext.
+    #if _POSIX_C_SOURCE < 199309L
+        #undef _POSIX_C_SOURCE
+        #define _POSIX_C_SOURCE 199309L // Required for CLOCK_MONOTONIC if compiled with c99 without gnu ext.
+    #endif
     #include <sys/time.h>           // Required for: timespec
     #include <time.h>               // Required for: clock_gettime()
+
+    #include <math.h>               // Required for: atan2(), sqrt()
+    #include <stdint.h>             // Required for: uint64_t
+#endif
+
+#if defined(__APPLE__)              // macOS also defines __MACH__
+    #include <mach/clock.h>         // Required for: clock_get_time()
+    #include <mach/mach.h>          // Required for: mach_timespec_t
 #endif
 
 //----------------------------------------------------------------------------------
@@ -529,6 +537,22 @@ static double GetCurrentTime(void)
     time = ((double)nowTime/1000000.0);     // Time in miliseconds
 #endif
 
+#if defined(__APPLE__)
+    //#define CLOCK_REALTIME  CALENDAR_CLOCK
+    //#define CLOCK_MONOTONIC SYSTEM_CLOCK
+    
+    clock_serv_t cclock;
+    mach_timespec_t now;
+    host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock);
+    
+    // NOTE: OS X does not have clock_gettime(), using clock_get_time()
+    clock_get_time(cclock, &now);
+    mach_port_deallocate(mach_task_self(), cclock);
+    uint64_t nowTime = (uint64_t)now.tv_sec*1000000000LLU + (uint64_t)now.tv_nsec;     // Time in nanoseconds
+
+    time = ((double)nowTime/1000000.0);     // Time in miliseconds    
+#endif
+
     return time;
 }
 
diff --git a/raylib/mini_al.c b/raylib/mini_al.c
new file mode 100644
index 0000000..7b43785
--- /dev/null
+++ b/raylib/mini_al.c
@@ -0,0 +1,4 @@
+// The implementation of mini_al needs to #include windows.h which means it needs to go into
+// its own translation unit. Not doing this will cause conflicts with CloseWindow(), etc.
+#define MAL_IMPLEMENTATION
+#include "mini_al.h"
\ No newline at end of file
diff --git a/raylib/raylib.h b/raylib/raylib.h
index e5ad8a9..a3914c4 100644
--- a/raylib/raylib.h
+++ b/raylib/raylib.h
@@ -1,23 +1,23 @@
 /**********************************************************************************************
 *
-*   raylib v1.8.0 
+*   raylib v1.9-dev
 *
 *   A simple and easy-to-use library to learn videogames programming (www.raylib.com)
 *
 *   FEATURES:
 *       - Written in plain C code (C99) in PascalCase/camelCase notation
-*       - Multiple platforms support: Windows, Linux, Mac, Android, Raspberry Pi and HTML5
-*       - Hardware accelerated with OpenGL (1.1, 2.1, 3.3 or ES 2.0)
+*       - Hardware accelerated with OpenGL (1.1, 2.1, 3.3 or ES2 - choose at compile)
 *       - Unique OpenGL abstraction layer (usable as standalone module): [rlgl]
-*       - Powerful fonts module with SpriteFonts support (XNA bitmap fonts, AngelCode fonts, TTF)
-*       - Outstanding texture formats support, including compressed formats (DXT, ETC, PVRT, ASTC)
-*       - Basic 3d support for Geometrics, Models, Billboards, Heightmaps and Cubicmaps
+*       - Powerful fonts module with SpriteFonts support (XNA fonts, AngelCode fonts, TTF)
+*       - Outstanding texture formats support, including compressed formats (DXT, ETC, ASTC)
+*       - Full 3d support for 3d Shapes, Models, Billboards, Heightmaps and more!
 *       - Flexible Materials system, supporting classic maps and PBR maps
 *       - Shaders support, including Model shaders and Postprocessing shaders
-*       - Powerful math module for Vector2, Vector3, Matrix and Quaternion operations: [raymath]
-*       - Audio loading and playing with streaming support and mixing channels: [audio]
-*       - VR stereo rendering support with configurable HMD device parameters
-*       - Minimal external dependencies (GLFW3, OpenGL, OpenAL)
+*       - Powerful math module for Vector, Matrix and Quaternion operations: [raymath]
+*       - Audio loading and playing with streaming support (WAV, OGG, FLAC, XM, MOD)
+*       - Multiple platforms support: Windows, Linux, FreeBSD, MacOS, UWP, Android, Raspberry Pi, HTML5.
+*       - VR stereo rendering with configurable HMD device parameters
+*       - NO external dependencies, all required libraries included with raylib
 *       - Complete bindings to LUA (raylib-lua) and Go (raylib-go)
 *
 *   NOTES:
@@ -25,17 +25,17 @@
 *       If using OpenGL 3.3 or ES2, one default shader is loaded automatically (internally defined) [rlgl]
 *       If using OpenGL 3.3 or ES2, several vertex buffers (VAO/VBO) are created to manage lines-triangles-quads
 *
-*   DEPENDENCIES:
-*       GLFW3 (www.glfw.org) for window/context management and input [core]
-*       GLAD for OpenGL extensions loading (3.3 Core profile, only PLATFORM_DESKTOP) [rlgl]
-*       OpenAL Soft for audio device/context management [audio]
+*   DEPENDENCIES (included):
+*       rglfw (github.com/glfw/glfw) for window/context management and input (only PLATFORM_DESKTOP) [core]
+*       glad (github.com/Dav1dde/glad) for OpenGL extensions loading (3.3 Core profile, only PLATFORM_DESKTOP) [rlgl]
+*       mini_al (github.com/dr-soft/mini_al) for audio device/context management [audio]
 *
-*   OPTIONAL DEPENDENCIES:
-*       stb_image (Sean Barret) for images loading (JPEG, PNG, BMP, TGA) [textures]
+*   OPTIONAL DEPENDENCIES (included):
+*       stb_image (Sean Barret) for images loading (BMP, TGA, PNG, JPEG, HDR...) [textures]
 *       stb_image_resize (Sean Barret) for image resizing algorythms [textures]
 *       stb_image_write (Sean Barret) for image writting (PNG) [utils]
 *       stb_truetype (Sean Barret) for ttf fonts loading [text]
-*       stb_vorbis (Sean Barret) for ogg audio loading [audio]
+*       stb_vorbis (Sean Barret) for OGG audio loading [audio]
 *       stb_perlin (Sean Barret) for Perlin noise image generation [textures]
 *       par_shapes (Philip Rideout) for parametric 3d shapes generation [models]
 *       jar_xm (Joshua Reisenauer) for XM audio module loading [audio]
@@ -50,7 +50,7 @@
 *   raylib is licensed under an unmodified zlib/libpng license, which is an OSI-certified,
 *   BSD-like license that allows static linking with closed source software:
 *
-*   Copyright (c) 2013-2017 Ramon Santamaria (@raysan5)
+*   Copyright (c) 2013-2018 Ramon Santamaria (@raysan5)
 *
 *   This software is provided "as-is", without any express or implied warranty. In no event
 *   will the authors be held liable for any damages arising from the use of this software.
@@ -470,6 +470,8 @@ typedef struct Wave {
 
 // Sound source type
 typedef struct Sound {
+    void* audioBuffer;      // A pointer to internal data used by the audio system.
+
     unsigned int source;    // OpenAL audio source id
     unsigned int buffer;    // OpenAL audio buffer id
     int format;             // OpenAL audio format specifier
@@ -486,6 +488,8 @@ typedef struct AudioStream {
     unsigned int sampleSize;    // Bit depth (bits per sample): 8, 16, 32 (24 not supported)
     unsigned int channels;      // Number of channels (1-mono, 2-stereo)
 
+    void* audioBuffer;          // A pointer to internal data used by the audio system.
+
     int format;                 // OpenAL audio format specifier
     unsigned int source;        // OpenAL audio source id
     unsigned int buffers[2];    // OpenAL audio buffers (double buffering)
@@ -1126,7 +1130,7 @@ RLAPI void ResumeMusicStream(Music music);                            // Resume
 RLAPI bool IsMusicPlaying(Music music);                               // Check if music is playing
 RLAPI void SetMusicVolume(Music music, float volume);                 // Set volume for music (1.0 is max level)
 RLAPI void SetMusicPitch(Music music, float pitch);                   // Set pitch for a music (1.0 is base level)
-RLAPI void SetMusicLoopCount(Music music, float count);               // Set music loop count (loop repeats)
+RLAPI void SetMusicLoopCount(Music music, int count);                 // Set music loop count (loop repeats)
 RLAPI float GetMusicTimeLength(Music music);                          // Get music time length (in seconds)
 RLAPI float GetMusicTimePlayed(Music music);                          // Get current music time played (in seconds)
 
@@ -1139,7 +1143,10 @@ RLAPI bool IsAudioBufferProcessed(AudioStream stream);                // Check i
 RLAPI void PlayAudioStream(AudioStream stream);                       // Play audio stream
 RLAPI void PauseAudioStream(AudioStream stream);                      // Pause audio stream
 RLAPI void ResumeAudioStream(AudioStream stream);                     // Resume audio stream
+RLAPI bool IsAudioStreamPlaying(AudioStream stream);                  // Check if audio stream is playing
 RLAPI void StopAudioStream(AudioStream stream);                       // Stop audio stream
+RLAPI void SetAudioStreamVolume(AudioStream stream, float volume);     // Set volume for audio stream (1.0 is max level)
+RLAPI void SetAudioStreamPitch(AudioStream stream, float pitch);       // Set pitch for audio stream (1.0 is base level)
 
 #ifdef __cplusplus
 }
diff --git a/raylib/raymath.h b/raylib/raymath.h
index fe0b894..decd02c 100644
--- a/raylib/raymath.h
+++ b/raylib/raymath.h
@@ -191,8 +191,8 @@ RMDEF void QuaternionNormalize(Quaternion *q);                  // Normalize pro
 RMDEF void QuaternionInvert(Quaternion *quat);                  // Invert provided quaternion
 RMDEF Quaternion QuaternionMultiply(Quaternion q1, Quaternion q2);    // Calculate two quaternion multiplication
 RMDEF Quaternion QuaternionLerp(Quaternion q1, Quaternion q2, float amount);    // Calculate linear interpolation between two quaternions
-RMDEF Quaternion QuaternionSlerp(Quaternion q1, Quaternion q2, float amount);   // Calculates spherical linear interpolation between two quaternions
 RMDEF Quaternion QuaternionNlerp(Quaternion q1, Quaternion q2, float amount);   // Calculate slerp-optimized interpolation between two quaternions
+RMDEF Quaternion QuaternionSlerp(Quaternion q1, Quaternion q2, float amount);   // Calculates spherical linear interpolation between two quaternions
 RMDEF Quaternion QuaternionFromVector3ToVector3(Vector3 from, Vector3 to);      // Calculate quaternion based on the rotation from one vector to another
 RMDEF Quaternion QuaternionFromMatrix(Matrix matrix);                 // Returns a quaternion for a given rotation matrix
 RMDEF Matrix QuaternionToMatrix(Quaternion q);                        // Returns a matrix for a given quaternion
@@ -1083,6 +1083,15 @@ RMDEF Quaternion QuaternionLerp(Quaternion q1, Quaternion q2, float amount)
     return result;
 }
 
+// Calculate slerp-optimized interpolation between two quaternions (linear interpolation, then normalization)
+RMDEF Quaternion QuaternionNlerp(Quaternion q1, Quaternion q2, float amount)
+{
+    Quaternion result = QuaternionLerp(q1, q2, amount);
+    QuaternionNormalize(&result);
+    
+    return result;
+}
+
 // Calculates spherical linear interpolation between two quaternions
 RMDEF Quaternion QuaternionSlerp(Quaternion q1, Quaternion q2, float amount)
 {
@@ -1119,15 +1128,6 @@ RMDEF Quaternion QuaternionSlerp(Quaternion q1, Quaternion q2, float amount)
     return result;
 }
 
-// Calculate slerp-optimized interpolation between two quaternions
-RMDEF Quaternion QuaternionNlerp(Quaternion q1, Quaternion q2, float amount)
-{
-    Quaternion result = QuaternionLerp(q1, q2, amount);
-    QuaternionNormalize(&result);
-    
-    return result;
-}
-
 // Calculate quaternion based on the rotation from one vector to another
 RMDEF Quaternion QuaternionFromVector3ToVector3(Vector3 from, Vector3 to)
 {
diff --git a/raylib/rlgl.c b/raylib/rlgl.c
index f273e55..6ae7df4 100644
--- a/raylib/rlgl.c
+++ b/raylib/rlgl.c
@@ -3362,8 +3362,8 @@ static void SetShaderDefaultLocations(Shader *shader)
     // Get handles to GLSL uniform locations (fragment shader)
     shader->locs[LOC_COLOR_DIFFUSE] = glGetUniformLocation(shader->id, "colDiffuse");
     shader->locs[LOC_MAP_DIFFUSE] = glGetUniformLocation(shader->id, "texture0");
-    shader->locs[LOC_MAP_NORMAL] = glGetUniformLocation(shader->id, "texture1");
-    shader->locs[LOC_MAP_SPECULAR] = glGetUniformLocation(shader->id, "texture2");
+    shader->locs[LOC_MAP_SPECULAR] = glGetUniformLocation(shader->id, "texture1");
+    shader->locs[LOC_MAP_NORMAL] = glGetUniformLocation(shader->id, "texture2");
 }
 
 // Unload default shader
diff --git a/raylib/text.c b/raylib/text.c
index ff55ae6..8db2fc9 100644
--- a/raylib/text.c
+++ b/raylib/text.c
@@ -883,11 +883,10 @@ static SpriteFont LoadTTF(const char *fileName, int fontSize, int charsCount, in
     scale = stbtt_ScaleForPixelHeight(&fontInfo, fontSize);
     stbtt_GetFontVMetrics(&fontInfo, &ascent, 0, 0);
     baseline = (int)(ascent*scale);
-
     
     if (fontChars[0] != 32) TraceLog(LOG_WARNING, "TTF spritefont loading: first character is not SPACE(32) character");
 
-    // NOTE: Using stb_truetype crappy packing method, no guarante the font fits the image...
+    // NOTE: Using stb_truetype crappy packing method, no guarantee the font fits the image...
     // TODO: Replace this function by a proper packing method and support random chars order,
     // we already receive a list (fontChars) with the ordered expected characters
     int result = stbtt_BakeFontBitmap(ttfBuffer, 0, fontSize, dataBitmap, textureSize, textureSize, fontChars[0], charsCount, charData);
diff --git a/raylib/textures.c b/raylib/textures.c
index 814c302..090de24 100644
--- a/raylib/textures.c
+++ b/raylib/textures.c
@@ -58,6 +58,8 @@
 #define SUPPORT_FILEFORMAT_PNG
 #define SUPPORT_FILEFORMAT_DDS
 #define SUPPORT_FILEFORMAT_HDR
+#define SUPPORT_FILEFORMAT_KTX
+#define SUPPORT_FILEFORMAT_ASTC
 #define SUPPORT_IMAGE_MANIPULATION
 #define SUPPORT_IMAGE_GENERATION
 //-------------------------------------------------
@@ -536,14 +538,13 @@ Image GetTextureData(Texture2D texture)
         {
             image.width = texture.width;
             image.height = texture.height;
+            image.format = texture.format;
             image.mipmaps = 1;
             
-            if (rlGetVersion() == OPENGL_ES_20)
-            {
-                // NOTE: Data retrieved on OpenGL ES 2.0 comes as RGBA (from framebuffer)
-                image.format = UNCOMPRESSED_R8G8B8A8;
-            }
-            else image.format = texture.format;
+            // NOTE: Data retrieved on OpenGL ES 2.0 should be RGBA, since it
+            // comes from the FBO color buffer, but on RPI it seems the
+            // original texture format is retrieved instead... weird...
+            //image.format = UNCOMPRESSED_R8G8B8A8;
 
             TraceLog(LOG_INFO, "Texture pixel data obtained successfully");
         }
@@ -622,9 +623,9 @@ void ImageFormat(Image *image, int newFormat)
 
                     for (int i = 0; i < image->width*image->height; i++)
                     {
-                        r = (unsigned char)(round((float)pixels[k].r*31/255));
-                        g = (unsigned char)(round((float)pixels[k].g*63/255));
-                        b = (unsigned char)(round((float)pixels[k].b*31/255));
+                        r = (unsigned char)(round((float)pixels[i].r*31.0f/255));
+                        g = (unsigned char)(round((float)pixels[i].g*63.0f/255));
+                        b = (unsigned char)(round((float)pixels[i].b*31.0f/255));
 
                         ((unsigned short *)image->data)[i] = (unsigned short)r << 11 | (unsigned short)g << 5 | (unsigned short)b;
                     }
@@ -655,9 +656,9 @@ void ImageFormat(Image *image, int newFormat)
 
                     for (int i = 0; i < image->width*image->height; i++)
                     {
-                        r = (unsigned char)(round((float)pixels[i].r*31/255));
-                        g = (unsigned char)(round((float)pixels[i].g*31/255));
-                        b = (unsigned char)(round((float)pixels[i].b*31/255));
+                        r = (unsigned char)(round((float)pixels[i].r*31.0f/255));
+                        g = (unsigned char)(round((float)pixels[i].g*31.0f/255));
+                        b = (unsigned char)(round((float)pixels[i].b*31.0f/255));
                         a = (pixels[i].a > ALPHA_THRESHOLD) ? 1 : 0;
 
                         ((unsigned short *)image->data)[i] = (unsigned short)r << 11 | (unsigned short)g << 6 | (unsigned short)b << 1 | (unsigned short)a;
@@ -675,12 +676,12 @@ void ImageFormat(Image *image, int newFormat)
 
                     for (int i = 0; i < image->width*image->height; i++)
                     {
-                        r = (unsigned char)(round((float)pixels[i].r*15/255));
-                        g = (unsigned char)(round((float)pixels[i].g*15/255));
-                        b = (unsigned char)(round((float)pixels[i].b*15/255));
-                        a = (unsigned char)(round((float)pixels[i].a*15/255));
-
-                        ((unsigned short *)image->data)[i] = (unsigned short)r << 12 | (unsigned short)g << 8| (unsigned short)b << 4| (unsigned short)a;
+                        r = (unsigned char)(round((float)pixels[i].r*15.0f/255));
+                        g = (unsigned char)(round((float)pixels[i].g*15.0f/255));
+                        b = (unsigned char)(round((float)pixels[i].b*15.0f/255));
+                        a = (unsigned char)(round((float)pixels[i].a*15.0f/255));
+                        
+                        ((unsigned short *)image->data)[i] = (unsigned short)r << 12 | (unsigned short)g << 8 | (unsigned short)b << 4 | (unsigned short)a;
                     }
 
                 } break;
@@ -801,7 +802,7 @@ void ImageToPOT(Image *image, Color fillColor)
 // Copy an image to a new image
 Image ImageCopy(Image image)
 {
-    Image newImage;
+    Image newImage = { 0 };
 
     int byteSize = image.width*image.height;
 
@@ -1087,7 +1088,8 @@ Image ImageTextEx(SpriteFont font, const char *text, float fontSize, int spacing
         ImageDraw(&imText, imFont, letter.rec, (Rectangle){ posX + letter.offsetX, 
                   letter.offsetY, letter.rec.width, letter.rec.height });
 
-        posX += letter.advanceX + spacing;
+        if (letter.advanceX == 0) posX += letter.rec.width + spacing;
+        else posX += letter.advanceX + spacing;
     }
 
     UnloadImage(imFont);