diff --git a/.github/workflows/ffmpeg.yml b/.github/workflows/ffmpeg.yml
index 5d04cb354..8e2cea8f7 100644
--- a/.github/workflows/ffmpeg.yml
+++ b/.github/workflows/ffmpeg.yml
@@ -19,6 +19,9 @@ jobs:
           - os: macos-latest
             CC: clang
             CXX: clang++
+          - os: windows-latest
+            CC: cl
+            CXX: cl
     runs-on: ${{ matrix.os }}
     env:
       CC: ${{ matrix.CC }}
@@ -29,6 +32,7 @@ jobs:
         with:
           python-version: 3.11
       - name: Install meson and ninja
+        if: matrix.os != 'windows-latest'
         run: |
           python -m pip install --upgrade pip
           pip install meson
@@ -41,20 +45,60 @@ jobs:
         if: matrix.os == 'macos-latest'
         run: |
           brew install -q ninja nasm
+      - name: Install dependencies (windows)
+        if: matrix.os == 'windows-latest'
+        run: |
+          choco install -y nasm pkgconfiglite
+          pip install meson ninja
+      - name: Setup MSVC
+        if: matrix.os == 'windows-latest'
+        uses: ilammy/msvc-dev-cmd@v1
       - uses: actions/checkout@v6
-      - name: Build vmaf
+      - name: Build vmaf (unix)
+        if: matrix.os != 'windows-latest'
         run: |
           meson setup libvmaf libvmaf/build --buildtype release
           sudo ninja -vC libvmaf/build install
-      - name: Prepare FFmpeg
+      - name: Build vmaf (windows)
+        if: matrix.os == 'windows-latest'
+        shell: pwsh
+        run: |
+          meson setup libvmaf libvmaf/build --buildtype release --default-library static --prefix "C:/vmaf"
+          meson install -C libvmaf/build
+      - name: Prepare static libs for MSVC
+        if: matrix.os == 'windows-latest'
+        shell: pwsh
+        run: |
+          Rename-Item -Path C:\vmaf\lib\libpthreadVC3.a  -NewName C:\vmaf\lib\pthreadVC3.lib
+          Rename-Item -Path C:\vmaf\lib\libpthreadVCE3.a -NewName C:\vmaf\lib\pthreadVCE3.lib
+          Rename-Item -Path C:\vmaf\lib\libpthreadVSE3.a -NewName C:\vmaf\lib\pthreadVSE3.lib
+          Rename-Item -Path C:\vmaf\lib\libvmaf.a        -NewName C:\vmaf\lib\vmaf.lib
+          echo "INCLUDE=C:/vmaf/include;$env:INCLUDE" >> $env:GITHUB_ENV
+          echo "LIB=C:/vmaf/lib;$env:LIB" >> $env:GITHUB_ENV
+          echo "PATH=C:/vmaf/bin;$env:PATH" >> $env:GITHUB_ENV
+          echo "PKG_CONFIG_PATH=C:/vmaf/lib/pkgconfig;$env:PKG_CONFIG_PATH" >> $env:GITHUB_ENV
+      - name: Prepare FFmpeg (unix)
+        if: matrix.os != 'windows-latest'
         run: |
           git clone -q --branch master --depth=1 "https://github.com/FFmpeg/FFmpeg" ffmpeg
           cd ffmpeg
           ./configure --enable-version3 --enable-libvmaf --disable-indevs --cc="$CC" --cxx="$CXX" || { less ffbuild/config.log; exit 1; }
-      - name: Make FFmpeg
+      - name: Prepare FFmpeg (windows)
+        if: matrix.os == 'windows-latest'
+        run: |
+          git clone --depth=1 https://gitlab.freedesktop.org/gstreamer/meson-ports/ffmpeg.git ffmpeg
+          cd ffmpeg
+          meson setup build --buildtype release -Dversion3=enabled -Dindevs=disabled -Dlibvmaf=enabled -Dprograms=enabled --prefix "C:/ffmpeg"
+      - name: Make FFmpeg (unix)
+        if: matrix.os != 'windows-latest'
         run: |
           sudo make -C ffmpeg --quiet -j $(getconf _NPROCESSORS_ONLN 2>/dev/null || sysctl -n hw.ncpu) install
-      - name: Test ffmpeg
+      - name: Make FFmpeg (windows)
+        if: matrix.os == 'windows-latest'
+        run: |
+          meson install -C ffmpeg/build
+      - name: Test ffmpeg (unix)
+        if: matrix.os != 'windows-latest'
         run: |
           curl "https://gist.githubusercontent.com/1480c1/0c4575da638ef6e8203feffd0597de16/raw/akiyo_cif.tar.xz.base64" | base64 -d | tar xJ
           vmaf_score=$(ffmpeg -hide_banner -nostats -i encoded.mkv -i orig.mkv -filter_complex libvmaf -f null - 2>&1 | grep 'VMAF score' | tr ' ' '\n' | tail -n1)
@@ -67,3 +111,21 @@ jobs:
             exit 0
           fi
         continue-on-error: true
+      - name: Test ffmpeg (MSVC)
+        if: matrix.os == 'windows-latest'
+        shell: pwsh
+        run: |
+          curl "https://gist.githubusercontent.com/1480c1/0c4575da638ef6e8203feffd0597de16/raw/akiyo_cif.tar.xz.base64" | base64 -d | tar xJ
+          $ffmpeg = "C:/ffmpeg/bin/ffmpeg.exe"
+          $vmaf_score = (& $ffmpeg -hide_banner -nostats -i encoded.mkv -i orig.mkv -filter_complex libvmaf -f null - 2>&1 |
+                         Select-String "VMAF score" |
+                         ForEach-Object { $_.ToString().Split()[-1] }).Trim()
+          echo "VMAF score: $vmaf_score"
+          if ($vmaf_score -ne "93.663925") {
+            echo "vmaf score doesn't match 93.663925"
+            exit 1
+          } else {
+            echo "vmaf score matches"
+            exit 0
+          }
+        continue-on-error: true
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index d8763d6bb..128148fad 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -4,78 +4,77 @@ on:
   pull_request:
 
 jobs:
-  build:
+  build-windows:
     runs-on: windows-latest
+
     strategy:
       fail-fast: false
       matrix:
-        include:
-          # Disabled 32-bit job due to vmaf score mismatch
-          #- msystem: MINGW32
-          #  MINGW_PACKAGE_PREFIX: mingw-w64-i686
-          #  CFLAGS: -msse2 -mfpmath=sse -mstackrealign
-          - msystem: MINGW64
-            MINGW_PACKAGE_PREFIX: mingw-w64-x86_64
-    env:
-      CC: ccache gcc
-      CXX: ccache g++
-      CFLAGS: -pipe -static -O3 -mtune=generic -D_FILE_OFFSET_BITS=64 -mthreads ${{ matrix.CFLAGS }}
-      CXXFLAGS: -pipe -static -O3 -mtune=generic -D_FILE_OFFSET_BITS=64 -mthreads ${{ matrix.CXXFLAGS }}
-      LDFLAGS: -pipe -static -static-libgcc -static-libstdc++ ${{ matrix.LDFLAGS }}
-    defaults:
-      run:
-        shell: msys2 {0}
+        build:
+          - type: mingw
+            msystem: MINGW64
+            prefix: mingw-w64-x86_64
+          - type: msvc
+
     steps:
       - uses: actions/checkout@v6
         with:
+          submodules: true
           fetch-depth: 0
-      - name: Cache ccache files
-        uses: actions/cache@v4
-        with:
-          path: |
-            .ccache
-          key: ${{ runner.os }}-${{ matrix.msystem }}-0-${{ hashFiles('**/*.c') }}
-          restore-keys: |
-            ${{ runner.os }}-${{ matrix.msystem }}-0-
-            ${{ runner.os }}-${{ matrix.msystem }}-
 
-      - uses: msys2/setup-msys2@v2
+      - name: Setup MSYS2
+        if: matrix.build.type == 'mingw'
+        uses: msys2/setup-msys2@v2
         with:
-          msystem: ${{ matrix.msystem }}
-          install: ${{ matrix.MINGW_PACKAGE_PREFIX }}-ccache ${{ matrix.MINGW_PACKAGE_PREFIX }}-nasm ${{ matrix.MINGW_PACKAGE_PREFIX }}-pkg-config ${{ matrix.MINGW_PACKAGE_PREFIX }}-gcc ${{ matrix.MINGW_PACKAGE_PREFIX }}-meson ${{ matrix.MINGW_PACKAGE_PREFIX }}-ninja vim make diffutils
+          msystem: ${{ matrix.build.msystem }}
+          install: ${{ matrix.build.prefix }}-ccache ${{ matrix.build.prefix }}-nasm ${{ matrix.build.prefix }}-pkg-config ${{ matrix.build.prefix }}-gcc ${{ matrix.build.prefix }}-meson ${{ matrix.build.prefix }}-ninja vim make diffutils
           update: true
           path-type: inherit
 
-      - name: Set ccache dir
-        run: echo "name=CCACHE_DIR::$PWD/.ccache" >> $GITHUB_ENV
+      - name: Build (MINGW)
+        if: matrix.build.type == 'mingw'
+        shell: msys2 {0}
+        run: |
+          echo "name=CCACHE_DIR::$PWD/.ccache" >> $GITHUB_ENV
+          meson setup libvmaf libvmaf/build --buildtype release --default-library static --prefix "$MINGW_PREFIX"
+          meson install -C libvmaf/build
+          meson test -C libvmaf/build --num-processes $(nproc)
+
+          # Export MINGW_PREFIX to the global environment
+          echo "MINGW_PREFIX=$(cygpath -m "$MINGW_PREFIX")" >> $GITHUB_ENV
 
-      - name: Configure vmaf
-        run: meson setup libvmaf libvmaf/build --buildtype release --default-library static --prefix "$MINGW_PREFIX"
-      - name: Build vmaf
-        run: meson install -C libvmaf/build
+      - name: Upload MINGW build
+        if: matrix.build.type == 'mingw'
+        uses: actions/upload-artifact@v5
+        with:
+          name: mingw-vmaf
+          path: ${{ env.MINGW_PREFIX }}/bin/vmaf.exe
 
-      - name: Test vmaf
-        run: meson test -C libvmaf/build --num-processes $(nproc)
+      - name: Setup MSVC environment
+        if: matrix.build.type == 'msvc'
+        uses: ilammy/msvc-dev-cmd@v1
+
+      - name: Install Meson and Ninja
+        if: matrix.build.type == 'msvc'
+        run: |
+          pip install meson ninja
 
-      - name: Get binary path & Current Release
-        id: get_info
+      - name: Install build tools
+        if: matrix.build.type == 'msvc'
         run: |
-          ldd "$MINGW_PREFIX/bin/vmaf.exe" || true
-          echo "path=$(cygpath -m "$(command -v vmaf)")" >> $GITHUB_OUTPUT
-          echo "upload_url=$(curl -L https://api.github.com/repos/${{ github.repository }}/releases/tags/$(cut -d/ -f3 <<< ${{ github.ref }}) | jq -r ."upload_url")" >> $GITHUB_OUTPUT
+          choco install xxd -y
 
-      - name: Upload vmaf
+      - name: Build (MSVC)
+        if: matrix.build.type == 'msvc'
+        shell: cmd
+        run: |
+          meson setup libvmaf libvmaf/build-msvc --buildtype release --default-library static --prefix C:/vmaf-install-msvc
+          meson install -C libvmaf/build-msvc
+          meson test -C libvmaf/build-msvc
+
+      - name: Upload MSVC build
+        if: matrix.build.type == 'msvc'
         uses: actions/upload-artifact@v5
         with:
-          name: ${{ matrix.msystem }}-vmaf
-          path: ${{ steps.get_info.outputs.path }}
-      - name: Upload vmaf
-        if: steps.get_info.outputs.upload_url != 'null' && matrix.msystem == 'MINGW64'
-        uses: actions/upload-release-asset@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ steps.get_info.outputs.upload_url }}
-          asset_path: ${{ steps.get_info.outputs.path }}
-          asset_name: vmaf.exe
-          asset_content_type: application/vnd.microsoft.portable-executable
+          name: msvc-vmaf
+          path: C:/vmaf-install-msvc/bin/vmaf.exe
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..758af9340
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "GerHobbelt-pthread-win32"]
+	path = libvmaf/subprojects/pthread-win32
+	url = https://github.com/GerHobbelt/pthread-win32
diff --git a/libvmaf/meson.build b/libvmaf/meson.build
index cf20cecbd..39190989c 100644
--- a/libvmaf/meson.build
+++ b/libvmaf/meson.build
@@ -29,6 +29,13 @@ elif host_machine.system() == 'darwin'
 endif
 
 # Header checks
+if cc.has_header('malloc.h', args: test_args)
+    add_project_arguments('-DHAVE_MALLOC_H', language: ['c', 'cpp'])
+endif
+if cc.has_header('alloca.h', args: test_args)
+    add_project_arguments('-DHAVE_ALLOCA_H', language: ['c', 'cpp'])
+endif
+
 stdatomic_dependency = []
 if not cc.check_header('stdatomic.h')
     if cc.get_id() == 'msvc'
@@ -46,6 +53,37 @@ if not cc.check_header('stdatomic.h')
     endif
 endif
 
+builtin_clz_dependency = []
+if cc.get_id() == 'msvc'
+    builtin_clz_dependency = declare_dependency(
+        include_directories : include_directories('src/compat/msvc'),
+        compile_args : ['-DHAVE_BUILTIN_CLZ_COMPAT'],
+    )
+endif
+
+if cc.check_header('getopt.h', args: test_args)
+    getopt_dependency = declare_dependency(compile_args : '-DHAVE_GETOPT_H')
+else
+    getopt_dependency = declare_dependency(
+        compile_args : '-DHAVE_GETOPT_H',
+        sources: ['src/compat/getopt/getopt.c'],
+        include_directories : include_directories('src/compat/getopt'),
+    )
+endif
+
+pthreadwin32_inc = []
+if cc.get_id() == 'msvc'
+    cmake = import('cmake')
+
+    pthreadwin32_opts = cmake.subproject_options()
+    pthreadwin32_opts.add_cmake_defines({
+        'BUILD_SHARED_LIBS': false,
+    })
+    pthreadwin32_proj = cmake.subproject('pthread-win32', options: pthreadwin32_opts)
+    pthreadwin32_lib = pthreadwin32_proj.dependency('pthreadVC3')
+    pthreadwin32_inc = include_directories('./subprojects/pthread-win32')
+endif
+
 subdir('include')
 subdir('src')
 subdir('tools')
diff --git a/libvmaf/src/compat/getopt/README.md b/libvmaf/src/compat/getopt/README.md
new file mode 100644
index 000000000..9009867b5
--- /dev/null
+++ b/libvmaf/src/compat/getopt/README.md
@@ -0,0 +1,38 @@
+ya_getopt - Yet another getopt
+==============================
+
+What is ya_getopt.
+------------------
+
+Ya_getopt is a drop-in replacement of [GNU C library getopt](http://man7.org/linux/man-pages/man3/getopt.3.html).
+`getopt()`, `getopt_long()` and `getopt_long_only()` are implemented excluding the following GNU extension features.
+
+1. If *optstring* contains **W** followed by a semicolon, then **-W** **foo** is
+   treated as the long option **--foo**.
+
+2. \_\<PID>\_GNU\_nonoption\_argv\_flags\_
+
+The license is 2-clause BSD-style license. You can use the Linux getopt compatible function
+under Windows, Solaris and so on without having to worry about license issue.
+
+Note for contributors
+---------------------
+
+Don't send me a patch if you have looked at GNU C library getopt source code.
+That's because I made this with clean room design to avoid the influence of the GNU LGPL.
+
+Please make a test script passed by the GNU C library getopt but not by ya_getopt instead.
+
+License
+-------
+
+2-clause BSD-style license
+
+Other getopt functions
+----------------------
+
+* [public domain AT&T getopt](https://www.google.co.jp/search?q=public+domain+at%26t+getopt) public domain, no getopt_long, no getopt_long_only, no argv permutation
+* [Free Getopt](http://freegetopt.sourceforge.net/) 3-clause BSD-style licence, no getopt_long, no getopt_long_only
+* [getopt_port](https://github.com/kimgr/getopt_port/) 3-clause BSD-style licence, no getopt_long_only, no argv permutation
+* [XGetopt - A Unix-compatible getopt() for MFC and Win32](http://www.codeproject.com/Articles/1940/XGetopt-A-Unix-compatible-getopt-for-MFC-and-Win32)
+* [Full getopt Port for Unicode and Multibyte Microsoft Visual C, C++, or MFC Projects](http://www.codeproject.com/Articles/157001/Full-getopt-Port-for-Unicode-and-Multibyte-Microso) LGPL
diff --git a/libvmaf/src/compat/getopt/getopt.c b/libvmaf/src/compat/getopt/getopt.c
new file mode 100644
index 000000000..a6ffca952
--- /dev/null
+++ b/libvmaf/src/compat/getopt/getopt.c
@@ -0,0 +1,318 @@
+/* -*- indent-tabs-mode: nil -*-
+ *
+ * ya_getopt  - Yet another getopt
+ * https://github.com/kubo/ya_getopt
+ *
+ * Copyright 2015 Kubo Takehiro <kubo@jiubao.org>
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *       of conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those of the
+ * authors and should not be interpreted as representing official policies, either expressed
+ * or implied, of the authors.
+ *
+ */
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include "getopt.h"
+
+char *ya_optarg = NULL;
+int ya_optind = 1;
+int ya_opterr = 1;
+int ya_optopt = '?';
+static char *ya_optnext = NULL;
+static int posixly_correct = -1;
+static int handle_nonopt_argv = 0;
+
+static void ya_getopt_error(const char *optstring, const char *format, ...);
+static void check_gnu_extension(const char *optstring);
+static int ya_getopt_internal(int argc, char * const argv[], const char *optstring, const struct option *longopts, int *longindex, int long_only);
+static int ya_getopt_shortopts(int argc, char * const argv[], const char *optstring, int long_only);
+static int ya_getopt_longopts(int argc, char * const argv[], char *arg, const char *optstring, const struct option *longopts, int *longindex, int *long_only_flag);
+
+static void ya_getopt_error(const char *optstring, const char *format, ...)
+{
+    if (ya_opterr && optstring[0] != ':') {
+        va_list ap;
+        va_start(ap, format);
+        vfprintf(stderr, format, ap);
+        va_end(ap);
+    }
+}
+
+static void check_gnu_extension(const char *optstring)
+{
+    if (optstring[0] == '+' || getenv("POSIXLY_CORRECT") != NULL) {
+        posixly_correct = 1;
+    } else {
+        posixly_correct = 0;
+    }
+    if (optstring[0] == '-') {
+        handle_nonopt_argv = 1;
+    } else {
+        handle_nonopt_argv = 0;
+    }
+}
+
+static int is_option(const char *arg)
+{
+    return arg[0] == '-' && arg[1] != '\0';
+}
+
+int ya_getopt(int argc, char * const argv[], const char *optstring)
+{
+    return ya_getopt_internal(argc, argv, optstring, NULL, NULL, 0);
+}
+
+int ya_getopt_long(int argc, char * const argv[], const char *optstring, const struct option *longopts, int *longindex)
+{
+    return ya_getopt_internal(argc, argv, optstring, longopts, longindex, 0);
+}
+
+int ya_getopt_long_only(int argc, char * const argv[], const char *optstring, const struct option *longopts, int *longindex)
+{
+    return ya_getopt_internal(argc, argv, optstring, longopts, longindex, 1);
+}
+
+static int ya_getopt_internal(int argc, char * const argv[], const char *optstring, const struct option *longopts, int *longindex, int long_only)
+{
+    static int start, end;
+
+    if (ya_optopt == '?') {
+        ya_optopt = 0;
+    }
+
+    if (posixly_correct == -1) {
+        check_gnu_extension(optstring);
+    }
+
+    if (ya_optind == 0) {
+        check_gnu_extension(optstring);
+        ya_optind = 1;
+        ya_optnext = NULL;
+    }
+
+    switch (optstring[0]) {
+    case '+':
+    case '-':
+        optstring++;
+    }
+
+    if (ya_optnext == NULL && start != 0) {
+        int last_pos = ya_optind - 1;
+
+        ya_optind -= end - start;
+        if (ya_optind <= 0) {
+            ya_optind = 1;
+        }
+        while (start < end--) {
+            int i;
+            char *arg = argv[end];
+
+            for (i = end; i < last_pos; i++) {
+                ((char **)argv)[i] = argv[i + 1];
+            }
+            ((char const **)argv)[i] = arg;
+            last_pos--;
+        }
+        start = 0;
+    }
+
+    if (ya_optind >= argc) {
+        ya_optarg = NULL;
+        return -1;
+    }
+    if (ya_optnext == NULL) {
+        const char *arg = argv[ya_optind];
+        if (!is_option(arg)) {
+            if (handle_nonopt_argv) {
+                ya_optarg = argv[ya_optind++];
+                start = 0;
+                return 1;
+            } else if (posixly_correct) {
+                ya_optarg = NULL;
+                return -1;
+            } else {
+                int i;
+
+                start = ya_optind;
+                for (i = ya_optind + 1; i < argc; i++) {
+                    if (is_option(argv[i])) {
+                        end = i;
+                        break;
+                    }
+                }
+                if (i == argc) {
+                    ya_optarg = NULL;
+                    return -1;
+                }
+                ya_optind = i;
+                arg = argv[ya_optind];
+            }
+        }
+        if (strcmp(arg, "--") == 0) {
+            ya_optind++;
+            return -1;
+        }
+        if (longopts != NULL && arg[1] == '-') {
+            return ya_getopt_longopts(argc, argv, argv[ya_optind] + 2, optstring, longopts, longindex, NULL);
+        }
+    }
+
+    if (ya_optnext == NULL) {
+        ya_optnext = argv[ya_optind] + 1;
+    }
+    if (long_only) {
+        int long_only_flag = 0;
+        int rv = ya_getopt_longopts(argc, argv, ya_optnext, optstring, longopts, longindex, &long_only_flag);
+        if (!long_only_flag) {
+            ya_optnext = NULL;
+            return rv;
+        }
+    }
+
+    return ya_getopt_shortopts(argc, argv, optstring, long_only);
+}
+
+static int ya_getopt_shortopts(int argc, char * const argv[], const char *optstring, int long_only)
+{
+    int opt = *ya_optnext;
+    const char *os = strchr(optstring, opt);
+
+    if (os == NULL) {
+        ya_optarg = NULL;
+        if (long_only) {
+            ya_getopt_error(optstring, "%s: unrecognized option '-%s'\n", argv[0], ya_optnext);
+            ya_optind++;
+            ya_optnext = NULL;
+        } else {
+            ya_optopt = opt;
+            ya_getopt_error(optstring, "%s: invalid option -- '%c'\n", argv[0], opt);
+            if (*(++ya_optnext) == 0) {
+                ya_optind++;
+                ya_optnext = NULL;
+            }
+        }
+        return '?';
+    }
+    if (os[1] == ':') {
+        if (ya_optnext[1] == 0) {
+            ya_optind++;
+            ya_optnext = NULL;
+            if (os[2] == ':') {
+                /* optional argument */
+                ya_optarg = NULL;
+            } else {
+                if (ya_optind == argc) {
+                    ya_optarg = NULL;
+                    ya_optopt = opt;
+                    ya_getopt_error(optstring, "%s: option requires an argument -- '%c'\n", argv[0], opt);
+                    if (optstring[0] == ':') {
+                        return ':';
+                    } else {
+                        return '?';
+                    }
+                }
+                ya_optarg = argv[ya_optind];
+                ya_optind++;
+            }
+        } else {
+            ya_optarg = ya_optnext + 1;
+            ya_optind++;
+        }
+        ya_optnext = NULL;
+    } else {
+        ya_optarg = NULL;
+        if (ya_optnext[1] == 0) {
+            ya_optnext = NULL;
+            ya_optind++;
+        } else {
+            ya_optnext++;
+        }
+    }
+    return opt;
+}
+
+static int ya_getopt_longopts(int argc, char * const argv[], char *arg, const char *optstring, const struct option *longopts, int *longindex, int *long_only_flag)
+{
+    char *val = NULL;
+    const struct option *opt;
+    size_t namelen;
+    int idx;
+
+    for (idx = 0; longopts[idx].name != NULL; idx++) {
+        opt = &longopts[idx];
+        namelen = strlen(opt->name);
+        if (strncmp(arg, opt->name, namelen) == 0) {
+            switch (arg[namelen]) {
+            case '\0':
+                switch (opt->has_arg) {
+                case ya_required_argument:
+                    ya_optind++;
+                    if (ya_optind == argc) {
+                        ya_optarg = NULL;
+                        ya_optopt = opt->val;
+                        ya_getopt_error(optstring, "%s: option '--%s' requires an argument\n", argv[0], opt->name);
+                        if (optstring[0] == ':') {
+                            return ':';
+                        } else {
+                            return '?';
+                        }
+                    }
+                    val = argv[ya_optind];
+                    break;
+                }
+                goto found;
+            case '=':
+                if (opt->has_arg == ya_no_argument) {
+                    const char *hyphens = (argv[ya_optind][1] == '-') ? "--" : "-";
+
+                    ya_optind++;
+                    ya_optarg = NULL;
+                    ya_optopt = opt->val;
+                    ya_getopt_error(optstring, "%s: option '%s%s' doesn't allow an argument\n", argv[0], hyphens, opt->name);
+                    return '?';
+                }
+                val = arg + namelen + 1;
+                goto found;
+            }
+        }
+    }
+    if (long_only_flag) {
+        *long_only_flag = 1;
+    } else {
+        ya_getopt_error(optstring, "%s: unrecognized option '%s'\n", argv[0], argv[ya_optind]);
+        ya_optind++;
+    }
+    return '?';
+found:
+    ya_optarg = val;
+    ya_optind++;
+    if (opt->flag) {
+        *opt->flag = opt->val;
+    }
+    if (longindex) {
+        *longindex = idx;
+    }
+    return opt->flag ? 0 : opt->val;
+}
diff --git a/libvmaf/src/compat/getopt/getopt.h b/libvmaf/src/compat/getopt/getopt.h
new file mode 100644
index 000000000..4244c67d0
--- /dev/null
+++ b/libvmaf/src/compat/getopt/getopt.h
@@ -0,0 +1,77 @@
+/* -*- indent-tabs-mode: nil -*-
+ *
+ * ya_getopt  - Yet another getopt
+ * https://github.com/kubo/ya_getopt
+ *
+ * Copyright 2015 Kubo Takehiro <kubo@jiubao.org>
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *       of conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those of the
+ * authors and should not be interpreted as representing official policies, either expressed
+ * or implied, of the authors.
+ *
+ */
+#ifndef YA_GETOPT_H
+#define YA_GETOPT_H 1
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define ya_no_argument        0
+#define ya_required_argument  1
+#define ya_optional_argument  2
+
+struct option {
+    const char *name;
+    int has_arg;
+    int *flag;
+    int val;
+};
+
+int ya_getopt(int argc, char * const argv[], const char *optstring);
+int ya_getopt_long(int argc, char * const argv[], const char *optstring,
+                   const struct option *longopts, int *longindex);
+int ya_getopt_long_only(int argc, char * const argv[], const char *optstring,
+                        const struct option *longopts, int *longindex);
+
+extern char *ya_optarg;
+extern int ya_optind, ya_opterr, ya_optopt;
+
+#ifndef YA_GETOPT_NO_COMPAT_MACRO
+#define getopt ya_getopt
+#define getopt_long ya_getopt_long
+#define getopt_long_only ya_getopt_long_only
+#define optarg ya_optarg
+#define optind ya_optind
+#define opterr ya_opterr
+#define optopt ya_optopt
+#define no_argument ya_no_argument
+#define required_argument ya_required_argument
+#define optional_argument ya_optional_argument
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/libvmaf/src/compat/msvc/builtin_clz.h b/libvmaf/src/compat/msvc/builtin_clz.h
new file mode 100644
index 000000000..f93227f8b
--- /dev/null
+++ b/libvmaf/src/compat/msvc/builtin_clz.h
@@ -0,0 +1,11 @@
+#pragma once
+#include <intrin.h>
+
+static inline int __builtin_clz(unsigned x) {
+    return (int)__lzcnt(x);
+}
+
+static inline int __builtin_clzll(unsigned long long x) {
+    return (int)__lzcnt64(x);
+}
+
diff --git a/libvmaf/src/compat/msvc/stdatomic.h b/libvmaf/src/compat/msvc/stdatomic.h
index 979ee2ba8..7540775fd 100644
--- a/libvmaf/src/compat/msvc/stdatomic.h
+++ b/libvmaf/src/compat/msvc/stdatomic.h
@@ -39,8 +39,6 @@
 
 #include <windows.h>
 
-#include "common/attributes.h"
-
 typedef volatile LONG  __declspec(align(32)) atomic_int;
 typedef volatile ULONG __declspec(align(32)) atomic_uint;
 
diff --git a/libvmaf/src/feature/ciede.c b/libvmaf/src/feature/ciede.c
index 7bbfa8f60..b4abbf0b7 100644
--- a/libvmaf/src/feature/ciede.c
+++ b/libvmaf/src/feature/ciede.c
@@ -43,6 +43,10 @@ SOFTWARE.
 */
 
 #include <errno.h>
+#ifdef _MSC_VER
+// MSVC needs this to get M_PI defined in math.h
+#define _USE_MATH_DEFINES
+#endif
 #include <math.h>
 #include <stddef.h>
 #include <string.h>
diff --git a/libvmaf/src/feature/cuda/integer_adm_cuda.c b/libvmaf/src/feature/cuda/integer_adm_cuda.c
index 511480578..e80604294 100644
--- a/libvmaf/src/feature/cuda/integer_adm_cuda.c
+++ b/libvmaf/src/feature/cuda/integer_adm_cuda.c
@@ -30,7 +30,6 @@
 #include "cpu.h"
 #include "cuda/integer_adm_cuda.h"
 #include "picture_cuda.h"
-#include <unistd.h>
 
 #include <assert.h>
 
diff --git a/libvmaf/src/feature/integer_adm.h b/libvmaf/src/feature/integer_adm.h
index bf3836460..adf220c36 100644
--- a/libvmaf/src/feature/integer_adm.h
+++ b/libvmaf/src/feature/integer_adm.h
@@ -8,6 +8,10 @@
 #include <stdint.h>
 #include <string.h>
 
+#ifdef HAVE_BUILTIN_CLZ_COMPAT
+#include "builtin_clz.h"
+#endif
+
 static int32_t div_lookup[65537];
 static const int32_t div_Q_factor = 1073741824; // 2^30
 
diff --git a/libvmaf/src/feature/integer_vif.c b/libvmaf/src/feature/integer_vif.c
index fd6a82317..3e069e8de 100644
--- a/libvmaf/src/feature/integer_vif.c
+++ b/libvmaf/src/feature/integer_vif.c
@@ -630,22 +630,22 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
     if (!data) return -ENOMEM;
     memset(data, 0, data_sz);
 
-    s->public.buf.data = data; data += pad_size;
-    s->public.buf.ref = data; data += frame_size + pad_size + pad_size;
-    s->public.buf.dis = data; data += frame_size + pad_size;
-    s->public.buf.mu1 = data; data += h * s->public.buf.stride_16;
-    s->public.buf.mu2 = data; data += h * s->public.buf.stride_16;
-    s->public.buf.mu1_32 = data; data += s->public.buf.stride_32;
-    s->public.buf.mu2_32 = data; data += s->public.buf.stride_32;
-    s->public.buf.ref_sq = data; data += s->public.buf.stride_32;
-    s->public.buf.dis_sq = data; data += s->public.buf.stride_32;
-    s->public.buf.ref_dis = data; data += s->public.buf.stride_32;
-    s->public.buf.tmp.mu1 = data; data += s->public.buf.stride_tmp;
-    s->public.buf.tmp.mu2 = data; data += s->public.buf.stride_tmp;
-    s->public.buf.tmp.ref = data; data += s->public.buf.stride_tmp;
-    s->public.buf.tmp.dis = data; data += s->public.buf.stride_tmp;
-    s->public.buf.tmp.ref_dis = data; data += s->public.buf.stride_tmp;
-    s->public.buf.tmp.ref_convol = data; data += s->public.buf.stride_tmp;
+    s->public.buf.data = data; data = (char *)data + pad_size;
+    s->public.buf.ref = data; data = (char *)data + (frame_size + pad_size + pad_size);
+    s->public.buf.dis = data; data = (char *)data + (frame_size + pad_size);
+    s->public.buf.mu1 = data; data = (char *)data + (h * s->public.buf.stride_16);
+    s->public.buf.mu2 = data; data = (char *)data + (h * s->public.buf.stride_16);
+    s->public.buf.mu1_32 = data; data = (char *)data + s->public.buf.stride_32;
+    s->public.buf.mu2_32 = data; data = (char *)data + s->public.buf.stride_32;
+    s->public.buf.ref_sq = data; data = (char *)data + s->public.buf.stride_32;
+    s->public.buf.dis_sq = data; data = (char *)data + s->public.buf.stride_32;
+    s->public.buf.ref_dis = data; data = (char *)data + s->public.buf.stride_32;
+    s->public.buf.tmp.mu1 = data; data = (char *)data + s->public.buf.stride_tmp;
+    s->public.buf.tmp.mu2 = data; data = (char *)data + s->public.buf.stride_tmp;
+    s->public.buf.tmp.ref = data; data = (char *)data + s->public.buf.stride_tmp;
+    s->public.buf.tmp.dis = data; data = (char *)data + s->public.buf.stride_tmp;
+    s->public.buf.tmp.ref_dis = data; data = (char *)data + s->public.buf.stride_tmp;
+    s->public.buf.tmp.ref_convol = data; data = (char *)data + s->public.buf.stride_tmp;
     s->public.buf.tmp.dis_convol = data;
 
     s->feature_name_dict =
@@ -656,7 +656,7 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
     return 0;
 
 fail:
-    if (data) aligned_free(data);
+    if (s->public.buf.data) aligned_free(s->public.buf.data);
     vmaf_dictionary_free(&s->feature_name_dict);
     return -ENOMEM;
 }
diff --git a/libvmaf/src/feature/integer_vif.h b/libvmaf/src/feature/integer_vif.h
index 42b4108b4..759de4bf3 100644
--- a/libvmaf/src/feature/integer_vif.h
+++ b/libvmaf/src/feature/integer_vif.h
@@ -24,6 +24,10 @@
 #include <assert.h>
 #include "cpu.h"
 
+#ifdef HAVE_BUILTIN_CLZ_COMPAT
+#include "builtin_clz.h"
+#endif
+
 /* Enhancement gain imposed on vif, must be >= 1.0, where 1.0 means the gain is completely disabled */
 #ifndef DEFAULT_VIF_ENHN_GAIN_LIMIT
 #define DEFAULT_VIF_ENHN_GAIN_LIMIT (100.0)
@@ -127,20 +131,6 @@ void vif_statistic_16(struct VifPublicState *s, float *num, float *den, unsigned
 VifResiduals vif_compute_line_residuals(VifPublicState *s, unsigned from,
                                         unsigned to, int scale);
 
-
-#ifdef _MSC_VER
-#include <intrin.h>
-
-static inline int __builtin_clz(unsigned x) {
-    return (int)__lzcnt(x);
-}
-
-static inline int __builtin_clzll(unsigned long long x) {
-    return (int)__lzcnt64(x);
-}
-
-#endif
-
 static inline int32_t log2_32(const uint16_t *log2_table, uint32_t temp)
 {
     int k = __builtin_clz(temp);
diff --git a/libvmaf/src/feature/mkdirp.c b/libvmaf/src/feature/mkdirp.c
index 16fe966b3..4ff062252 100644
--- a/libvmaf/src/feature/mkdirp.c
+++ b/libvmaf/src/feature/mkdirp.c
@@ -6,7 +6,15 @@
 // MIT licensed
 //
 
-#include <unistd.h>
+#ifdef _WIN32
+#   include <io.h>
+#   include <direct.h> /* _mkdir, _wmkdir */
+#   ifndef strdup
+#       define strdup _strdup
+#   endif
+#else
+#   include <unistd.h>
+#endif
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
@@ -64,12 +72,18 @@ mkdirp(const char *path, mode_t mode) {
   free(parent);
 
   // make this one if parent has been made
-  #ifdef _WIN32
-    // http://msdn.microsoft.com/en-us/library/2fkk4dzw.aspx
-    int rc = mkdir(pathname);
-  #else
-    int rc = mkdir(pathname, mode);
-  #endif
+#ifdef _WIN32
+  /* On Windows, both MSVC and mingw use _mkdir which takes only a path.
+   * Microsoft docs: _mkdir and _wmkdir create a new directory and return 0
+   * on success or -1 on error, setting errno accordingly. See CRT docs:
+   * https://learn.microsoft.com/en-us/c-runtime-library/reference/mkdir-wmkdir
+   *
+   * The CRT documents that _mkdir/_wmkdir behave like mkdir but accept only
+   * a path (no mode) and set errno on failure (EEXIST, ENOENT, ...). */
+  int rc = _mkdir(pathname);
+#else
+  int rc = mkdir(pathname, mode);
+#endif
 
   free(pathname);
 
diff --git a/libvmaf/src/feature/mkdirp.h b/libvmaf/src/feature/mkdirp.h
index 02371fb57..344b8a0b5 100644
--- a/libvmaf/src/feature/mkdirp.h
+++ b/libvmaf/src/feature/mkdirp.h
@@ -11,6 +11,10 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#ifdef _MSC_VER
+    /* On MSVC provide a minimal mode_t typedef */
+    typedef int mode_t;
+#endif
 
 /*
  * Recursively `mkdir(path, mode)`
diff --git a/libvmaf/src/feature/x86/adm_avx2.c b/libvmaf/src/feature/x86/adm_avx2.c
index 542ed3de5..d5a8dee06 100644
--- a/libvmaf/src/feature/x86/adm_avx2.c
+++ b/libvmaf/src/feature/x86/adm_avx2.c
@@ -819,12 +819,12 @@ void adm_decouple_avx2(AdmBuffer *buf, int w, int h, int stride,
             __m256 od_inv_64 = _mm256_mul_ps(inv_64, _mm256_cvtepi32_ps(od));
             __m256 rst_d_f = _mm256_mul_ps(kd_inv_32768, od_inv_64);
 
-            __m256i gt0_rst_h_f = (__m256i)(_mm256_cmp_ps(rst_h_f, _mm256_setzero_ps(), 14));
-            __m256i lt0_rst_h_f = (__m256i)(_mm256_cmp_ps(rst_h_f, _mm256_setzero_ps(), 1));
-            __m256i gt0_rst_v_f = (__m256i)(_mm256_cmp_ps(rst_v_f, _mm256_setzero_ps(), 14));
-            __m256i lt0_rst_v_f = (__m256i)(_mm256_cmp_ps(rst_v_f, _mm256_setzero_ps(), 1));
-            __m256i gt0_rst_d_f = (__m256i)(_mm256_cmp_ps(rst_d_f, _mm256_setzero_ps(), 14));
-            __m256i lt0_rst_d_f = (__m256i)(_mm256_cmp_ps(rst_d_f, _mm256_setzero_ps(), 1));
+            __m256i gt0_rst_h_f = _mm256_castps_si256(_mm256_cmp_ps(rst_h_f, _mm256_setzero_ps(), 14));
+            __m256i lt0_rst_h_f = _mm256_castps_si256(_mm256_cmp_ps(rst_h_f, _mm256_setzero_ps(), 1));
+            __m256i gt0_rst_v_f = _mm256_castps_si256(_mm256_cmp_ps(rst_v_f, _mm256_setzero_ps(), 14));
+            __m256i lt0_rst_v_f = _mm256_castps_si256(_mm256_cmp_ps(rst_v_f, _mm256_setzero_ps(), 1));
+            __m256i gt0_rst_d_f = _mm256_castps_si256(_mm256_cmp_ps(rst_d_f, _mm256_setzero_ps(), 14));
+            __m256i lt0_rst_d_f = _mm256_castps_si256(_mm256_cmp_ps(rst_d_f, _mm256_setzero_ps(), 1));
 
             __m256i mask_min_max_h = _mm256_or_si256(gt0_rst_h_f, lt0_rst_h_f);
             __m256i mask_min_max_v = _mm256_or_si256(gt0_rst_v_f, lt0_rst_v_f);
@@ -834,7 +834,7 @@ void adm_decouple_avx2(AdmBuffer *buf, int w, int h, int stride,
             __m256i mask_rst_v = _mm256_and_si256(mask_min_max_v, angle_flag);
             __m256i mask_rst_d = _mm256_and_si256(mask_min_max_d, angle_flag);
 
-	    __m256d adm_gain_d = _mm256_set1_pd(adm_enhn_gain_limit);
+	        __m256d adm_gain_d = _mm256_set1_pd(adm_enhn_gain_limit);
             __m256d rst_h_gainlo_d = _mm256_mul_pd(_mm256_cvtepi32_pd(_mm256_extractf128_si256(rst_h, 0)), adm_gain_d);
             __m256d rst_h_gainhi_d = _mm256_mul_pd(_mm256_cvtepi32_pd(_mm256_extractf128_si256(rst_h, 1)), adm_gain_d);
             __m256i rst_h_gain = _mm256_insertf128_si256(_mm256_castsi128_si256(_mm256_cvtpd_epi32(rst_h_gainlo_d)), _mm256_cvtpd_epi32(rst_h_gainhi_d),1);
@@ -2088,15 +2088,15 @@ float adm_cm_avx2(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stride
             }
             accum_inner_h_lo_256 = _mm256_add_epi64(accum_inner_h_lo_256, accum_inner_h_hi_256);
             __m128i r2_h = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_h_lo_256), _mm256_extracti128_si256(accum_inner_h_lo_256, 1));
-            int64_t res_h = r2_h[0] + r2_h[1];
+            int64_t res_h = _mm_extract_epi64(r2_h, 0) + _mm_extract_epi64(r2_h, 1);
 
             accum_inner_v_lo_256 = _mm256_add_epi64(accum_inner_v_lo_256, accum_inner_v_hi_256);
             __m128i r2_v = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_v_lo_256), _mm256_extracti128_si256(accum_inner_v_lo_256, 1));
-            int64_t res_v = r2_v[0] + r2_v[1];
+            int64_t res_v = _mm_extract_epi64(r2_v, 0) + _mm_extract_epi64(r2_v, 1);
 
             accum_inner_d_lo_256 = _mm256_add_epi64(accum_inner_d_lo_256, accum_inner_d_hi_256);
             __m128i r2_d = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_d_lo_256), _mm256_extracti128_si256(accum_inner_d_lo_256, 1));
-            int64_t res_d = r2_d[0] + r2_d[1];
+            int64_t res_d = _mm_extract_epi64(r2_d, 0) + _mm_extract_epi64(r2_d, 1);
 
             for (j = end_col_mod6; j < end_col; ++j) {
                 xh = src->band_h[i * src_stride + j] * i_rfactor[0];
@@ -2521,13 +2521,13 @@ float i4_adm_cm_avx2(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_str
             }
 
             __m128i r2_h = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_h_256), _mm256_extracti128_si256(accum_inner_h_256, 1));
-            int64_t res_h = r2_h[0] + r2_h[1];
+            int64_t res_h = _mm_extract_epi64(r2_h, 0) + _mm_extract_epi64(r2_h, 1);
 
             __m128i r2_v = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_v_256), _mm256_extracti128_si256(accum_inner_v_256, 1));
-            int64_t res_v = r2_v[0] + r2_v[1];
+            int64_t res_v = _mm_extract_epi64(r2_v, 0) + _mm_extract_epi64(r2_v, 1);
 
             __m128i r2_d = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_d_256), _mm256_extracti128_si256(accum_inner_d_256, 1));
-            int64_t res_d = r2_d[0] + r2_d[1];
+            int64_t res_d = _mm_extract_epi64(r2_d, 0) + _mm_extract_epi64(r2_d, 1);
 
             for (j = end_col_mod2; j < end_col; ++j)
             {
@@ -3586,15 +3586,15 @@ float adm_csf_den_scale_avx2(const adm_dwt_band_t *src, int w, int h,
 
         accum_inner_h_lo = _mm256_add_epi64(accum_inner_h_lo, accum_inner_h_hi);
         __m128i h_r2 = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_h_lo), _mm256_extracti128_si256(accum_inner_h_lo, 1));
-        uint64_t h_r1 = h_r2[0] + h_r2[1];
+        uint64_t h_r1 = _mm_extract_epi64(h_r2, 0) + _mm_extract_epi64(h_r2, 1);
 
         accum_inner_v_lo = _mm256_add_epi64(accum_inner_v_lo, accum_inner_v_hi);
         __m128i v_r2 = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_v_lo), _mm256_extracti128_si256(accum_inner_v_lo, 1));
-        uint64_t v_r1 = v_r2[0] + v_r2[1];
+        uint64_t v_r1 = _mm_extract_epi64(v_r2, 0) + _mm_extract_epi64(v_r2, 1);
 
         accum_inner_d_lo = _mm256_add_epi64(accum_inner_d_lo, accum_inner_d_hi);
         __m128i d_r2 = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_d_lo), _mm256_extracti128_si256(accum_inner_d_lo, 1));
-        uint64_t d_r1 = d_r2[0] + d_r2[1];
+        uint64_t d_r1 = _mm_extract_epi64(d_r2, 0) + _mm_extract_epi64(d_r2, 1);
 
         for (int j = right_mod_8; j < right; ++j) {
             uint16_t h = (uint16_t)abs(src_h[j]);
@@ -3992,13 +3992,13 @@ float adm_csf_den_s123_avx2(const i4_adm_dwt_band_t *src, int scale, int w, int
             accum_inner_d_256 = _mm256_add_epi64(accum_inner_d_256, d_cu);
         }
         __m128i h_r2 = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_h_256), _mm256_extracti128_si256(accum_inner_h_256, 1));
-        uint64_t h_r1 = h_r2[0] + h_r2[1];
+        uint64_t h_r1 = _mm_extract_epi64(h_r2, 0) + _mm_extract_epi64(h_r2, 1);
 
         __m128i d_r2 = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_d_256), _mm256_extracti128_si256(accum_inner_d_256, 1));
-        uint64_t d_r1 = d_r2[0] + d_r2[1];
+        uint64_t d_r1 = _mm_extract_epi64(d_r2, 0) + _mm_extract_epi64(d_r2, 1);
 
         __m128i v_r2 = _mm_add_epi64(_mm256_castsi256_si128(accum_inner_v_256), _mm256_extracti128_si256(accum_inner_v_256, 1));
-        uint64_t v_r1 = v_r2[0] + v_r2[1];
+        uint64_t v_r1 = _mm_extract_epi64(v_r2, 0) + _mm_extract_epi64(v_r2, 1);
 
         for (int j = right_mod_4; j < right; ++j)
         {
diff --git a/libvmaf/src/feature/x86/adm_avx512.c b/libvmaf/src/feature/x86/adm_avx512.c
index 4a61becfa..d6b26a942 100644
--- a/libvmaf/src/feature/x86/adm_avx512.c
+++ b/libvmaf/src/feature/x86/adm_avx512.c
@@ -1746,17 +1746,17 @@ float adm_cm_avx512(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stri
             accum_inner_h_lo_512 = _mm512_add_epi64(accum_inner_h_lo_512, accum_inner_h_hi_512);
             __m256i r4_h = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_h_lo_512), _mm512_extracti64x4_epi64(accum_inner_h_lo_512, 1));
             __m128i r2_h = _mm_add_epi64(_mm256_castsi256_si128(r4_h), _mm256_extracti128_si256(r4_h, 1));
-            int64_t res_h = r2_h[0] + r2_h[1];
+            int64_t res_h = _mm_extract_epi64(r2_h, 0) + _mm_extract_epi64(r2_h, 1);
 
             accum_inner_v_lo_512 = _mm512_add_epi64(accum_inner_v_lo_512, accum_inner_v_hi_512);
             __m256i r4_v = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_v_lo_512), _mm512_extracti64x4_epi64(accum_inner_v_lo_512, 1));
             __m128i r2_v = _mm_add_epi64(_mm256_castsi256_si128(r4_v), _mm256_extracti128_si256(r4_v, 1));
-            int64_t res_v = r2_v[0] + r2_v[1];
+            int64_t res_v = _mm_extract_epi64(r2_v, 0) + _mm_extract_epi64(r2_v, 1);
 
             accum_inner_d_lo_512 = _mm512_add_epi64(accum_inner_d_lo_512, accum_inner_d_hi_512);
             __m256i r4_d = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_d_lo_512), _mm512_extracti64x4_epi64(accum_inner_d_lo_512, 1));
             __m128i r2_d = _mm_add_epi64(_mm256_castsi256_si128(r4_d), _mm256_extracti128_si256(r4_d, 1));
-            int64_t res_d = r2_d[0] + r2_d[1];
+            int64_t res_d = _mm_extract_epi64(r2_d, 0) + _mm_extract_epi64(r2_d, 1);
 
             for (j = end_col_mod14; j < end_col; ++j) {
                 xh = src->band_h[i * src_stride + j] * i_rfactor[0];
@@ -2168,15 +2168,15 @@ float i4_adm_cm_avx512(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_s
 
             __m256i r4_h = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_h_512), _mm512_extracti64x4_epi64(accum_inner_h_512, 1));
             __m128i r2_h = _mm_add_epi64(_mm256_castsi256_si128(r4_h), _mm256_extracti128_si256(r4_h, 1));
-            int64_t res_h = r2_h[0] + r2_h[1];
+            int64_t res_h = _mm_extract_epi64(r2_h, 0) + _mm_extract_epi64(r2_h, 1);
 
             __m256i r4_v = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_v_512), _mm512_extracti64x4_epi64(accum_inner_v_512, 1));
             __m128i r2_v = _mm_add_epi64(_mm256_castsi256_si128(r4_v), _mm256_extracti128_si256(r4_v, 1));
-            int64_t res_v = r2_v[0] + r2_v[1];
+            int64_t res_v = _mm_extract_epi64(r2_v, 0) + _mm_extract_epi64(r2_v, 1);
 
             __m256i r4_d = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_d_512), _mm512_extracti64x4_epi64(accum_inner_d_512, 1));
             __m128i r2_d = _mm_add_epi64(_mm256_castsi256_si128(r4_d), _mm256_extracti128_si256(r4_d, 1));
-            int64_t res_d = r2_d[0] + r2_d[1];
+            int64_t res_d = _mm_extract_epi64(r2_d, 0) + _mm_extract_epi64(r2_d, 1);
 
             for (j = end_col_mod6; j < end_col; ++j)
             {
@@ -3858,17 +3858,17 @@ float adm_csf_den_scale_avx512(const adm_dwt_band_t *src, int w, int h,
         accum_inner_h_lo = _mm512_add_epi64(accum_inner_h_lo, accum_inner_h_hi);
         __m256i h_r4 = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_h_lo), _mm512_extracti64x4_epi64(accum_inner_h_lo, 1));
         __m128i h_r2 = _mm_add_epi64(_mm256_castsi256_si128(h_r4), _mm256_extracti64x2_epi64(h_r4, 1));
-        uint64_t h_r1 = h_r2[0] + h_r2[1];
+        uint64_t h_r1 = _mm_extract_epi64(h_r2, 0) + _mm_extract_epi64(h_r2, 1);
 
         accum_inner_v_lo = _mm512_add_epi64(accum_inner_v_lo, accum_inner_v_hi);
         __m256i v_r4 = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_v_lo), _mm512_extracti64x4_epi64(accum_inner_v_lo, 1));
         __m128i v_r2 = _mm_add_epi64(_mm256_castsi256_si128(v_r4), _mm256_extracti64x2_epi64(v_r4, 1));
-        uint64_t v_r1 = v_r2[0] + v_r2[1];
+        uint64_t v_r1 = _mm_extract_epi64(v_r2, 0) + _mm_extract_epi64(v_r2, 1);
 
         accum_inner_d_lo = _mm512_add_epi64(accum_inner_d_lo, accum_inner_d_hi);
         __m256i d_r4 = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_d_lo), _mm512_extracti64x4_epi64(accum_inner_d_lo, 1));
         __m128i d_r2 = _mm_add_epi64(_mm256_castsi256_si128(d_r4), _mm256_extracti64x2_epi64(d_r4, 1));
-        uint64_t d_r1 = d_r2[0] + d_r2[1];
+        uint64_t d_r1 = _mm_extract_epi64(d_r2, 0) + _mm_extract_epi64(d_r2, 1);
 
         for (int j = right_mod_16; j < right; ++j) {
             uint16_t h = (uint16_t)abs(src_h[j]);
@@ -3985,15 +3985,15 @@ float adm_csf_den_s123_avx512(const i4_adm_dwt_band_t *src, int scale, int w, in
         }
         __m256i h_r4 = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_h_512), _mm512_extracti64x4_epi64(accum_inner_h_512, 1));
         __m128i h_r2 = _mm_add_epi64(_mm256_castsi256_si128(h_r4), _mm256_extracti64x2_epi64(h_r4, 1));
-        uint64_t h_r1 = h_r2[0] + h_r2[1];
+        uint64_t h_r1 = _mm_extract_epi64(h_r2, 0) + _mm_extract_epi64(h_r2, 1);
 
         __m256i d_r4 = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_d_512), _mm512_extracti64x4_epi64(accum_inner_d_512, 1));
         __m128i d_r2 = _mm_add_epi64(_mm256_castsi256_si128(d_r4), _mm256_extracti64x2_epi64(d_r4, 1));
-        uint64_t d_r1 = d_r2[0] + d_r2[1];
+        uint64_t d_r1 = _mm_extract_epi64(d_r2, 0) + _mm_extract_epi64(d_r2, 1);
 
         __m256i v_r4 = _mm256_add_epi64(_mm512_castsi512_si256(accum_inner_v_512), _mm512_extracti64x4_epi64(accum_inner_v_512, 1));
         __m128i v_r2 = _mm_add_epi64(_mm256_castsi256_si128(v_r4), _mm256_extracti64x2_epi64(v_r4, 1));
-        uint64_t v_r1 = v_r2[0] + v_r2[1];
+        uint64_t v_r1 = _mm_extract_epi64(v_r2, 0) + _mm_extract_epi64(v_r2, 1);
 
         for (int j = right_mod_8; j < right; ++j)
         {
diff --git a/libvmaf/src/feature/x86/motion_avx2.c b/libvmaf/src/feature/x86/motion_avx2.c
index ed88ad3de..9cdccb851 100644
--- a/libvmaf/src/feature/x86/motion_avx2.c
+++ b/libvmaf/src/feature/x86/motion_avx2.c
@@ -534,7 +534,8 @@ void sad_avx2(VmafPicture *pic_a, VmafPicture *pic_b, uint64_t *sad)
         a += (pic_a->stride[0] / 2);
         b += (pic_b->stride[0] / 2);
     }
-    uint64_t r1 = final_accum[0] + final_accum[1] + final_accum[2] + final_accum[3];
+    uint64_t r1 = _mm256_extract_epi64(final_accum, 0) + _mm256_extract_epi64(final_accum, 1) +
+                  _mm256_extract_epi64(final_accum, 2) + _mm256_extract_epi64(final_accum, 3);
     
     *sad += r1;
 }
diff --git a/libvmaf/src/feature/x86/motion_avx512.c b/libvmaf/src/feature/x86/motion_avx512.c
index 6933d7c3b..b741d7350 100644
--- a/libvmaf/src/feature/x86/motion_avx512.c
+++ b/libvmaf/src/feature/x86/motion_avx512.c
@@ -450,7 +450,7 @@ void sad_avx512(VmafPicture *pic_a, VmafPicture *pic_b, uint64_t *sad)
     }
     __m256i r4 = _mm256_add_epi64(_mm512_castsi512_si256(final_accum), _mm512_extracti64x4_epi64(final_accum, 1));
     __m128i r2 = _mm_add_epi64(_mm256_castsi256_si128(r4), _mm256_extracti64x2_epi64(r4, 1));
-    uint64_t r1 = r2[0] + r2[1];
+    uint64_t r1 = _mm_extract_epi64(r2, 0) + _mm_extract_epi64(r2, 1);
     
     *sad += r1;
 
diff --git a/libvmaf/src/libvmaf.c b/libvmaf/src/libvmaf.c
index 18ccff0e6..094fa2d11 100644
--- a/libvmaf/src/libvmaf.c
+++ b/libvmaf/src/libvmaf.c
@@ -22,6 +22,12 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
 #include <string.h>
 #include <time.h>
 
@@ -894,7 +900,7 @@ int vmaf_score_pooled_model_collection(VmafContext *vmaf,
     const char *suffix_stddev = "_stddev";
     const size_t name_sz =
         strlen(model_collection->name) + strlen(suffix_lo) + 1;
-    char name[name_sz];
+    char *name = alloca(name_sz);
     memset(name, 0, name_sz);
 
     snprintf(name, name_sz, "%s%s", model_collection->name, suffix_bagging);
diff --git a/libvmaf/src/log.c b/libvmaf/src/log.c
index f93c7379f..5672e40ab 100644
--- a/libvmaf/src/log.c
+++ b/libvmaf/src/log.c
@@ -19,7 +19,13 @@
 #include "libvmaf/libvmaf.h"
 
 #include <stdarg.h>
+#ifdef _MSC_VER
+#include <io.h>
+#define isatty  _isatty
+#define fileno  _fileno
+#else
 #include <unistd.h>
+#endif
 
 static enum VmafLogLevel vmaf_log_level = VMAF_LOG_LEVEL_INFO;
 static int istty = 0;
diff --git a/libvmaf/src/meson.build b/libvmaf/src/meson.build
index 4d214bbf3..b102a8cf2 100644
--- a/libvmaf/src/meson.build
+++ b/libvmaf/src/meson.build
@@ -210,10 +210,24 @@ libvmaf_include = include_directories(
     is_nvtx_enabled ? '/usr/local/cuda/include' : '',
 )
 
+if cc.get_id() == 'msvc'
+    thread_lib = declare_dependency(
+        dependencies: [pthreadwin32_lib],
+        include_directories: pthreadwin32_inc,
+        compile_args: [
+            '-DPTW32_CLEANUP_C',
+            '-DPTW32_STATIC_LIB',
+        ],
+    )
+else
+    thread_lib = dependency('threads')
+endif
+
 libvmaf_cpu_static_lib = static_library(
     'libvmaf_cpu',
     libvmaf_cpu_sources,
     include_directories : [libvmaf_include],
+    dependencies: [thread_lib],
 )
 
 platform_specific_cpu_objects = []
@@ -244,11 +258,17 @@ if is_asm_enabled
           feature_src_dir + 'x86/cambi_avx2.c',
       ]
 
+      if cc.get_id() == 'msvc'
+          avx2_cflags = ['/arch:AVX2']
+      else
+          avx2_cflags = ['-mavx2', '-mavx']
+      endif
       x86_avx2_static_lib = static_library(
           'x86_avx2',
           x86_avx2_sources,
           include_directories : vmaf_base_include,
-          c_args : ['-mavx', '-mavx2'] + vmaf_cflags_common,
+          c_args : avx2_cflags + vmaf_cflags_common,
+          dependencies: [builtin_clz_dependency],
       )
 
       platform_specific_cpu_objects += x86_avx2_static_lib.extract_all_objects(recursive: true)
@@ -260,13 +280,18 @@ if is_asm_enabled
             feature_src_dir + 'x86/adm_avx512.c',
         ]
 
+        if cc.get_id() == 'msvc'
+            avx512_cflags = ['/arch:AVX512']
+        else
+            avx512_cflags = ['-mavx512f', '-mavx512dq', '-mavx512bw', '-mavx512cd', '-mavx512dq',
+                            '-mavx512vbmi', '-mavx512vl']
+        endif
         x86_avx512_static_lib = static_library(
             'x86_avx512',
             x86_avx512_sources,
             include_directories : vmaf_base_include,
-            c_args : ['-mavx512f', '-mavx512dq', '-mavx512bw', '-mavx512cd', '-mavx512dq',
-                      '-mavx512vbmi', '-mavx512vl'] +
-                     vmaf_cflags_common,
+            c_args : avx512_cflags + vmaf_cflags_common,
+            dependencies: [builtin_clz_dependency],
         )
 
         platform_specific_cpu_objects += x86_avx512_static_lib.extract_all_objects(recursive: true)
@@ -421,7 +446,6 @@ if is_cuda_enabled
     common_cuda_objects += cuda_static_lib.extract_all_objects()
 endif
 
-thread_lib = dependency('threads')
 math_lib = cc.find_library('m', required : false)
 
 vmaf_include = include_directories(
@@ -496,7 +520,7 @@ libvmaf_feature_static_lib = static_library(
     'libvmaf_feature',
     libvmaf_feature_sources,
     include_directories : [libvmaf_include, vmaf_include, cuda_dir],
-    dependencies: [stdatomic_dependency, cuda_dependency],
+    dependencies: [thread_lib, stdatomic_dependency, cuda_dependency, builtin_clz_dependency],
     objects: common_cuda_objects
 )
 
@@ -555,6 +579,7 @@ libvmaf = library(
       math_lib,
       stdatomic_dependency,
       cuda_dependency,
+      builtin_clz_dependency,
     ],
     objects : [
         platform_specific_cpu_objects,
diff --git a/libvmaf/src/predict.c b/libvmaf/src/predict.c
index 9926de15b..78c6b07a4 100644
--- a/libvmaf/src/predict.c
+++ b/libvmaf/src/predict.c
@@ -21,6 +21,12 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
 
 #include "dict.h"
 #include "feature/alias.h"
@@ -358,7 +364,7 @@ static int vmaf_bootstrap_predict_score_at_index(
                                         VmafModelCollectionScore *score)
 {
     int err = 0;
-    double scores[model_collection->cnt];
+    double *scores = alloca(model_collection->cnt * sizeof(*scores));
 
     for (unsigned i = 0; i < model_collection->cnt; i++) {
         // mean, stddev, etc. are calculated on untransformed/unclipped scores
@@ -424,7 +430,7 @@ static int vmaf_bootstrap_predict_score_at_index(
     const char *suffix_stddev = "_stddev";
     const size_t name_sz =
         strlen(model_collection->name) + strlen(suffix_lo) + 1;
-    char name[name_sz];
+    char *name = alloca(name_sz);
     memset(name, 0, name_sz);
 
     snprintf(name, name_sz, "%s%s", model_collection->name, suffix_bagging);
diff --git a/libvmaf/src/read_json_model.c b/libvmaf/src/read_json_model.c
index 08405d3b7..97aa5c46f 100644
--- a/libvmaf/src/read_json_model.c
+++ b/libvmaf/src/read_json_model.c
@@ -23,6 +23,12 @@
 
 #include <errno.h>
 #include <stdlib.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
 #include <string.h>
 
 #define MAX_FEATURE_COUNT 64 //FIXME
@@ -493,9 +499,9 @@ static int model_collection_parse(json_stream *s, VmafModel **model,
     if (!c.name) return -ENOMEM;
 
     const size_t cfg_name_sz = strlen(name) + 5 + 1;
-    char cfg_name[cfg_name_sz];
+    char *cfg_name = alloca(cfg_name_sz);
 
-    const size_t generated_key_sz = 4 + 1;
+    enum { generated_key_sz = 4 + 1 };
     char generated_key[generated_key_sz];
 
     unsigned i = 0;
diff --git a/libvmaf/subprojects/pthread-win32 b/libvmaf/subprojects/pthread-win32
new file mode 160000
index 000000000..06e7608bf
--- /dev/null
+++ b/libvmaf/subprojects/pthread-win32
@@ -0,0 +1 @@
+Subproject commit 06e7608bfe926d2bd7176c0b02be0c98f40cced4
diff --git a/libvmaf/test/meson.build b/libvmaf/test/meson.build
index 1fe4d422c..947f749f5 100644
--- a/libvmaf/test/meson.build
+++ b/libvmaf/test/meson.build
@@ -6,7 +6,7 @@ test_inc = include_directories('.')
 
 test_context = executable('test_context',
     ['test.c', 'test_context.c'],
-    include_directories : [libvmaf_inc, test_inc],
+    include_directories : [libvmaf_inc, test_inc, pthreadwin32_inc],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
     dependencies:[stdatomic_dependency, cuda_dependency],
 )
@@ -26,7 +26,7 @@ test_feature_collector = executable('test_feature_collector',
     ['test.c', 'test_feature_collector.c', '../src/log.c', '../src/predict.c', '../src/metadata_handler.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/feature/'), include_directories('../src')],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
-    dependencies: cuda_dependency,
+    dependencies: [cuda_dependency, stdatomic_dependency, thread_lib],
     objects : libsvm_static_lib.extract_all_objects(recursive: true),
 )
 
@@ -38,7 +38,7 @@ test_log = executable('test_log',
 test_thread_pool = executable('test_thread_pool',
     ['test.c', 'test_thread_pool.c', '../src/thread_pool.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
-    dependencies : thread_lib,
+    dependencies : [thread_lib, getopt_dependency]
 )
 
 test_model = executable('test_model',
@@ -46,7 +46,7 @@ test_model = executable('test_model',
     include_directories : [libvmaf_inc, test_inc, include_directories('../src')],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
     c_args : [vmaf_cflags_common, '-DJSON_MODEL_PATH="'+join_paths(meson.project_source_root(), '../model/')+'"'],
-    dependencies : [thread_lib, cuda_dependency],
+    dependencies : [thread_lib, cuda_dependency, stdatomic_dependency],
     objects : libsvm_static_lib.extract_all_objects(recursive: true),
 )
 
@@ -57,7 +57,7 @@ test_predict = executable('test_predict',
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
     c_args : vmaf_cflags_common,
-    dependencies : [thread_lib, cuda_dependency],
+    dependencies : [thread_lib, cuda_dependency, stdatomic_dependency],
     objects : libsvm_static_lib.extract_all_objects(recursive: true),
 )
 
@@ -66,7 +66,9 @@ test_feature_extractor = executable('test_feature_extractor',
      '../src/dict.c', '../src/opt.c', '../src/log.c', '../src/predict.c',
      '../src/metadata_handler.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
-    dependencies : [math_lib, stdatomic_dependency, thread_lib, cuda_dependency],
+    c_args : vmaf_cflags_common,
+    cpp_args : vmaf_cflags_common,
+    dependencies : [math_lib, stdatomic_dependency, thread_lib, cuda_dependency, getopt_dependency],
     objects : [
       common_cuda_objects,
       platform_specific_cpu_objects,
@@ -90,6 +92,7 @@ test_cpu = executable('test_cpu',
 test_ref = executable('test_ref',
     ['test.c', 'test_ref.c', '../src/ref.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
+    dependencies : stdatomic_dependency,
 )
 
 test_feature = executable('test_feature',
@@ -100,15 +103,16 @@ test_feature = executable('test_feature',
 test_ciede = executable('test_ciede',
     ['test.c', 'test_ciede.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
+
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
-    dependencies: cuda_dependency
+    dependencies: [thread_lib, cuda_dependency, stdatomic_dependency]
 )
 
 test_cambi = executable('test_cambi',
     ['test.c', 'test_cambi.c', '../src/picture.c', '../src/mem.c', '../src/ref.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
-    dependencies: cuda_dependency
+    dependencies: [thread_lib, cuda_dependency, stdatomic_dependency]
 )
 
 test_luminance_tools = executable('test_luminance_tools',
@@ -122,11 +126,13 @@ test_cli_parse = executable('test_cli_parse',
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/'), include_directories('../tools/')],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
     c_args : [compat_cflags],
+    dependencies : getopt_dependency
 )
 
 test_psnr = executable('test_psnr',
     ['test.c', 'test_psnr.c', '../src/picture.c'],
     include_directories : [libvmaf_inc, test_inc, include_directories('../src/')],
+    dependencies: [thread_lib, stdatomic_dependency],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
 )
 
@@ -136,7 +142,7 @@ test_framesync = executable('test_framesync',
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
     c_args : vmaf_cflags_common,
     cpp_args : vmaf_cflags_common,
-    dependencies : thread_lib,
+    dependencies : [thread_lib, stdatomic_dependency],
 )
 
 if get_option('enable_cuda')
diff --git a/libvmaf/test/test_cambi.c b/libvmaf/test/test_cambi.c
index 5318b281c..6a5ccd558 100644
--- a/libvmaf/test/test_cambi.c
+++ b/libvmaf/test/test_cambi.c
@@ -258,7 +258,7 @@ static char *test_decimate_generic()
 static char *test_filter_mode()
 {
     VmafPicture filtered_image, image;
-    unsigned w = 5, h = 5;
+    enum { w = 5, h = 5 };
     uint16_t buffer[3 * w];
 
     int err = 0;
diff --git a/libvmaf/test/test_cli_parse.c b/libvmaf/test/test_cli_parse.c
index 7ba87babe..1654222fe 100644
--- a/libvmaf/test/test_cli_parse.c
+++ b/libvmaf/test/test_cli_parse.c
@@ -15,13 +15,16 @@
  *     limitations under the License.
  *
  */
-
-#include <getopt.h>
-
 #include "test.h"
 
 #include "cli_parse.h"
 
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#else
+#error "Meson target is missing getopt_dependency"
+#endif
+
 static int cli_free_dicts(CLISettings *settings) {
     for (unsigned i = 0; i < settings->feature_cnt; i++) {
         int err = vmaf_feature_dictionary_free(&(settings->feature_cfg[i].opts_dict));
diff --git a/libvmaf/test/test_feature_extractor.c b/libvmaf/test/test_feature_extractor.c
index 3476872fe..496299ab1 100644
--- a/libvmaf/test/test_feature_extractor.c
+++ b/libvmaf/test/test_feature_extractor.c
@@ -56,7 +56,7 @@ static char *test_feature_extractor_context_pool()
 {
     int err = 0;
 
-    const unsigned n_threads = 8;
+    enum { n_threads = 8 };
     VmafFeatureExtractorContextPool *pool;
     err = vmaf_fex_ctx_pool_create(&pool, n_threads);
     mu_assert("problem during vmaf_fex_ctx_pool_create", !err);
diff --git a/libvmaf/test/test_ring_buffer.c b/libvmaf/test/test_ring_buffer.c
index abea74d50..70c2eef87 100644
--- a/libvmaf/test/test_ring_buffer.c
+++ b/libvmaf/test/test_ring_buffer.c
@@ -20,7 +20,12 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#ifdef _MSC_VER
+#include <windows.h>
+#define usleep(x) Sleep((x) / 1000)
+#else
 #include <unistd.h>
+#endif
 
 #include "test.h"
 
diff --git a/libvmaf/tools/cli_parse.c b/libvmaf/tools/cli_parse.c
index 4382b0936..e1f316bae 100644
--- a/libvmaf/tools/cli_parse.c
+++ b/libvmaf/tools/cli_parse.c
@@ -1,5 +1,9 @@
 #include <assert.h>
+#ifdef HAVE_GETOPT_H
 #include <getopt.h>
+#else
+#error "Meson target is missing getopt_dependency"
+#endif
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/libvmaf/tools/meson.build b/libvmaf/tools/meson.build
index 79f3366d3..3d9921d02 100644
--- a/libvmaf/tools/meson.build
+++ b/libvmaf/tools/meson.build
@@ -10,8 +10,8 @@ endif
 vmaf = executable(
     'vmaf',
     ['vmaf.c', 'cli_parse.c', 'y4m_input.c', 'vidinput.c', 'yuv_input.c'],
-    include_directories : [libvmaf_inc, vmaf_include],
-    dependencies: [stdatomic_dependency, cuda_dependency],
+    include_directories : [libvmaf_inc, vmaf_include, pthreadwin32_inc],
+    dependencies: [stdatomic_dependency, cuda_dependency, getopt_dependency],
     c_args : [vmaf_cflags_common, compat_cflags],
     link_with : get_option('default_library') == 'both' ? libvmaf.get_static_lib() : libvmaf,
     install : true,
diff --git a/libvmaf/tools/vmaf.c b/libvmaf/tools/vmaf.c
index 316a04892..452d8ae5a 100644
--- a/libvmaf/tools/vmaf.c
+++ b/libvmaf/tools/vmaf.c
@@ -1,7 +1,19 @@
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h> // alloca()
+#endif
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+#ifdef _MSC_VER
+#include <io.h>
+#define isatty _isatty
+#define fileno _fileno
+#else
 #include <unistd.h>
+#endif
 
 #include "cli_parse.h"
 #include "spinner.h"
@@ -274,7 +286,7 @@ int main(int argc, char *argv[])
     model_collection = malloc(model_sz);
     memset(model_collection, 0, model_collection_sz);
 
-    const char *model_collection_label[c.model_cnt];
+    const char **model_collection_label = alloca(c.model_cnt * sizeof(*model_collection_label));
     unsigned model_collection_cnt = 0;
 
     for (unsigned i = 0; i < c.model_cnt; i++) {
diff --git a/resource/doc/windows.md b/resource/doc/windows.md
index 8c3d9bace..a2d1cdeca 100644
--- a/resource/doc/windows.md
+++ b/resource/doc/windows.md
@@ -1,20 +1,77 @@
-# Building vmaf on Windows
-
-We are going to build vmaf in Windows. These steps are in accordance with the corresponding github action for building on Windows and have been tested successfully using a Windows10 machine. They work with either `cmd` or `PowerShell`.
-
-**Note:** This guide is just to build libvmaf on Windows and not involves the python part of project as it is the same across all platforms (settings up virtual environment, ...).
-
-## Steps
-  1. Install [msys2](https://www.msys2.org/)
-  
-  2. Install required msys2 packages using its shell:
-  
-    pacman -S --noconfirm --needed mingw-w64-x86_64-nasm mingw-w64-x86_64-gcc mingw-w64-x86_64-meson mingw-w64-x86_64-ninja
-
-  3. It is assumed the final results will be in `C:/vmaf-install` (You can change it to any directory if you want). Finally setup the meson and build:
-        
-        
-    cd <Vmaf project root directory>
-    mkdir C:/vmaf-install
-    meson setup libvmaf libvmaf/build --buildtype release --default-library static --prefix C:/vmaf-install
-    meson install -C libvmaf/build
+# Building libvmaf on Windows
+
+This guide describes how to build `libvmaf` natively on Windows.  
+The steps mirror the configuration used in the official GitHub Actions workflow and have been tested on Windows 11.  
+They work from both `cmd` and PowerShell.
+
+**Note:** This guide covers only the C/C++ library (`libvmaf`).  
+The Python components are platform‑independent and follow the same setup process as on Linux or macOS.
+
+---
+
+## Building with Visual Studio (MSVC)
+
+### Prerequisites
+
+1. **Initialize git submodules**
+
+   Before building with MSVC, make sure the git submodules are initialized and updated.  
+   The Windows build relies on the bundled `pthread-win32` implementation, which is provided as a git submodule.
+
+   ```sh
+   git submodule update --init --recursive
+   ```
+
+2. **Install required tools and ensure they are in your `PATH`:**
+
+   - [Meson](https://github.com/mesonbuild/meson/releases)  
+   - [Ninja](https://github.com/ninja-build/ninja/releases)  
+     (required for the bundled pthread-win32 implementation)
+   - [CMake](https://cmake.org/download/)
+   - [Gvim](https://github.com/vim/vim-win32-installer/releases)  
+     (provides the `xxd` utility needed when building the built‑in models)
+
+### Compilation
+
+3. **Use a Visual Studio environment with compiler variables pre‑configured**,  
+   such as the **"x64 Native Tools Command Prompt"**.
+
+4. **Configure and build:**
+
+   ```cmd
+   cd <vmaf project root>
+   mkdir C:/vmaf-install
+
+   meson setup libvmaf libvmaf/build --buildtype release --default-library static --prefix C:/vmaf-install
+   meson install -C libvmaf/build
+   ```
+
+This produces a native MSVC build of `libvmaf` and installs it under `C:/vmaf-install`.
+
+---
+
+## Building with MSYS2 (MinGW)
+
+1. **Install [MSYS2](https://www.msys2.org/)**
+
+2. **From an MSYS2 MinGW64 shell, install the required packages:**
+
+    ```sh
+    pacman -S --noconfirm --needed \
+        mingw-w64-x86_64-nasm \
+        mingw-w64-x86_64-gcc \
+        mingw-w64-x86_64-meson \
+        mingw-w64-x86_64-ninja
+    ```
+
+3. **Configure and build:**
+
+```sh
+cd <vmaf project root>
+mkdir C:/vmaf-install
+
+meson setup libvmaf libvmaf/build --buildtype release --default-library static --prefix C:/vmaf-install
+meson install -C libvmaf/build
+```
+
+This produces a MinGW‑compiled version of `libvmaf` compatible with MSYS2 environments.