diff --git a/.github/actions/setup-djgpp-toolchain/action.yml b/.github/actions/setup-djgpp-toolchain/action.yml new file mode 100644 index 00000000..93a6fe2b --- /dev/null +++ b/.github/actions/setup-djgpp-toolchain/action.yml @@ -0,0 +1,71 @@ +name: 'Setup DJGPP toolchain' +description: 'Download DJGPP and setup CMake toolchain' +runs: + using: 'composite' + steps: + - name: 'Calculate variables' + id: calc + shell: sh + run: | + version="12.2.0" + case "${{ runner.os }}-${{ runner.arch }}" in + "Linux-X86") + archive="djgpp-linux32-gcc1220.tar.bz2" + ;; + "Linux-X64") + archive="djgpp-linux64-gcc1220.tar.bz2" + ;; + "macOS-X86" | "macOS-X64" | "macOS-ARM64") + archive="djgpp-osx-gcc1220.tar.bz2" + ;; + "Windows-X86" | "Windows-X64") + archive="djgpp-mingw-gcc1220.zip" + ;; + *) + echo "Unsupported ${{ runner.os }}-${{ runner.arch }}" + exit 1; + ;; + esac + echo "url=https://github.com/andrewwutw/build-djgpp/releases/download/v3.4/${archive}" >> ${GITHUB_OUTPUT} + echo "archive=${archive}" >> ${GITHUB_OUTPUT} + echo "version=${version}" >> ${GITHUB_OUTPUT} + echo "cache-key=${archive}-${version}-${{ runner.os }}-${{ runner.arch }}" >> ${GITHUB_OUTPUT} + - name: 'Restore cached ${{ steps.calc.outputs.archive }}' + id: cache-restore + uses: actions/cache/restore@v5 + with: + path: '${{ runner.temp }}/${{ steps.calc.outputs.archive }}' + key: ${{ steps.calc.outputs.cache-key }} + - name: 'Download DJGPP ${{ steps.calc.outputs.version }} for ${{ runner.os }} (${{ runner.arch }})' + if: ${{ !steps.cache-restore.outputs.cache-hit || steps.cache-restore.outputs.cache-hit == 'false' }} + shell: pwsh + run: | + Invoke-WebRequest "${{ steps.calc.outputs.url }}" -OutFile "${{ runner.temp }}/${{ steps.calc.outputs.archive }}" + - name: 'Cache ${{ steps.calc.outputs.archive }}' + if: ${{ !steps.cache-restore.outputs.cache-hit || steps.cache-restore.outputs.cache-hit == 'false' }} + uses: actions/cache/save@v5 + with: + path: '${{ runner.temp }}/${{ 
steps.calc.outputs.archive }}' + key: ${{ steps.calc.outputs.cache-key }} + - name: 'Extract DJGPP archive' + shell: pwsh + run: | + $archive = "${{ steps.calc.outputs.archive }}"; + if ($archive.EndsWith(".bz2")) { + # Remove ".bz2" suffix + $tar_archive = $archive.Substring(0, $archive.Length - 4) + 7z "-o${{ runner.temp }}" x "${{ runner.temp }}/${{ steps.calc.outputs.archive }}" + 7z "-o${{ runner.temp }}" x "${{ runner.temp }}/$tar_archive" + } else { + 7z "-o${{ runner.temp }}" x "${{ runner.temp }}/${{ steps.calc.outputs.archive }}" + } + - name: 'Install Linux dependency' + if: ${{ runner.os == 'Linux' }} + shell: sh + run: | + sudo apt-get install -y libfl-dev + - name: 'Set output variables' + id: final + shell: pwsh + run: | + echo "${{ runner.temp }}/djgpp/bin" >> $env:GITHUB_PATH diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2cbf8deb..b8f338ec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: name: 'clang-format' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Run clang-format run: | find CONFIG LEGO1 ISLE miniwin -iname '*.h' -o -iname '*.cpp' | xargs \ @@ -48,10 +48,11 @@ jobs: - { name: 'iOS', os: 'macos-15', generator: 'Xcode', dx5: false, config: false, brew: true, werror: true, clang-tidy: false, cmake-args: '-DCMAKE_SYSTEM_NAME=iOS', ios: true } - { name: 'Emscripten', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, emsdk: true, werror: true, clang-tidy: false, cmake-wrapper: 'emcmake' } - { name: 'Nintendo 3DS', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, n3ds: true, werror: true, clang-tidy: false, container: 'devkitpro/devkitarm:latest', cmake-args: '-DCMAKE_TOOLCHAIN_FILE=/opt/devkitpro/cmake/3DS.cmake' } - - { name: 'Nintendo Switch', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, nx: true, werror: true, clang-tidy: false, container: 
'devkitpro/devkita64:latest', cmake-args: '-DCMAKE_TOOLCHAIN_FILE=/opt/devkitpro/cmake/Switch.cmake' } + - { name: 'Nintendo Switch', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, nx: true, werror: true, clang-tidy: false, container: 'devkitpro/devkita64:latest', cmake-args: '-DCMAKE_TOOLCHAIN_FILE=/opt/devkitpro/cmake/Switch.cmake' } - { name: 'Xbox One', os: 'windows-latest', generator: 'Visual Studio 17 2022', dx5: false, config: false, msvc: true, werror: false, clang-tidy: false, vc-arch: 'amd64', cmake-args: '-DCMAKE_SYSTEM_NAME=WindowsStore -DCMAKE_SYSTEM_VERSION=10.0.26100.0', xbox-one: true} - { name: 'Android', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, android: true, werror: true, clang-tidy: false,} - { name: 'Vita', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, vita: true, werror: true, clang-tidy: false, cmake-args: '--toolchain /usr/local/vitasdk/share/vita.toolchain.cmake'} + - { name: 'DOS', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, dos: true, werror: true, clang-tidy: false, cmake-args: '--toolchain $GITHUB_WORKSPACE/CMake/i586-pc-msdosdjgpp.cmake'} steps: - name: Setup vcvars if: ${{ !!matrix.msvc }} @@ -134,7 +135,7 @@ jobs: echo "$VITASDK/bin" >> $GITHUB_PATH ./install-all.sh - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Checkout LFS if: ${{ matrix.build-assets }} @@ -143,11 +144,15 @@ jobs: - name: Setup Java (Android) if: ${{ matrix.android }} - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: '17' + - name: 'Set up DJGPP toolchain' + uses: ./.github/actions/setup-djgpp-toolchain + if: ${{ matrix.dos }} + - name: Get CMake (Android) if: ${{ matrix.android }} uses: lukka/get-cmake@latest @@ -271,7 +276,7 @@ jobs: run: (cd build/assets && zip -r ../dist/isle-assets.zip .) 
- name: Upload Build Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: '${{ matrix.name }}' path: | @@ -287,7 +292,7 @@ jobs: name: 'FreeBSD' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build on FreeBSD uses: vmactions/freebsd-vm@v1 @@ -307,7 +312,7 @@ jobs: cd build && cpack . - name: Upload Build Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: 'FreeBSD' path: build/dist/isle-* @@ -331,7 +336,7 @@ jobs: options: --privileged steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build Flatpak uses: flatpak/flatpak-github-actions/flatpak-builder@v6 @@ -345,7 +350,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install LLVM and Clang uses: KyleMayes/install-llvm-action@v1 @@ -389,14 +394,14 @@ jobs: - freebsd steps: - name: Download All Artifacts - uses: actions/download-artifact@main + uses: actions/download-artifact@v8 with: pattern: "*" path: Release merge-multiple: true - name: Checkout uploadtool - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: 'probonopd/uploadtool' path: 'uploadtool' diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ca316590..6fe8ab2e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -48,7 +48,7 @@ jobs: labels: ${{ steps.meta.outputs.labels }} - name: Generate artifact attestation - uses: actions/attest-build-provenance@v2 + uses: actions/attest-build-provenance@v4 with: subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} subject-digest: ${{ steps.push.outputs.digest }} diff --git a/3rdparty/CMakeLists.txt 
b/3rdparty/CMakeLists.txt index cb3918b7..e95415da 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -5,8 +5,8 @@ if(DOWNLOAD_DEPENDENCIES) include(FetchContent) FetchContent_Declare( miniaudio - URL https://github.com/mackron/miniaudio/archive/refs/tags/0.11.24.tar.gz - URL_MD5 19e8eb21223c56a4a2d167d04decddc9 + URL https://github.com/mackron/miniaudio/archive/refs/tags/0.11.25.tar.gz + URL_HASH MD5=6fae6da8f30afb3ddcba26fcaa64f540 ) block() set(BUILD_SHARED_LIBS OFF) @@ -34,6 +34,12 @@ target_compile_definitions(miniaudio PUBLIC MA_NO_RUNTIME_LINKING ) +if(DJGPP) + # DOS is single-threaded so we provide non-atomic __atomic_*_8 stubs + # (see CMakeLists.txt top-level comment about -march=i486). + target_sources(miniaudio PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/djgpp_atomic64.c") +endif() + if(DOWNLOAD_DEPENDENCIES) include(FetchContent) FetchContent_Declare( diff --git a/3rdparty/djgpp_atomic64.c b/3rdparty/djgpp_atomic64.c new file mode 100644 index 00000000..011e8c6a --- /dev/null +++ b/3rdparty/djgpp_atomic64.c @@ -0,0 +1,109 @@ +/* + * Non-atomic 64-bit __atomic_*_8 stubs for DJGPP / DOS. + * + * DOS is single-threaded so real atomics are unnecessary. GCC emits calls to + * these helper functions when targeting i486 (or when __i586__ is undefined) + * because the ISA lacks a native 64-bit atomic instruction. Normally libatomic + * provides them, but DJGPP doesn't ship libatomic. + * + * Every function simply performs a plain (non-atomic) load/store/exchange/CAS + * which is perfectly safe in a single-threaded environment. 
+ */ + +#include <stdint.h> +#include <string.h> + +uint64_t __atomic_load_8(const volatile void *ptr, int memorder) +{ + (void)memorder; + uint64_t val; + memcpy(&val, (const void *)ptr, sizeof(val)); + return val; +} + +void __atomic_store_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + memcpy((void *)ptr, &val, sizeof(val)); +} + +uint64_t __atomic_exchange_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + memcpy((void *)ptr, &val, sizeof(val)); + return old; +} + +int __atomic_compare_exchange_8( + volatile void *ptr, + void *expected, + uint64_t desired, + int success_memorder, + int failure_memorder +) +{ + (void)success_memorder; + (void)failure_memorder; + uint64_t current; + memcpy(&current, (void *)ptr, sizeof(current)); + uint64_t exp; + memcpy(&exp, expected, sizeof(exp)); + if (current == exp) { + memcpy((void *)ptr, &desired, sizeof(desired)); + return 1; + } + memcpy(expected, &current, sizeof(current)); + return 0; +} + +uint64_t __atomic_fetch_add_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old + val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_sub_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old - val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_and_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old & val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_or_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old | val; + 
memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_xor_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old ^ val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} diff --git a/3rdparty/miniaudio b/3rdparty/miniaudio index 13d161bc..9634bedb 160000 --- a/3rdparty/miniaudio +++ b/3rdparty/miniaudio @@ -1 +1 @@ -Subproject commit 13d161bc8d856ad61ae46b798bbeffc0f49808e8 +Subproject commit 9634bedb5b5a2ca38c1ee7108a9358a4e233f14d diff --git a/CMake/djgpp-platform-overrides.cmake b/CMake/djgpp-platform-overrides.cmake new file mode 100644 index 00000000..c919f843 --- /dev/null +++ b/CMake/djgpp-platform-overrides.cmake @@ -0,0 +1,17 @@ +# DJGPP platform overrides for DOS +# +# CMake's built-in Platform/DOS.cmake assumes OpenWatcom naming conventions +# (no prefix, .lib suffix, CMAKE_LINK_LIBRARY_SUFFIX=".lib"). DJGPP uses +# standard Unix/GCC conventions for its system libraries (lib prefix, .a +# suffix — e.g. libm.a). +# +# This file is loaded via CMAKE_USER_MAKE_RULES_OVERRIDE in the toolchain +# file, which runs *after* the platform module has set its defaults, giving +# us the final say on these variables. + +set(CMAKE_STATIC_LIBRARY_PREFIX "lib") +set(CMAKE_STATIC_LIBRARY_SUFFIX ".a") +set(CMAKE_LINK_LIBRARY_SUFFIX "") +set(CMAKE_FIND_LIBRARY_PREFIXES "lib" "") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".a" ".lib") +set(CMAKE_EXECUTABLE_SUFFIX ".exe") \ No newline at end of file diff --git a/CMake/i586-pc-msdosdjgpp.cmake b/CMake/i586-pc-msdosdjgpp.cmake new file mode 100644 index 00000000..8a4e765f --- /dev/null +++ b/CMake/i586-pc-msdosdjgpp.cmake @@ -0,0 +1,82 @@ +set(CMAKE_SYSTEM_NAME DOS) + +set(DJGPP TRUE) + +# CMake's Platform/DOS.cmake assumes OpenWatcom naming conventions (no prefix, +# .lib suffix). DJGPP uses standard Unix/GCC conventions for its system +# libraries (lib prefix, .a suffix — e.g. 
libm.a), so we override the platform +# defaults via CMAKE_USER_MAKE_RULES_OVERRIDE, which runs *after* the platform +# module has set its defaults, giving us the final say on these variables. +# The path must be cached because CMake re-parses the toolchain file during +# try_compile, where CMAKE_CURRENT_LIST_DIR may point elsewhere. +set(DJGPP_PLATFORM_OVERRIDES "${CMAKE_CURRENT_LIST_DIR}/djgpp-platform-overrides.cmake" CACHE FILEPATH "" FORCE) +set(CMAKE_USER_MAKE_RULES_OVERRIDE "${DJGPP_PLATFORM_OVERRIDES}") + +set(CMAKE_STATIC_LIBRARY_PREFIX "lib") +set(CMAKE_STATIC_LIBRARY_SUFFIX ".a") +set(CMAKE_SHARED_LIBRARY_PREFIX "") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dll") +set(CMAKE_IMPORT_LIBRARY_PREFIX "lib") +set(CMAKE_IMPORT_LIBRARY_SUFFIX ".a") +set(CMAKE_EXECUTABLE_SUFFIX ".exe") +set(CMAKE_LINK_LIBRARY_SUFFIX "") +set(CMAKE_DL_LIBS "") + +set(CMAKE_FIND_LIBRARY_PREFIXES "lib") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") + +# +# CMake toolchain file for DJGPP. Usage: +# +# 1. Download and extract DJGPP +# 2. Add directory containing i586-pc-msdosdjgpp-gcc to PATH environment variable +# 3. When configuring your CMake project, specify the toolchain file like this: +# +# cmake -DCMAKE_TOOLCHAIN_FILE=path/to/i586-pc-msdosdjgpp.cmake ... 
+# + +# specify the cross compiler +find_program(CMAKE_C_COMPILER NAMES "i586-pc-msdosdjgpp-gcc" "i386-pc-msdosdjgpp-gcc" REQUIRED) +find_program(CMAKE_CXX_COMPILER NAMES "i586-pc-msdosdjgpp-g++" "i386-pc-msdosdjgpp-g++" REQUIRED) + +execute_process(COMMAND "${CMAKE_C_COMPILER}" -print-search-dirs + RESULT_VARIABLE CC_SEARCH_DIRS_RESULT + OUTPUT_VARIABLE CC_SEARCH_DIRS_OUTPUT) + +if(CC_SEARCH_DIRS_RESULT) + message(FATAL_ERROR "Could not determine search dirs") +endif() + +string(REGEX MATCH ".*libraries: (.*).*" CC_SD_LIBS "${CC_SEARCH_DIRS_OUTPUT}") +string(STRIP "${CMAKE_MATCH_1}" CC_SEARCH_DIRS) +string(REPLACE ":" ";" CC_SEARCH_DIRS "${CC_SEARCH_DIRS}") + +foreach(CC_SEARCH_DIR ${CC_SEARCH_DIRS}) + if(CC_SEARCH_DIR MATCHES "=.*") + string(REGEX MATCH "=(.*)" CC_LIB "${CC_SEARCH_DIR}") + set(CC_SEARCH_DIR "${CMAKE_MATCH_1}") + endif() + if(IS_DIRECTORY "${CC_SEARCH_DIR}") + if(IS_DIRECTORY "${CC_SEARCH_DIR}/../include" OR IS_DIRECTORY "${CC_SEARCH_DIR}/../lib" OR IS_DIRECTORY "${CC_SEARCH_DIR}/../bin") + list(APPEND CC_ROOTS "${CC_SEARCH_DIR}/..") + else() + list(APPEND CC_ROOTS "${CC_SEARCH_DIR}") + endif() + endif() +endforeach() + +list(APPEND CMAKE_FIND_ROOT_PATH ${CC_ROOTS}) + +# search for programs in the host directories +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + +# for libraries, headers and packages in the target directories +if(NOT DEFINED CACHE{CMAKE_FIND_ROOT_PATH_MODE_LIBRARY}) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +endif() +if(NOT DEFINED CACHE{CMAKE_FIND_ROOT_PATH_MODE_INCLUDE}) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endif() +if(NOT DEFINED CACHE{CMAKE_FIND_ROOT_PATH_MODE_PACKAGE}) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) +endif() \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f3f6dbd7..00782ef3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,15 +86,145 @@ option(ISLE_WERROR "Treat warnings as errors" OFF) cmake_dependent_option(ISLE_USE_DX5 "Build with internal DirectX 5 SDK" 
"${NOT_MINGW}" "WIN32;CMAKE_SIZEOF_VOID_P EQUAL 4" OFF) cmake_dependent_option(ISLE_MINIWIN "Use miniwin" ON "NOT ISLE_USE_DX5" OFF) cmake_dependent_option(ISLE_EXTENSIONS "Use extensions" ON "NOT ISLE_USE_DX5;NOT WINDOWS_STORE" OFF) -cmake_dependent_option(ISLE_USE_LWS "Use libwebsockets for native multiplayer" ON "ISLE_EXTENSIONS;NOT EMSCRIPTEN;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT VITA" OFF) -cmake_dependent_option(ISLE_BUILD_CONFIG "Build CONFIG.EXE application" ON "MSVC OR ISLE_MINIWIN;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT WINDOWS_STORE;NOT VITA" OFF) +cmake_dependent_option(ISLE_USE_LWS "Use libwebsockets for native multiplayer" ON "ISLE_EXTENSIONS;NOT DOS;NOT EMSCRIPTEN;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT VITA" OFF) +cmake_dependent_option(ISLE_BUILD_CONFIG "Build CONFIG.EXE application" ON "MSVC OR ISLE_MINIWIN;NOT DOS;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT WINDOWS_STORE;NOT VITA" OFF) cmake_dependent_option(ISLE_COMPILE_SHADERS "Compile shaders" ON "SDL_SHADERCROSS_BIN;TARGET Python3::Interpreter" OFF) -cmake_dependent_option(CMAKE_POSITION_INDEPENDENT_CODE "Build with -fPIC" ON "NOT VITA" OFF) +cmake_dependent_option(CMAKE_POSITION_INDEPENDENT_CODE "Build with -fPIC" ON "NOT DOS;NOT VITA" OFF) +cmake_dependent_option(ISLE_USE_GLIDE "Build with 3dfx Glide support (Voodoo)" OFF "DOS" OFF) option(ENABLE_CLANG_TIDY "Enable clang-tidy") option(DOWNLOAD_DEPENDENCIES "Download dependencies" ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" CACHE PATH "Directory where to put executables and dll") set(ISLE_EMSCRIPTEN_HOST "" CACHE STRING "Host URL for Emscripten streaming (e.g., https://test.com)") -cmake_dependent_option(BUILD_SHARED_LIBS "Build lego1 as a shared library" ON "NOT EMSCRIPTEN;NOT VITA" OFF) +cmake_dependent_option(BUILD_SHARED_LIBS "Build lego1 as a shared library" ON "NOT DOS;NOT EMSCRIPTEN;NOT VITA" OFF) + +if(DOS) + # DJGPP targets i386 by default. 
We use i486 rather than i586 because i586 + # enables cmpxchg8b which GCC uses for 64-bit atomics (lock cmpxchg8b) — + # an instruction DOSBox does not support. The missing __atomic_*_8 helpers + # (normally in libatomic, which DJGPP doesn't ship) are provided as simple + # non-atomic stubs in 3rdparty/djgpp_atomic64.c since DOS is single-threaded. + add_compile_options(-march=i486) + + if(ISLE_USE_GLIDE) + # 3dfx Glide support for Voodoo cards. + # See: https://federicotech.wordpress.com/2024/12/19/compiling-3dfx-glide-2-and-3-in-ms-dos-with-djgpp/ + set(GLIDE_VERSION "3" CACHE STRING "Glide API version: 2 or 3") + set_property(CACHE GLIDE_VERSION PROPERTY STRINGS 2 3) + set(GLIDE_HW "sst1" CACHE STRING "Target Glide hardware: sst1 (Voodoo1), sst96 (Rush), cvg (Voodoo2), h3 (Banshee), h5 (Voodoo3/4/5)") + set_property(CACHE GLIDE_HW PROPERTY STRINGS sst1 sst96 cvg h3 h5) + + include(FetchContent) + FetchContent_Declare( + glide + GIT_REPOSITORY "https://github.com/sezero/glide.git" + GIT_TAG "glide-devel-sezero" + ) + FetchContent_MakeAvailable(glide) + set(GLIDE_SRC_DIR "${glide_SOURCE_DIR}") + if(GLIDE_VERSION STREQUAL "3") + set(GLIDE_ROOT "${GLIDE_SRC_DIR}/glide3x") + else() + set(GLIDE_ROOT "${GLIDE_SRC_DIR}/glide2x") + endif() + + # Copy swlibs into glide dir (required by the build system) + if(NOT EXISTS "${GLIDE_ROOT}/swlibs") + file(COPY "${GLIDE_SRC_DIR}/swlibs" DESTINATION "${GLIDE_ROOT}") + endif() + + # Patch fxglide.h for h3/h5 builds: the P6 fence inline asm check + # fails when HOST_CC is x86_64 gcc (no __i386__ defined). 
+ foreach(_glide_hw_dir h3 h5) + set(_fxglide "${GLIDE_ROOT}/${_glide_hw_dir}/glide/src/fxglide.h") + if(EXISTS "${_fxglide}") + file(READ "${_fxglide}" _fxglide_content) + string(FIND "${_fxglide_content}" "defined(__x86_64__)" _already_patched) + if(_already_patched EQUAL -1) + string(REPLACE + "defined(__GNUC__) && defined(__i386__)" + "defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))" + _fxglide_content "${_fxglide_content}") + file(WRITE "${_fxglide}" "${_fxglide_content}") + endif() + endif() + endforeach() + + # Determine include and lib paths based on hardware target + set(_glide_subdir "glide") + if(GLIDE_VERSION STREQUAL "3") + set(_glide_subdir "glide3") + endif() + + if(GLIDE_HW STREQUAL "sst1" OR GLIDE_HW STREQUAL "sst96") + set(GLIDE_INCLUDE_DIR + "${GLIDE_ROOT}/swlibs/fxmisc" + "${GLIDE_ROOT}/sst1/${_glide_subdir}/src" + "${GLIDE_ROOT}/sst1/init" + ) + set(GLIDE_LIB_DIR "${GLIDE_ROOT}/sst1/lib/${GLIDE_HW}") + elseif(GLIDE_HW STREQUAL "cvg") + set(GLIDE_INCLUDE_DIR + "${GLIDE_ROOT}/swlibs/fxmisc" + "${GLIDE_ROOT}/cvg/${_glide_subdir}/src" + "${GLIDE_ROOT}/cvg/incsrc" + ) + set(GLIDE_LIB_DIR "${GLIDE_ROOT}/cvg/lib") + elseif(GLIDE_HW STREQUAL "h3") + set(GLIDE_INCLUDE_DIR + "${GLIDE_ROOT}/swlibs/fxmisc" + "${GLIDE_ROOT}/h3/${_glide_subdir}/src" + "${GLIDE_ROOT}/h3/incsrc" + ) + set(GLIDE_LIB_DIR "${GLIDE_ROOT}/h3/lib") + elseif(GLIDE_HW STREQUAL "h5") + set(GLIDE_INCLUDE_DIR + "${GLIDE_ROOT}/swlibs/fxmisc" + "${GLIDE_ROOT}/h5/${_glide_subdir}/src" + "${GLIDE_ROOT}/h5/incsrc" + ) + set(GLIDE_LIB_DIR "${GLIDE_ROOT}/h5/lib") + endif() + + # Build Glide using its native DJGPP Makefile. + # We pass CC and AR from the CMake toolchain so it uses the correct cross-compiler. 
+ if(GLIDE_VERSION STREQUAL "3") + set(_glide_lib_name "libgld3x.a") + set(_glide_lib_var "GLIDE_LIB=libgld3x.a") + set(_glide_imp_var "GLIDE_IMP=libgld3i.a") + else() + set(_glide_lib_name "libgld2x.a") + set(_glide_lib_var "GLIDE_LIB=libgld2x.a") + set(_glide_imp_var "GLIDE_IMP=libgld2i.a") + endif() + set(GLIDE_LIB_FILE "${GLIDE_LIB_DIR}/${_glide_lib_name}") + if(NOT EXISTS "${GLIDE_LIB_FILE}") + message(STATUS "Building 3dfx Glide ${GLIDE_VERSION}.x for ${GLIDE_HW}...") + execute_process( + COMMAND make + ${_glide_lib_var} + ${_glide_imp_var} + FX_GLIDE_HW=${GLIDE_HW} + CC=${CMAKE_C_COMPILER} + AR=${CMAKE_AR} + HOST_CC=gcc + -f Makefile.DJ + WORKING_DIRECTORY "${GLIDE_ROOT}" + RESULT_VARIABLE GLIDE_BUILD_RESULT + ) + if(NOT GLIDE_BUILD_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to build Glide library. " + "You may need to build manually: cd ${GLIDE_ROOT} && make ${_glide_lib_var} FX_GLIDE_HW=${GLIDE_HW} CC=${CMAKE_C_COMPILER} AR=${CMAKE_AR} -f Makefile.DJ") + endif() + endif() + + if(NOT EXISTS "${GLIDE_LIB_FILE}") + message(FATAL_ERROR "Glide library not found at ${GLIDE_LIB_FILE} after build.") + endif() + + set(GLIDE_LIBRARY "${GLIDE_LIB_FILE}") + message(STATUS "3dfx Glide: ${GLIDE_HW} (${GLIDE_LIBRARY})") + endif() +endif() message(STATUS "Isle app: ${ISLE_BUILD_APP}") message(STATUS "Config app: ${ISLE_BUILD_CONFIG}") diff --git a/ISLE/isleapp.cpp b/ISLE/isleapp.cpp index 52633a00..07d855f5 100644 --- a/ISLE/isleapp.cpp +++ b/ISLE/isleapp.cpp @@ -155,11 +155,19 @@ IsleApp::IsleApp() m_using8bit = FALSE; m_using16bit = TRUE; m_hasLightSupport = FALSE; +#ifdef __DJGPP__ + m_drawCursor = TRUE; +#else m_drawCursor = FALSE; +#endif m_use3dSound = TRUE; m_useMusic = TRUE; m_wideViewAngle = TRUE; +#ifdef __DJGPP__ + m_islandQuality = 1; +#else m_islandQuality = 2; +#endif m_islandTexture = 1; m_gameStarted = FALSE; m_frameDelta = 10; @@ -191,14 +199,22 @@ IsleApp::IsleApp() m_mediaPath = NULL; m_iniPath = NULL; m_maxLod = RealtimeView::GetUserMaxLOD(); 
+#ifdef __DJGPP__ + m_maxLod = 1.0f; +#endif m_maxAllowedExtras = m_islandQuality <= 1 ? 10 : 20; m_transitionType = MxTransitionManager::e_mosaic; m_cursorSensitivity = 4; m_touchScheme = LegoInputManager::e_gamepad; m_haptic = TRUE; m_wasd = FALSE; +#ifdef __DJGPP__ + m_xRes = 320; + m_yRes = 200; +#else m_xRes = 640; m_yRes = 480; +#endif m_exclusiveXRes = m_xRes; m_exclusiveYRes = m_yRes; m_exclusiveFrameRate = 60.00f; @@ -243,6 +259,10 @@ void IsleApp::Close() TransitionManager()->SetWaitIndicator(NULL); Lego()->Resume(); + if (BackgroundAudioManager()) { + BackgroundAudioManager()->Stop(); + } + while (Streamer()->Close(NULL) == SUCCESS) { } @@ -324,8 +344,16 @@ SDL_AppResult SDL_AppInit(void** appstate, int argc, char** argv) SDL_SetHint(SDL_HINT_MOUSE_TOUCH_EVENTS, "0"); SDL_SetHint(SDL_HINT_TOUCH_MOUSE_EVENTS, "0"); +#ifdef __DJGPP__ + SDL_SetHint("SDL_DOS_ALLOW_DIRECT_FRAMEBUFFER", "1"); +#endif - if (!SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_GAMEPAD | SDL_INIT_HAPTIC)) { + Uint32 initFlags = SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_GAMEPAD; +#ifndef __DJGPP__ + initFlags |= SDL_INIT_HAPTIC; +#endif + + if (!SDL_Init(initFlags)) { char buffer[256]; SDL_snprintf( buffer, @@ -717,11 +745,17 @@ SDL_AppResult SDL_AppEvent(void* appstate, SDL_Event* event) g_lastMouseX = event->motion.x; g_lastMouseY = event->motion.y; +#ifdef __DJGPP__ + if (VideoManager()) { + VideoManager()->MoveCursor(Min((MxS32) g_lastMouseX, 639), Min((MxS32) g_lastMouseY, 479)); + } +#else SDL_ShowCursor(); g_isle->SetDrawCursor(FALSE); if (VideoManager()) { VideoManager()->SetCursorBitmap(NULL); } +#endif break; case SDL_EVENT_FINGER_MOTION: { g_mousemoved = TRUE; @@ -924,6 +958,9 @@ MxResult IsleApp::SetupWindow() return FAILURE; } + g_targetWidth = m_xRes; + g_targetHeight = m_yRes; + SetupVideoFlags( m_fullScreen, m_flipSurfaces, @@ -954,7 +991,7 @@ MxResult IsleApp::SetupWindow() SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_HEIGHT_NUMBER, g_targetHeight); 
SDL_SetBooleanProperty(props, SDL_PROP_WINDOW_CREATE_FULLSCREEN_BOOLEAN, m_fullScreen); SDL_SetStringProperty(props, SDL_PROP_WINDOW_CREATE_TITLE_STRING, WINDOW_TITLE); -#if defined(MINIWIN) && !defined(__3DS__) && !defined(WINDOWS_STORE) && !defined(__vita__) +#if defined(MINIWIN) && !defined(__3DS__) && !defined(WINDOWS_STORE) && !defined(__vita__) && !defined(__DJGPP__) SDL_SetBooleanProperty(props, SDL_PROP_WINDOW_CREATE_OPENGL_BOOLEAN, true); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24); @@ -969,6 +1006,17 @@ MxResult IsleApp::SetupWindow() SDL_SetPointerProperty(SDL_GetWindowProperties(window), ISLE_PROP_WINDOW_CREATE_VIDEO_PARAM, &m_videoParam); +#ifdef __DJGPP__ + // DOS: request an 8-bit (INDEX8) fullscreen mode so the VESA + // framebuffer is paletted and we can blit INDEX8 surfaces directly. + { + SDL_DisplayMode mode = {}; + mode.w = g_targetWidth; + mode.h = g_targetHeight; + mode.format = SDL_PIXELFORMAT_INDEX8; + SDL_SetWindowFullscreenMode(window, &mode); + } +#else if (m_exclusiveFullScreen && m_fullScreen) { SDL_DisplayMode closestMode; SDL_DisplayID displayID = SDL_GetDisplayForWindow(window); @@ -983,6 +1031,7 @@ MxResult IsleApp::SetupWindow() SDL_SetWindowFullscreenMode(window, &closestMode); } } +#endif #ifdef MINIWIN m_windowHandle = reinterpret_cast(window); @@ -1251,7 +1300,7 @@ bool IsleApp::LoadConfig() m_videoParam.GetRect() = MxRect32(0, 0, (m_xRes - 1), (m_yRes - 1)); } m_frameRate = (1000.0f / iniparser_getdouble(dict, "isle:Frame Delta", m_frameDelta)); - m_frameDelta = static_cast(std::round(iniparser_getdouble(dict, "isle:Frame Delta", m_frameDelta))); + m_frameDelta = static_cast(iniparser_getdouble(dict, "isle:Frame Delta", m_frameDelta)); m_videoParam.SetMSAASamples((m_msaaSamples = iniparser_getint(dict, "isle:MSAA", m_msaaSamples))); m_videoParam.SetAnisotropic((m_anisotropic = iniparser_getdouble(dict, "isle:Anisotropic", m_anisotropic))); m_activeInBackground = 
iniparser_getboolean(dict, "isle:Active in Background", m_activeInBackground); diff --git a/LEGO1/lego/legoomni/include/legovideomanager.h b/LEGO1/lego/legoomni/include/legovideomanager.h index d140ee65..b20ccc10 100644 --- a/LEGO1/lego/legoomni/include/legovideomanager.h +++ b/LEGO1/lego/legoomni/include/legovideomanager.h @@ -76,6 +76,7 @@ class LegoVideoManager : public MxVideoManager { void SetRender3D(MxBool p_render3d) { m_render3d = p_render3d; } void SetUnk0x554(MxBool p_unk0x554) { m_unk0x554 = p_unk0x554; } + MxBool GetDrawCursor() { return m_drawCursor; } // SYNTHETIC: LEGO1 0x1007ab20 // SYNTHETIC: BETA10 0x100d8040 @@ -88,9 +89,6 @@ class LegoVideoManager : public MxVideoManager { inline void DrawCursor(); - void DrawDigitToBuffer32(uint8_t* p_dst, int p_pitch, int p_x, int p_y, int p_digit, uint32_t p_color); - void DrawTextToSurface32(uint8_t* p_dst, int p_pitch, int p_x, int p_y, const char* p_text, uint32_t p_color); - Tgl::Renderer* m_renderer; // 0x64 Lego3DManager* m_3dManager; // 0x68 LegoROI* m_viewROI; // 0x6c diff --git a/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h b/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h index d85656ca..7c8c7270 100644 --- a/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h +++ b/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h @@ -49,7 +49,7 @@ class MxBackgroundAudioManager : public MxCore { void Init(); void Update(MxS32 p_targetVolume, MxS32 p_speed, MxPresenter::TickleState p_tickleState); - void Stop(); + LEGO1_EXPORT void Stop(); void LowerVolume(); void RaiseVolume(); MxResult SetPendingPresenter(MxPresenter* p_presenter, MxS32 p_speed, MxPresenter::TickleState p_tickleState); diff --git a/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp b/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp index 96617e52..3aa0df8d 100644 --- a/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp +++ b/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp @@ -77,6 
+77,10 @@ void MxBackgroundAudioManager::DestroyMusic() Streamer()->Close(m_script.GetInternal()); m_enabled = FALSE; } + + m_activePresenter = NULL; + m_pendingPresenter = NULL; + m_tickleState = MxPresenter::e_idle; } // FUNCTION: LEGO1 0x1007ee40 diff --git a/LEGO1/lego/legoomni/src/video/legovideomanager.cpp b/LEGO1/lego/legoomni/src/video/legovideomanager.cpp index 3aa6044f..3ce5697b 100644 --- a/LEGO1/lego/legoomni/src/video/legovideomanager.cpp +++ b/LEGO1/lego/legoomni/src/video/legovideomanager.cpp @@ -465,7 +465,45 @@ void LegoVideoManager::DrawFPS() if (m_unk0x528->Lock(NULL, &surfaceDesc, DDLOCK_WAIT, NULL) == DD_OK) { memset(surfaceDesc.lpSurface, 0, surfaceDesc.lPitch * surfaceDesc.dwHeight); - DrawTextToSurface32((uint8_t*) surfaceDesc.lpSurface, surfaceDesc.lPitch, 0, 0, buffer, 0xFF0000FF); + // 8-bit bitmap font for FPS display + uint8_t* dst = (uint8_t*) surfaceDesc.lpSurface; + int pitch = surfaceDesc.lPitch; + const char* p = buffer; + int px = 0; + static const uint8_t g_digitFont[5][10] = { + {0b1111, 0b0001, 0b1111, 0b1111, 0b1001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111}, + {0b1001, 0b0001, 0b0001, 0b0001, 0b1001, 0b1000, 0b1000, 0b0001, 0b1001, 0b1001}, + {0b1001, 0b0001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111, 0b0010, 0b1111, 0b1111}, + {0b1001, 0b0001, 0b1000, 0b0001, 0b0001, 0b0001, 0b1001, 0b0010, 0b1001, 0b0001}, + {0b1111, 0b0001, 0b1111, 0b1111, 0b0001, 0b1111, 0b1111, 0b0100, 0b1111, 0b1111}, + }; + while (*p) { + if (*p >= '0' && *p <= '9') { + int d = *p - '0'; + for (int row = 0; row < 5; ++row) { + uint8_t bits = g_digitFont[row][d]; + for (int col = 0; col < 5; ++col) { + if (bits & (1 << (4 - col))) { + for (int dy = 0; dy < 2; ++dy) { + for (int dx = 0; dx < 2; ++dx) { + dst[(row * 2 + dy) * pitch + (px + col * 2 + dx)] = 0xff; + } + } + } + } + } + px += 10; + } + else if (*p == '.') { + for (int dy = 0; dy < 2; ++dy) { + for (int dx = 0; dx < 2; ++dx) { + dst[(10 + dy) * pitch + (px + 2 + dx)] = 0xff; + } + } + px += 4; + } 
+ ++p; + } m_unk0x528->Unlock(surfaceDesc.lpSurface); m_unk0x550 = 1.f; @@ -789,66 +827,6 @@ MxResult LegoVideoManager::ConfigureD3DRM() return SUCCESS; } -void LegoVideoManager::DrawDigitToBuffer32(uint8_t* p_dst, int p_pitch, int p_x, int p_y, int p_digit, uint32_t p_color) -{ - if (p_digit < 0 || p_digit > 9) { - return; - } - - uint32_t* pixels = (uint32_t*) p_dst; - int rowStride = p_pitch / 4; - - // 4x5 bitmap font - const uint8_t digitFont[5][10] = { - {0b1111, 0b0001, 0b1111, 0b1111, 0b1001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111}, - {0b1001, 0b0001, 0b0001, 0b0001, 0b1001, 0b1000, 0b1000, 0b0001, 0b1001, 0b1001}, - {0b1001, 0b0001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111, 0b0010, 0b1111, 0b1111}, - {0b1001, 0b0001, 0b1000, 0b0001, 0b0001, 0b0001, 0b1001, 0b0010, 0b1001, 0b0001}, - {0b1111, 0b0001, 0b1111, 0b1111, 0b0001, 0b1111, 0b1111, 0b0100, 0b1111, 0b1111}, - }; - - for (int row = 0; row < 5; ++row) { - uint8_t bits = digitFont[row][p_digit]; - for (int col = 0; col < 5; ++col) { - if (bits & (1 << (4 - col))) { - for (int dy = 0; dy < 2; ++dy) { - for (int dx = 0; dx < 2; ++dx) { - pixels[(p_y + row * 2 + dy) * rowStride + (p_x + col * 2 + dx)] = p_color; - } - } - } - } - } -} - -void LegoVideoManager::DrawTextToSurface32( - uint8_t* p_dst, - int p_pitch, - int p_x, - int p_y, - const char* p_text, - uint32_t p_color -) -{ - while (*p_text) { - if (*p_text >= '0' && *p_text <= '9') { - DrawDigitToBuffer32(p_dst, p_pitch, p_x, p_y, *p_text - '0', p_color); - p_x += 10; - } - else if (*p_text == '.') { - uint32_t* pixels = (uint32_t*) p_dst; - int rowStride = p_pitch / 4; - for (int dy = 0; dy < 2; ++dy) { - for (int dx = 0; dx < 2; ++dx) { - pixels[(p_y + 10 + dy) * rowStride + (p_x + 2 + dx)] = p_color; - } - } - p_x += 4; - } - ++p_text; - } -} - void LegoVideoManager::SetCursorBitmap(const CursorBitmap* p_cursorBitmap) { if (p_cursorBitmap == NULL) { diff --git a/LEGO1/lego/legoomni/src/worlds/infocenter.cpp 
b/LEGO1/lego/legoomni/src/worlds/infocenter.cpp index bebf667d..48468a9e 100644 --- a/LEGO1/lego/legoomni/src/worlds/infocenter.cpp +++ b/LEGO1/lego/legoomni/src/worlds/infocenter.cpp @@ -1499,7 +1499,7 @@ void Infocenter::StartCredits() GetViewManager()->RemoveAll(NULL); InvokeAction(Extra::e_opendisk, *g_creditsScript, CreditsScript::c_LegoCredits, NULL); - SetAppCursor(e_cursorArrow); + SetAppCursor(VideoManager()->GetDrawCursor() ? e_cursorNone : e_cursorArrow); } // FUNCTION: LEGO1 0x10071250 diff --git a/LEGO1/omni/src/video/mxdisplaysurface.cpp b/LEGO1/omni/src/video/mxdisplaysurface.cpp index 4ebccb58..f7e265bb 100644 --- a/LEGO1/omni/src/video/mxdisplaysurface.cpp +++ b/LEGO1/omni/src/video/mxdisplaysurface.cpp @@ -305,6 +305,7 @@ void MxDisplaySurface::Destroy() // FUNCTION: BETA10 0x1013fe15 void MxDisplaySurface::SetPalette(MxPalette* p_palette) { +#ifndef MINIWIN if ((m_surfaceDesc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) == DDPF_PALETTEINDEXED8) { m_ddSurface1->SetPalette(p_palette->CreateNativePalette()); m_ddSurface2->SetPalette(p_palette->CreateNativePalette()); @@ -326,8 +327,10 @@ void MxDisplaySurface::SetPalette(MxPalette* p_palette) DeleteObject(hpal); } } +#else + m_ddSurface1->SetPalette(p_palette->CreateNativePalette()); + m_ddSurface2->SetPalette(p_palette->CreateNativePalette()); -#ifndef MINIWIN MxS32 bitCount = m_surfaceDesc.ddpfPixelFormat.dwRGBBitCount; if (bitCount == 8) { return; @@ -449,17 +452,6 @@ void MxDisplaySurface::VTable0x28( } #endif - if (m_surfaceDesc.ddpfPixelFormat.dwRGBBitCount != 32) { - DDCOLORKEY colorKey; - if (m_surfaceDesc.ddpfPixelFormat.dwRGBBitCount == 8) { - colorKey.dwColorSpaceLowValue = colorKey.dwColorSpaceHighValue = 0x10; - } - else { - colorKey.dwColorSpaceLowValue = colorKey.dwColorSpaceHighValue = RGB555_CREATE(0x1f, 0, 0x1f); - } - tempSurface->SetColorKey(DDCKEY_SRCBLT, &colorKey); - } - DDSURFACEDESC tempDesc; memset(&tempDesc, 0, sizeof(tempDesc)); tempDesc.dwSize = sizeof(tempDesc); @@ 
-511,10 +503,10 @@ void MxDisplaySurface::VTable0x28( if (m_videoParam.Flags().GetDoubleScaling()) { RECT destRect = {p_right, p_bottom, p_right + p_width * 2, p_bottom + p_height * 2}; - m_ddSurface2->Blt(&destRect, tempSurface, NULL, DDBLT_WAIT | DDBLT_KEYSRC, NULL); + m_ddSurface2->Blt(&destRect, tempSurface, NULL, DDBLT_WAIT, NULL); } else { - m_ddSurface2->BltFast(p_right, p_bottom, tempSurface, NULL, DDBLTFAST_WAIT | DDBLTFAST_SRCCOLORKEY); + m_ddSurface2->BltFast(p_right, p_bottom, tempSurface, NULL, DDBLTFAST_WAIT); } tempSurface->Release(); @@ -1083,10 +1075,6 @@ LPDIRECTDRAWSURFACE MxDisplaySurface::FUN_100bc8b0(MxS32 p_width, MxS32 p_height return NULL; } - if (surfaceDesc.ddpfPixelFormat.dwRGBBitCount == 8) { - return NULL; - } - surfaceDesc.dwWidth = p_width; surfaceDesc.dwHeight = p_height; surfaceDesc.dwFlags = DDSD_PIXELFORMAT | DDSD_WIDTH | DDSD_HEIGHT | DDSD_CAPS; diff --git a/LEGO1/viewmanager/viewmanager.cpp b/LEGO1/viewmanager/viewmanager.cpp index ac1a36a3..9b4a3482 100644 --- a/LEGO1/viewmanager/viewmanager.cpp +++ b/LEGO1/viewmanager/viewmanager.cpp @@ -238,7 +238,9 @@ inline void ViewManager::ManageVisibilityAndDetailRecursively(ViewROI* p_from, i const CompoundObject* comp = p_from->GetComp(); if (p_lodLevel == ViewROI::c_lodLevelUnset) { - if (p_from->GetWorldBoundingSphere().Radius() > 0.001F) { + // FIX: Use 0.002 threshold to avoid x87 extended precision boundary + // issues where 0.001 sentinel radius compares as > 0.001F on x87. 
+ if (p_from->GetWorldBoundingSphere().Radius() > 0.002F) { float projectedSize = ProjectedSize(p_from->GetWorldBoundingSphere()); if (RealtimeView::GetUserMaxLOD() <= 5.0f && projectedSize < seconds_allowed * g_viewDistance) { diff --git a/miniwin/CMakeLists.txt b/miniwin/CMakeLists.txt index 112adfda..1be54f00 100644 --- a/miniwin/CMakeLists.txt +++ b/miniwin/CMakeLists.txt @@ -69,6 +69,21 @@ if(NOT (VITA OR WINDOWS_STORE)) endif() endif() +if(DOS) + list(REMOVE_ITEM GRAPHICS_BACKENDS USE_SDL_GPU USE_OPENGL1 USE_OPENGLES2) #USE_SDL_GPU + if(ISLE_USE_GLIDE) + target_sources(miniwin PRIVATE src/d3drm/backends/glide/renderer.cpp) + target_include_directories(miniwin PRIVATE ${GLIDE_INCLUDE_DIR}) + target_link_libraries(miniwin PRIVATE ${GLIDE_LIBRARY}) + if(GLIDE_VERSION STREQUAL "3") + target_compile_definitions(miniwin PRIVATE GLIDE3=1) + endif() + list(APPEND GRAPHICS_BACKENDS USE_GLIDE) + endif() +endif() + +list(APPEND GRAPHICS_BACKENDS USE_PALETTE_SW_RENDER) + if(NINTENDO_SWITCH) # Remove USE_OPENGL1 as incompatible. # Remove everything else as not needed. 
@@ -139,6 +154,12 @@ if(USE_SOFTWARE_RENDER IN_LIST GRAPHICS_BACKENDS) ) endif() +if(USE_PALETTE_SW_RENDER IN_LIST GRAPHICS_BACKENDS) + target_sources(miniwin PRIVATE + src/d3drm/backends/palettesw/renderer.cpp + ) +endif() + target_compile_definitions(miniwin PUBLIC MINIWIN) target_include_directories(miniwin diff --git a/miniwin/src/d3drm/backends/glide/renderer.cpp b/miniwin/src/d3drm/backends/glide/renderer.cpp new file mode 100644 index 00000000..2aacc6a9 --- /dev/null +++ b/miniwin/src/d3drm/backends/glide/renderer.cpp @@ -0,0 +1,1490 @@ +#include "d3drmrenderer.h" +#include "d3drmrenderer_glide.h" +#include "ddsurface_impl.h" +#include "mathutils.h" +#include "meshutils.h" +#include "miniwin.h" + +#include +#include +#include +#include + +extern "C" +{ +#include +} + +static void ProjectVertex( + const D3DRMMATRIX4D& projection, + int screenW, + int screenH, + const D3DVECTOR& v, + float& outX, + float& outY, + float& outZ, + float& outW +) +{ + float px = projection[0][0] * v.x + projection[1][0] * v.y + projection[2][0] * v.z + projection[3][0]; + float py = projection[0][1] * v.x + projection[1][1] * v.y + projection[2][1] * v.z + projection[3][1]; + float pz = projection[0][2] * v.x + projection[1][2] * v.y + projection[2][2] * v.z + projection[3][2]; + float pw = projection[0][3] * v.x + projection[1][3] * v.y + projection[2][3] * v.z + projection[3][3]; + + outW = pw; + if (pw != 0.0f) { + float invW = 1.0f / pw; + px *= invW; + py *= invW; + pz *= invW; + } + + outX = (px * 0.5f + 0.5f) * screenW; + outY = (1.0f - (py * 0.5f + 0.5f)) * screenH; + outZ = pz; +} + +static SDL_Color ApplyLighting( + const std::vector& lights, + const D3DVECTOR& position, + const D3DVECTOR& oNormal, + const Matrix3x3& normalMatrix, + const Appearance& appearance +) +{ + FColor specular = {0, 0, 0, 0}; + FColor diffuse = {0, 0, 0, 0}; + + D3DVECTOR normal = Normalize(TransformNormal(oNormal, normalMatrix)); + + for (const auto& light : lights) { + FColor lightColor = 
light.color; + + if (light.positional == 0.0f && light.directional == 0.0f) { + diffuse.r += lightColor.r; + diffuse.g += lightColor.g; + diffuse.b += lightColor.b; + continue; + } + + D3DVECTOR lightVec; + if (light.directional == 1.0f) { + lightVec = {-light.direction.x, -light.direction.y, -light.direction.z}; + } + else if (light.positional == 1.0f) { + lightVec = {light.position.x - position.x, light.position.y - position.y, light.position.z - position.z}; + } + lightVec = Normalize(lightVec); + + float dotNL = DotProduct(normal, lightVec); + if (dotNL > 0.0f) { + diffuse.r += dotNL * lightColor.r; + diffuse.g += dotNL * lightColor.g; + diffuse.b += dotNL * lightColor.b; + + if (appearance.shininess > 0.0f && light.directional == 1.0f) { + D3DVECTOR viewVec = Normalize({-position.x, -position.y, -position.z}); + D3DVECTOR H = Normalize({lightVec.x + viewVec.x, lightVec.y + viewVec.y, lightVec.z + viewVec.z}); + + float dotNH = std::max(DotProduct(normal, H), 0.0f); + float spec = std::pow(dotNH, appearance.shininess); + + specular.r += spec * lightColor.r; + specular.g += spec * lightColor.g; + specular.b += spec * lightColor.b; + } + } + } + + return SDL_Color{ + static_cast(std::min(255.0f, diffuse.r * appearance.color.r + specular.r * 255.0f)), + static_cast(std::min(255.0f, diffuse.g * appearance.color.g + specular.g * 255.0f)), + static_cast(std::min(255.0f, diffuse.b * appearance.color.b + specular.b * 255.0f)), + appearance.color.a + }; +} + +// Clip a vertex against a general plane, interpolating all attributes +static D3DRMVERTEX ClipEdgePlane(const D3DRMVERTEX& a, const D3DRMVERTEX& b, const Plane& plane) +{ + float da = DotProduct(plane.normal, a.position) + plane.d; + float db = DotProduct(plane.normal, b.position) + plane.d; + float t = da / (da - db); + + D3DRMVERTEX result; + result.position.x = a.position.x + t * (b.position.x - a.position.x); + result.position.y = a.position.y + t * (b.position.y - a.position.y); + result.position.z = 
a.position.z + t * (b.position.z - a.position.z); + result.normal.x = a.normal.x + t * (b.normal.x - a.normal.x); + result.normal.y = a.normal.y + t * (b.normal.y - a.normal.y); + result.normal.z = a.normal.z + t * (b.normal.z - a.normal.z); + result.texCoord.u = a.texCoord.u + t * (b.texCoord.u - a.texCoord.u); + result.texCoord.v = a.texCoord.v + t * (b.texCoord.v - a.texCoord.v); + return result; +} + +// Sutherland-Hodgman clip polygon against one plane +static int ClipPolygonAgainstPlane(const D3DRMVERTEX* in, int inCount, D3DRMVERTEX* out, const Plane& plane) +{ + if (inCount < 3) { + return 0; + } + int outCount = 0; + for (int i = 0; i < inCount; ++i) { + const D3DRMVERTEX& cur = in[i]; + const D3DRMVERTEX& next = in[(i + 1) % inCount]; + float dCur = DotProduct(plane.normal, cur.position) + plane.d; + float dNext = DotProduct(plane.normal, next.position) + plane.d; + if (dCur >= 0) { + out[outCount++] = cur; + if (dNext < 0) { + out[outCount++] = ClipEdgePlane(cur, next, plane); + } + } + else if (dNext >= 0) { + out[outCount++] = ClipEdgePlane(cur, next, plane); + } + } + return outCount; +} + +static bool IsTriangleOutsideViewCone( + const D3DVECTOR& v0, + const D3DVECTOR& v1, + const D3DVECTOR& v2, + const Plane* frustumPlanes +) +{ + for (int i = 0; i < 4; ++i) { + const Plane& plane = frustumPlanes[i]; + float d0 = DotProduct(plane.normal, v0) + plane.d; + float d1 = DotProduct(plane.normal, v1) + plane.d; + float d2 = DotProduct(plane.normal, v2) + plane.d; + if (d0 < 0 && d1 < 0 && d2 < 0) { + return true; + } + } + return false; +} + +static GlideMeshEntry UploadMeshGlide(const MeshGroup& meshGroup) +{ + GlideMeshEntry cache; + cache.meshGroup = &meshGroup; + cache.version = meshGroup.version; + cache.flat = meshGroup.quality == D3DRMRENDER_FLAT || meshGroup.quality == D3DRMRENDER_UNLITFLAT; + + if (cache.flat) { + FlattenSurfaces( + meshGroup.vertices.data(), + meshGroup.vertices.size(), + meshGroup.indices.data(), + meshGroup.indices.size(), + 
meshGroup.texture != nullptr, + cache.flatVertices, + cache.flatIndices + ); + } + + return cache; +} + +// --------------------------------------------------------------------------- +// Constructor / Destructor +// --------------------------------------------------------------------------- + +Direct3DRMGlideRenderer::Direct3DRMGlideRenderer(int width, int height) + : m_transparencyEnabled(false), m_nextTextureAddress(0) +{ + m_virtualWidth = width; + m_virtualHeight = height; + m_width = 640; + m_height = 480; + + memset(m_projection, 0, sizeof(m_projection)); + m_frontClip = 0.1f; + m_backClip = 1000.0f; + + grGlideInit(); + grSstSelect(0); + +#ifdef GLIDE3 + GrContext_t ctx = grSstWinOpen( + 0, + GR_RESOLUTION_640x480, + GR_REFRESH_60Hz, + GR_COLORFORMAT_ABGR, + GR_ORIGIN_UPPER_LEFT, + 2, // double buffer + 1 // aux buffer (z-buffer) + ); + if (!ctx) { + SDL_Log("Glide: grSstWinOpen failed"); + return; + } + m_glideContext = ctx; +#else + if (!grSstWinOpen( + 0, + GR_RESOLUTION_640x480, + GR_REFRESH_60Hz, + GR_COLORFORMAT_ABGR, + GR_ORIGIN_UPPER_LEFT, + 2, // double buffer + 1 // aux buffer (z-buffer) + )) { + SDL_Log("Glide: grSstWinOpen failed"); + return; + } +#endif + +#ifdef GLIDE3 + // Set up vertex layout for Glide 3 + grVertexLayout(GR_PARAM_XY, offsetof(GlideVertex, x), GR_PARAM_ENABLE); + grVertexLayout(GR_PARAM_Z, offsetof(GlideVertex, ooz), GR_PARAM_ENABLE); + grVertexLayout(GR_PARAM_Q, offsetof(GlideVertex, oow), GR_PARAM_ENABLE); + grVertexLayout(GR_PARAM_RGB, offsetof(GlideVertex, r), GR_PARAM_ENABLE); + grVertexLayout(GR_PARAM_A, offsetof(GlideVertex, a), GR_PARAM_ENABLE); + grVertexLayout(GR_PARAM_ST0, offsetof(GlideVertex, sow), GR_PARAM_ENABLE); +#endif + + // Enable W-buffer (uses oow field directly for depth) + grDepthBufferMode(GR_DEPTHBUFFER_WBUFFER); + grDepthBufferFunction(GR_CMP_LESS); + grDepthMask(FXTRUE); + + // Enable backface culling in software (matching software renderer) + // Don't use grCullMode as winding may differ after our 
projection + grCullMode(GR_CULL_DISABLE); + + // Default color combine: vertex color only (untextured) + grColorCombine( + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + GR_COMBINE_LOCAL_ITERATED, + GR_COMBINE_OTHER_NONE, + FXFALSE + ); + + // Default dithering + grDitherMode(GR_DITHER_4x4); + + // Default alpha blend (opaque) + grAlphaBlendFunction(GR_BLEND_ONE, GR_BLEND_ZERO, GR_BLEND_ZERO, GR_BLEND_ZERO); + + // Initialize texture memory allocator + m_nextTextureAddress = grTexMinAddress(GR_TMU0); + + ViewportTransform viewportTransform = {1.0f, 0.0f, 0.0f}; + Resize(width, height, viewportTransform); +} + +Direct3DRMGlideRenderer::~Direct3DRMGlideRenderer() +{ +#ifdef GLIDE3 + grSstWinClose(m_glideContext); +#else + grSstWinClose(); +#endif + grGlideShutdown(); +} + +HRESULT Direct3DRMGlideRenderer::BeginFrame() +{ + return S_OK; +} + +HRESULT Direct3DRMGlideRenderer::FinalizeFrame() +{ + // Reset alpha blend and depth mask if transparency was used this frame + if (m_transparencyEnabled) { + m_transparencyEnabled = false; + grAlphaBlendFunction(GR_BLEND_ONE, GR_BLEND_ZERO, GR_BLEND_ZERO, GR_BLEND_ZERO); + grDepthMask(FXTRUE); + } + return S_OK; +} + +void Direct3DRMGlideRenderer::Clear(float r, float g, float b) +{ + // GR_COLORFORMAT_ABGR: color is packed as 0xAABBGGRR + GrColor_t color = 0xFF000000 | ((Uint32) (b * 255.0f) << 16) | ((Uint32) (g * 255.0f) << 8) | (Uint32) (r * 255.0f); + grBufferClear(color, 0, 0xFFFF); +} + +void Direct3DRMGlideRenderer::Flip() +{ + grBufferSwap(0); +} + +void Direct3DRMGlideRenderer::Resize(int width, int height, const ViewportTransform& viewportTransform) +{ + // Voodoo is fixed at 640x480, so we just store the viewport transform + m_viewportTransform = viewportTransform; + m_width = 640; + m_height = 480; + + m_viewportTransform.scale = + std::min(static_cast(m_width) / m_virtualWidth, static_cast(m_height) / m_virtualHeight); + m_viewportTransform.offsetX = (m_width - (m_virtualWidth * m_viewportTransform.scale)) 
/ 2.0f; + m_viewportTransform.offsetY = (m_height - (m_virtualHeight * m_viewportTransform.scale)) / 2.0f; +} + +void Direct3DRMGlideRenderer::PushLights(const SceneLight* lights, size_t count) +{ + m_lights.assign(lights, lights + count); +} + +void Direct3DRMGlideRenderer::SetProjection(const D3DRMMATRIX4D& projection, D3DVALUE front, D3DVALUE back) +{ + m_frontClip = front; + m_backClip = back; + memcpy(m_projection, projection, sizeof(D3DRMMATRIX4D)); +} + +void Direct3DRMGlideRenderer::SetFrustumPlanes(const Plane* frustumPlanes) +{ + memcpy(m_frustumPlanes, frustumPlanes, sizeof(m_frustumPlanes)); +} + +void Direct3DRMGlideRenderer::EnableTransparency() +{ + m_transparencyEnabled = true; + grAlphaBlendFunction(GR_BLEND_SRC_ALPHA, GR_BLEND_ONE_MINUS_SRC_ALPHA, GR_BLEND_ZERO, GR_BLEND_ZERO); + grDepthMask(FXFALSE); // don't write to depth buffer for transparent objects +} + +void Direct3DRMGlideRenderer::SetDither(bool dither) +{ + grDitherMode(dither ? GR_DITHER_4x4 : GR_DITHER_DISABLE); +} + +void Direct3DRMGlideRenderer::SetPalette(SDL_Palette* palette) +{ + m_palette = palette; + if (palette && palette->ncolors >= 256) { + // Upload palette to Glide texture palette table + GuTexPalette glidePal; + for (int i = 0; i < 256; ++i) { + // Texture palette is always ARGB regardless of GR_COLORFORMAT + glidePal.data[i] = + (static_cast(palette->colors[i].a) << 24) | (static_cast(palette->colors[i].r) << 16) | + (static_cast(palette->colors[i].g) << 8) | (static_cast(palette->colors[i].b)); + } +#ifdef GLIDE3 + grTexDownloadTable(GR_TEXTABLE_PALETTE, &glidePal); +#else + grTexDownloadTable(GR_TMU0, GR_TEXTABLE_PALETTE, &glidePal); +#endif + m_paletteUploaded = true; + } +} + +static GrLOD_t GlideLODFromSize(int size) +{ + switch (size) { + case 256: + return GR_LOD_LOG2_256; + case 128: + return GR_LOD_LOG2_128; + case 64: + return GR_LOD_LOG2_64; + case 32: + return GR_LOD_LOG2_32; + case 16: + return GR_LOD_LOG2_16; + case 8: + return GR_LOD_LOG2_8; + case 4: + 
return GR_LOD_LOG2_4; + case 2: + return GR_LOD_LOG2_2; + case 1: + return GR_LOD_LOG2_1; + default: + return GR_LOD_LOG2_256; + } +} + +static int NextPow2(int v) +{ + if (v <= 1) { + return 1; + } + int p = 1; + while (p < v) { + p <<= 1; + } + if (p > 256) { + p = 256; // Glide max + } + return p; +} + +static void UploadGlideTexture(GlideTextureEntry& entry, SDL_Surface* surface, FxU32& nextAddress) +{ + int srcW = surface->w; + int srcH = surface->h; + int texW = NextPow2(srcW); + int texH = NextPow2(srcH); + + // Glide requires square aspect ratio or specific aspect ratios. + // Use the larger dimension for both LOD values. + int maxDim = texW > texH ? texW : texH; + int minDim = texW < texH ? texW : texH; + + GrLOD_t largeLod = GlideLODFromSize(maxDim); + GrLOD_t smallLod = largeLod; // single mip level + + GrAspectRatio_t aspect; + int ratio = maxDim / minDim; + if (texW == texH) { + aspect = GR_ASPECT_LOG2_1x1; + } + else if (texW > texH) { + switch (ratio) { + case 2: + aspect = GR_ASPECT_LOG2_2x1; + break; + case 4: + aspect = GR_ASPECT_LOG2_4x1; + break; + case 8: + aspect = GR_ASPECT_LOG2_8x1; + break; + default: + aspect = GR_ASPECT_LOG2_8x1; + break; + } + } + else { + switch (ratio) { + case 2: + aspect = GR_ASPECT_LOG2_1x2; + break; + case 4: + aspect = GR_ASPECT_LOG2_1x4; + break; + case 8: + aspect = GR_ASPECT_LOG2_1x8; + break; + default: + aspect = GR_ASPECT_LOG2_1x8; + break; + } + } + + entry.info.smallLodLog2 = smallLod; + entry.info.largeLodLog2 = largeLod; + entry.info.aspectRatioLog2 = aspect; + entry.info.format = GR_TEXFMT_P_8; + + // Build 8-bit texture data, scaling if needed + std::vector texData(texW * texH); + + SDL_LockSurface(surface); + Uint8* srcPixels = static_cast(surface->pixels); + int srcPitch = surface->pitch; + + for (int y = 0; y < texH; ++y) { + int srcY = (y * srcH) / texH; + if (srcY >= srcH) { + srcY = srcH - 1; + } + for (int x = 0; x < texW; ++x) { + int srcX = (x * srcW) / texW; + if (srcX >= srcW) { + srcX = 
srcW - 1; + } + texData[y * texW + x] = srcPixels[srcY * srcPitch + srcX]; + } + } + SDL_UnlockSurface(surface); + + entry.info.data = texData.data(); + + // Calculate memory needed and allocate + FxU32 memNeeded = grTexCalcMemRequired(smallLod, largeLod, aspect, GR_TEXFMT_P_8); + + // Textures cannot span a 2MB boundary + const FxU32 BOUNDARY = 2 * 1024 * 1024; + FxU32 boundaryStart = (nextAddress / BOUNDARY) * BOUNDARY; + FxU32 boundaryEnd = boundaryStart + BOUNDARY; + if (nextAddress + memNeeded > boundaryEnd) { + // Skip to next 2MB boundary + nextAddress = boundaryEnd; + } + + // Check if we have space + FxU32 texMemEnd = grTexMaxAddress(GR_TMU0); + if (nextAddress + memNeeded > texMemEnd) { + // Out of texture memory + entry.startAddress = 0xFFFFFFFF; + entry.texW = 0; + entry.texH = 0; + return; + } + + entry.startAddress = nextAddress; + entry.texW = 256; // Glide 3 tex coords are always in [0, 256] range + entry.texH = 256; + + grTexDownloadMipMap(GR_TMU0, nextAddress, GR_MIPMAPLEVELMASK_BOTH, &entry.info); + + nextAddress += memNeeded; + entry.info.data = nullptr; // don't keep dangling pointer +} + +// Convert any surface to 8-bit indexed using the game palette +static SDL_Surface* ConvertToIndexed(SDL_Surface* surface, SDL_Palette* palette) +{ + int w = surface->w; + int h = surface->h; + int bpp = SDL_GetPixelFormatDetails(surface->format)->bytes_per_pixel; + + SDL_Surface* indexed = SDL_CreateSurface(w, h, SDL_PIXELFORMAT_INDEX8); + SDL_SetSurfacePalette(indexed, palette); + SDL_LockSurface(indexed); + + Uint8* dst = static_cast(indexed->pixels); + int dstPitch = indexed->pitch; + + if (bpp == 1) { + // 8-bit source: remap palette indices + SDL_LockSurface(surface); + SDL_Palette* srcPal = SDL_GetSurfacePalette(surface); + Uint8* src = static_cast(surface->pixels); + int srcPitch = surface->pitch; + + // Build remap table + Uint8 remap[256]; + if (srcPal && srcPal != palette) { + for (int i = 0; i < 256; ++i) { + if (i >= srcPal->ncolors) { + remap[i] 
= 0; + continue; + } + int sr = srcPal->colors[i].r; + int sg = srcPal->colors[i].g; + int sb = srcPal->colors[i].b; + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < palette->ncolors; ++c) { + int dr = palette->colors[c].r - sr; + int dg = palette->colors[c].g - sg; + int db = palette->colors[c].b - sb; + int dist = dr * dr + dg * dg + db * db; + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + remap[i] = bestIdx; + } + } + else { + for (int i = 0; i < 256; ++i) { + remap[i] = static_cast(i); + } + } + + for (int y = 0; y < h; ++y) { + Uint8* srcRow = src + y * srcPitch; + Uint8* dstRow = dst + y * dstPitch; + for (int x = 0; x < w; ++x) { + dstRow[x] = remap[srcRow[x]]; + } + } + SDL_UnlockSurface(surface); + } + else { + // Non-paletted source: convert to RGBA32 first for consistent byte order + SDL_Surface* rgba = SDL_ConvertSurface(surface, SDL_PIXELFORMAT_RGBA32); + SDL_LockSurface(rgba); + + Uint8* src = static_cast(rgba->pixels); + int srcPitch = rgba->pitch; + + for (int y = 0; y < h; ++y) { + for (int x = 0; x < w; ++x) { + Uint8* px = src + y * srcPitch + x * 4; + int pr = px[0], pg = px[1], pb = px[2], pa = px[3]; + + if (pa == 0) { + dst[y * dstPitch + x] = 0; + } + else { + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 1; c < palette->ncolors; ++c) { + int dr = pr - palette->colors[c].r; + int dg = pg - palette->colors[c].g; + int db = pb - palette->colors[c].b; + int dist = dr * dr + dg * dg + db * db; + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + dst[y * dstPitch + x] = bestIdx; + } + } + } + + SDL_UnlockSurface(rgba); + SDL_DestroySurface(rgba); + } + + SDL_UnlockSurface(indexed); + return indexed; +} + +Uint32 Direct3DRMGlideRenderer::GetTextureId(IDirect3DRMTexture* iTexture, bool isUI, float scaleX, float scaleY) +{ + auto texture = static_cast(iTexture); + auto surface = 
static_cast(texture->m_surface); + + // Check if already cached + for (Uint32 i = 0; i < m_textureCache.size(); ++i) { + if (m_textureCache[i].texture == iTexture) { + // Re-upload if version changed or was deferred (no palette at first call) + if (m_textureCache[i].version != texture->m_version || + (m_textureCache[i].startAddress == 0xFFFFFFFF && m_palette)) { + if (m_palette && surface->m_surface) { + SDL_Surface* converted = ConvertToIndexed(surface->m_surface, m_palette); + FxU32 addr = m_textureCache[i].startAddress; + if (addr != 0xFFFFFFFF) { + FxU32 tempAddr = addr; + UploadGlideTexture(m_textureCache[i], converted, tempAddr); + m_textureCache[i].startAddress = addr; + } + else { + UploadGlideTexture(m_textureCache[i], converted, m_nextTextureAddress); + } + SDL_DestroySurface(converted); + } + m_textureCache[i].version = texture->m_version; + } + return i; + } + } + + // New texture + GlideTextureEntry entry; + memset(&entry, 0, sizeof(entry)); + entry.texture = iTexture; + entry.version = texture->m_version; + entry.startAddress = 0xFFFFFFFF; + entry.texW = 0; + entry.texH = 0; + + if (surface->m_surface && m_palette) { + SDL_Surface* converted = ConvertToIndexed(surface->m_surface, m_palette); + UploadGlideTexture(entry, converted, m_nextTextureAddress); + SDL_DestroySurface(converted); + } + + m_textureCache.push_back(entry); + return static_cast(m_textureCache.size() - 1); +} + +Uint32 Direct3DRMGlideRenderer::GetMeshId(IDirect3DRMMesh* mesh, const MeshGroup* meshGroup) +{ + for (Uint32 i = 0; i < m_meshCache.size(); ++i) { + auto& cache = m_meshCache[i]; + if (cache.meshGroup == meshGroup) { + if (cache.version != meshGroup->version) { + cache = std::move(UploadMeshGlide(*meshGroup)); + } + return i; + } + } + + auto newCache = UploadMeshGlide(*meshGroup); + + for (Uint32 i = 0; i < m_meshCache.size(); ++i) { + auto& cache = m_meshCache[i]; + if (!cache.meshGroup) { + cache = std::move(newCache); + return i; + } + } + + 
m_meshCache.push_back(std::move(newCache)); + return static_cast(m_meshCache.size() - 1); +} + +// Screen-space Sutherland-Hodgman clipping for GlideVertex polygons +static GlideVertex LerpGlideVertex(const GlideVertex& a, const GlideVertex& b, float t) +{ + GlideVertex r; + r.x = a.x + t * (b.x - a.x); + r.y = a.y + t * (b.y - a.y); + r.ooz = a.ooz + t * (b.ooz - a.ooz); + r.oow = a.oow + t * (b.oow - a.oow); + r.r = a.r + t * (b.r - a.r); + r.g = a.g + t * (b.g - a.g); + r.b = a.b + t * (b.b - a.b); + r.a = a.a + t * (b.a - a.a); +#ifdef GLIDE3 + r.sow = a.sow + t * (b.sow - a.sow); + r.tow = a.tow + t * (b.tow - a.tow); +#else + r.tmuvtx[0].sow = a.tmuvtx[0].sow + t * (b.tmuvtx[0].sow - a.tmuvtx[0].sow); + r.tmuvtx[0].tow = a.tmuvtx[0].tow + t * (b.tmuvtx[0].tow - a.tmuvtx[0].tow); +#endif + return r; +} + +// Clip edges: 0=left, 1=right, 2=top, 3=bottom +static float ScreenEdgeDist(const GlideVertex& v, int edge, float minX, float maxX, float minY, float maxY) +{ + switch (edge) { + case 0: + return v.x - minX; + case 1: + return maxX - v.x; + case 2: + return v.y - minY; + case 3: + return maxY - v.y; + default: + return 0.0f; + } +} + +static int ClipGlidePolygonAgainstEdge( + const GlideVertex* in, + int inCount, + GlideVertex* out, + int edge, + float minX, + float maxX, + float minY, + float maxY +) +{ + if (inCount < 3) { + return 0; + } + int outCount = 0; + for (int i = 0; i < inCount; ++i) { + const GlideVertex& cur = in[i]; + const GlideVertex& next = in[(i + 1) % inCount]; + float dCur = ScreenEdgeDist(cur, edge, minX, maxX, minY, maxY); + float dNext = ScreenEdgeDist(next, edge, minX, maxX, minY, maxY); + if (dCur >= 0) { + out[outCount++] = cur; + if (dNext < 0) { + float t = dCur / (dCur - dNext); + out[outCount++] = LerpGlideVertex(cur, next, t); + } + } + else if (dNext >= 0) { + float t = dCur / (dCur - dNext); + out[outCount++] = LerpGlideVertex(cur, next, t); + } + } + return outCount; +} + +static int ClipGlidePolygonToScreen( + GlideVertex* 
verts, + int count, + GlideVertex* temp, + float minX, + float maxX, + float minY, + float maxY +) +{ + for (int edge = 0; edge < 4; ++edge) { + count = ClipGlidePolygonAgainstEdge(verts, count, temp, edge, minX, maxX, minY, maxY); + if (count < 3) { + return 0; + } + memcpy(verts, temp, count * sizeof(GlideVertex)); + } + return count; +} + +static void FillGlideVertex( + GlideVertex& gv, + float screenX, + float screenY, + float oow, + float r, + float g, + float b, + float a, + float sow, + float tow +) +{ + gv.x = screenX; + gv.y = screenY; + gv.ooz = 0; // not used in W-buffer mode + gv.oow = oow; + gv.r = r; + gv.g = g; + gv.b = b; + gv.a = a; +#ifdef GLIDE3 + gv.sow = sow; + gv.tow = tow; +#else + gv.tmuvtx[0].sow = sow; + gv.tmuvtx[0].tow = tow; +#endif +} + +void Direct3DRMGlideRenderer::SubmitDraw( + DWORD meshId, + const D3DRMMATRIX4D& modelViewMatrix, + const D3DRMMATRIX4D& worldMatrix, + const D3DRMMATRIX4D& viewMatrix, + const Matrix3x3& normalMatrix, + const Appearance& appearance +) +{ + if (meshId >= m_meshCache.size()) { + return; + } + auto& mesh = m_meshCache[meshId]; + + // We need the original D3DRMVERTEX data - reconstruct from mesh group + const MeshGroup* mg = mesh.meshGroup; + if (!mg) { + return; + } + + // Get flat/smooth vertices - use cached data to avoid per-frame allocations + const D3DRMVERTEX* cpuVerts; + const uint16_t* flatIdx = nullptr; + const DWORD* dwordIdx = nullptr; + size_t vertCount, idxCount; + + if (mesh.flat) { + cpuVerts = mesh.flatVertices.data(); + flatIdx = mesh.flatIndices.data(); + vertCount = mesh.flatVertices.size(); + idxCount = mesh.flatIndices.size(); + } + else { + cpuVerts = mg->vertices.data(); + dwordIdx = mg->indices.data(); + vertCount = mg->vertices.size(); + idxCount = mg->indices.size(); + } + + // Transform vertices to view space and pre-compute lighting + m_transformedVertices.clear(); + m_transformedVertices.reserve(vertCount); + m_litColors.clear(); + m_litColors.reserve(vertCount); + for 
(size_t vi = 0; vi < vertCount; ++vi) { + D3DRMVERTEX dst; + dst.position = TransformPoint(cpuVerts[vi].position, modelViewMatrix); + dst.normal = cpuVerts[vi].normal; + dst.texCoord = cpuVerts[vi].texCoord; + m_transformedVertices.push_back(dst); + m_litColors.push_back(ApplyLighting(m_lights, dst.position, dst.normal, normalMatrix, appearance)); + } + + // Set up Glide texture combine mode + bool hasTexture = (appearance.textureId != NO_TEXTURE_ID); + float texW = 256.0f; + float texH = 256.0f; + + if (hasTexture) { + if (appearance.textureId >= m_textureCache.size()) { + hasTexture = false; + } + } + if (hasTexture) { + auto& texEntry = m_textureCache[appearance.textureId]; + if (texEntry.startAddress == 0xFFFFFFFF) { + hasTexture = false; + } + else { + texW = static_cast(texEntry.texW); + texH = static_cast(texEntry.texH); + grTexSource(GR_TMU0, texEntry.startAddress, GR_MIPMAPLEVELMASK_BOTH, &texEntry.info); + + // Textured + lit: modulate texture by vertex color + grColorCombine( + GR_COMBINE_FUNCTION_SCALE_OTHER, + GR_COMBINE_FACTOR_LOCAL, + GR_COMBINE_LOCAL_ITERATED, + GR_COMBINE_OTHER_TEXTURE, + FXFALSE + ); + grTexCombine( + GR_TMU0, + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + FXFALSE, + FXFALSE + ); + // Alpha from texture + grAlphaCombine( + GR_COMBINE_FUNCTION_SCALE_OTHER, + GR_COMBINE_FACTOR_LOCAL, + GR_COMBINE_LOCAL_ITERATED, + GR_COMBINE_OTHER_TEXTURE, + FXFALSE + ); + } + } + if (!hasTexture) { + // Untextured: vertex color only + grColorCombine( + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + GR_COMBINE_LOCAL_ITERATED, + GR_COMBINE_OTHER_NONE, + FXFALSE + ); + // Alpha from vertex + grAlphaCombine( + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + GR_COMBINE_LOCAL_ITERATED, + GR_COMBINE_OTHER_NONE, + FXFALSE + ); + } + + // Index accessor to handle different index types + auto getIndex = [flatIdx, dwordIdx](size_t i) -> uint32_t { + return flatIdx ? 
static_cast(flatIdx[i]) : static_cast(dwordIdx[i]); + }; + + // Process triangles + for (size_t i = 0; i + 2 < idxCount; i += 3) { + uint32_t idx0 = getIndex(i), idx1 = getIndex(i + 1), idx2 = getIndex(i + 2); + D3DRMVERTEX v[3] = { + m_transformedVertices[idx0], + m_transformedVertices[idx1], + m_transformedVertices[idx2], + }; + + // Backface culling in view space (same as software renderer) + { + D3DVECTOR e1 = { + v[1].position.x - v[0].position.x, + v[1].position.y - v[0].position.y, + v[1].position.z - v[0].position.z + }; + D3DVECTOR e2 = { + v[2].position.x - v[0].position.x, + v[2].position.y - v[0].position.y, + v[2].position.z - v[0].position.z + }; + D3DVECTOR normal = CrossProduct(e1, e2); + if (DotProduct(normal, v[0].position) >= 0.0f) { + continue; + } + } + + // Near-plane clip check (quick reject) + if (v[0].position.z < m_frontClip && v[1].position.z < m_frontClip && v[2].position.z < m_frontClip) { + continue; + } + if (v[0].position.z > m_backClip && v[1].position.z > m_backClip && v[2].position.z > m_backClip) { + continue; + } + + // Frustum side-plane quick reject + if (IsTriangleOutsideViewCone(v[0].position, v[1].position, v[2].position, m_frustumPlanes)) { + continue; + } + + // Check if near-plane clipping is needed + bool needsClip = + (v[0].position.z < m_frontClip || v[1].position.z < m_frontClip || v[2].position.z < m_frontClip); + + if (!needsClip) { + // Fast path: no clipping needed, use pre-lit colors + GlideVertex grVerts[3]; + bool validTri = true; + for (int j = 0; j < 3; ++j) { + float sx, sy, sz, sw; + ProjectVertex(m_projection, m_width, m_height, v[j].position, sx, sy, sz, sw); + if (sw <= 0.001f) { + validTri = false; + break; + } + + float oow = 1.0f / sw; + SDL_Color litColor = m_litColors[getIndex(i + j)]; + + float sow = 0.0f, tow = 0.0f; + if (hasTexture) { + sow = v[j].texCoord.u * texW * oow; + tow = v[j].texCoord.v * texH * oow; + } + + FillGlideVertex( + grVerts[j], + sx, + sy, + oow, + static_cast(litColor.r), + 
static_cast(litColor.g), + static_cast(litColor.b), + static_cast(litColor.a), + sow, + tow + ); + } + if (!validTri) { + continue; + } + + // Clip to screen bounds + GlideVertex clipTemp[12]; + int polyCount = + ClipGlidePolygonToScreen(grVerts, 3, clipTemp, 0.0f, (float) m_width, 0.0f, (float) m_height); + if (polyCount < 3) { + continue; + } + + for (int j = 1; j < polyCount - 1; ++j) { + grDrawTriangle(&grVerts[0], &grVerts[j], &grVerts[j + 1]); + } + } + else { + // Slow path: near-plane clipping generates new vertices, recompute lighting + D3DRMVERTEX clipA[12], clipB[12]; + clipA[0] = v[0]; + clipA[1] = v[1]; + clipA[2] = v[2]; + int polyCount = 3; + + Plane nearPlane = {{0, 0, 1}, -m_frontClip}; + polyCount = ClipPolygonAgainstPlane(clipA, polyCount, clipB, nearPlane); + if (polyCount < 3) { + continue; + } + memcpy(clipA, clipB, polyCount * sizeof(D3DRMVERTEX)); + + GlideVertex grVerts[12]; + bool validTri = true; + for (int j = 0; j < polyCount; ++j) { + float sx, sy, sz, sw; + ProjectVertex(m_projection, m_width, m_height, clipA[j].position, sx, sy, sz, sw); + if (sw <= 0.001f) { + validTri = false; + break; + } + + float oow = 1.0f / sw; + SDL_Color litColor = + ApplyLighting(m_lights, clipA[j].position, clipA[j].normal, normalMatrix, appearance); + + float sow = 0.0f, tow = 0.0f; + if (hasTexture) { + sow = clipA[j].texCoord.u * texW * oow; + tow = clipA[j].texCoord.v * texH * oow; + } + + FillGlideVertex( + grVerts[j], + sx, + sy, + oow, + static_cast(litColor.r), + static_cast(litColor.g), + static_cast(litColor.b), + static_cast(litColor.a), + sow, + tow + ); + } + if (!validTri) { + continue; + } + + GlideVertex clipTemp[12]; + polyCount = + ClipGlidePolygonToScreen(grVerts, polyCount, clipTemp, 0.0f, (float) m_width, 0.0f, (float) m_height); + if (polyCount < 3) { + continue; + } + + for (int j = 1; j < polyCount - 1; ++j) { + grDrawTriangle(&grVerts[0], &grVerts[j], &grVerts[j + 1]); + } + } + } +} + +void Direct3DRMGlideRenderer::Draw2DImage( + 
Uint32 textureId, + const SDL_Rect& srcRect, + const SDL_Rect& dstRect, + FColor color +) +{ + float x0 = dstRect.x * m_viewportTransform.scale + m_viewportTransform.offsetX; + float y0 = dstRect.y * m_viewportTransform.scale + m_viewportTransform.offsetY; + float x1 = x0 + dstRect.w * m_viewportTransform.scale; + float y1 = y0 + dstRect.h * m_viewportTransform.scale; + + float r = color.r * 255.0f; + float g = color.g * 255.0f; + float b = color.b * 255.0f; + float a = color.a * 255.0f; + + if (textureId == NO_TEXTURE_ID) { + // Solid color quad + grColorCombine( + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + GR_COMBINE_LOCAL_ITERATED, + GR_COMBINE_OTHER_NONE, + FXFALSE + ); + + grDepthBufferMode(GR_DEPTHBUFFER_DISABLE); + + GlideVertex v0, v1, v2, v3; + memset(&v0, 0, sizeof(GlideVertex)); + memset(&v1, 0, sizeof(GlideVertex)); + memset(&v2, 0, sizeof(GlideVertex)); + memset(&v3, 0, sizeof(GlideVertex)); + + FillGlideVertex(v0, x0, y0, 1.0f, r, g, b, a, 0, 0); + FillGlideVertex(v1, x1, y0, 1.0f, r, g, b, a, 0, 0); + FillGlideVertex(v2, x1, y1, 1.0f, r, g, b, a, 0, 0); + FillGlideVertex(v3, x0, y1, 1.0f, r, g, b, a, 0, 0); + + grDrawTriangle(&v0, &v1, &v2); + grDrawTriangle(&v0, &v2, &v3); + + grDepthBufferMode(GR_DEPTHBUFFER_WBUFFER); + return; + } + + if (textureId >= m_textureCache.size()) { + return; + } + // Get the underlying surface for this texture to determine real dimensions + auto& texEntry = m_textureCache[textureId]; + auto texture = static_cast(texEntry.texture); + auto surface = static_cast(texture->m_surface); + SDL_Surface* src = surface->m_surface; + + if (!src) { + return; + } + + int imgW = src->w; + int imgH = src->h; + + // For color-keyed 2D images, render as a P_8 textured quad with hardware chromakey. + // This avoids RGB565 quantization issues that cause false color key matches. 
+ Uint32 ck = 0; + bool hasCK = SDL_GetSurfaceColorKey(src, &ck); + + if (hasCK && src->format == SDL_PIXELFORMAT_INDEX8 && imgW <= 64 && imgH <= 64) { + // Use power-of-2 square texture + int texW = 1; + while (texW < imgW) { + texW <<= 1; + } + if (texW > 256) { + texW = 256; + } + int texH = 1; + while (texH < imgH) { + texH <<= 1; + } + if (texH > 256) { + texH = 256; + } + // Make square (use larger dimension) + int texSize = std::max(texW, texH); + + // Allocate raw 8-bit texture data filled with colorkey index + std::vector texData(texSize * texSize, (Uint8) ck); + + // Copy source pixels + SDL_LockSurface(src); + for (int row = 0; row < imgH; ++row) { + Uint8* srcRow = static_cast(src->pixels) + row * src->pitch; + for (int col = 0; col < imgW; ++col) { + texData[row * texSize + col] = srcRow[col]; + } + } + SDL_UnlockSurface(src); + + // Determine LOD + GrLOD_t lod = GR_LOD_LOG2_256; + if (texSize <= 128) { + lod = GR_LOD_LOG2_128; + } + if (texSize <= 64) { + lod = GR_LOD_LOG2_64; + } + if (texSize <= 32) { + lod = GR_LOD_LOG2_32; + } + if (texSize <= 16) { + lod = GR_LOD_LOG2_16; + } + if (texSize <= 8) { + lod = GR_LOD_LOG2_8; + } + if (texSize <= 4) { + lod = GR_LOD_LOG2_4; + } + if (texSize <= 2) { + lod = GR_LOD_LOG2_2; + } + if (texSize <= 1) { + lod = GR_LOD_LOG2_1; + } + + GrTexInfo info; + info.smallLodLog2 = lod; + info.largeLodLog2 = lod; + info.aspectRatioLog2 = GR_ASPECT_LOG2_1x1; + info.format = GR_TEXFMT_P_8; + info.data = texData.data(); + + // Upload to start of texture memory as a transient texture. + // This is safe because SubmitDraw always re-binds via grTexSource before 3D rendering. 
+ FxU32 texAddr = grTexMinAddress(GR_TMU0); + grTexDownloadMipMap(GR_TMU0, texAddr, GR_MIPMAPLEVELMASK_BOTH, &info); + grTexSource(GR_TMU0, texAddr, GR_MIPMAPLEVELMASK_BOTH, &info); + + // Set rendering state + grDepthBufferMode(GR_DEPTHBUFFER_DISABLE); + grColorCombine( + GR_COMBINE_FUNCTION_SCALE_OTHER, + GR_COMBINE_FACTOR_ONE, + GR_COMBINE_LOCAL_NONE, + GR_COMBINE_OTHER_TEXTURE, + FXFALSE + ); + grTexCombine( + GR_TMU0, + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + GR_COMBINE_FUNCTION_LOCAL, + GR_COMBINE_FACTOR_NONE, + FXFALSE, + FXFALSE + ); + + // Enable chroma key + grChromakeyMode(GR_CHROMAKEY_ENABLE); + SDL_Palette* pal = m_palette ? m_palette : SDL_GetSurfacePalette(src); + Uint8 ckIdx = (Uint8) ck; + GrColor_t ckColor = 0; + if (pal && ckIdx < pal->ncolors) { + ckColor = ((FxU32) pal->colors[ckIdx].r << 16) | ((FxU32) pal->colors[ckIdx].g << 8) | + ((FxU32) pal->colors[ckIdx].b); + } + grChromakeyValue(ckColor); + + // Draw quad + float qx0 = dstRect.x * m_viewportTransform.scale + m_viewportTransform.offsetX; + float qy0 = dstRect.y * m_viewportTransform.scale + m_viewportTransform.offsetY; + float qx1 = qx0 + dstRect.w * m_viewportTransform.scale; + float qy1 = qy0 + dstRect.h * m_viewportTransform.scale; + float halfTexel = 256.0f * 0.5f / static_cast(texSize); + float s0 = 256.0f * static_cast(srcRect.x) / static_cast(texSize); + float t0 = 256.0f * static_cast(srcRect.y) / static_cast(texSize) + halfTexel; + float s1 = 256.0f * static_cast(srcRect.x + srcRect.w) / static_cast(texSize); + float t1 = 256.0f * static_cast(srcRect.y + srcRect.h) / static_cast(texSize) + halfTexel; + + GlideVertex gv0, gv1, gv2, gv3; + memset(&gv0, 0, sizeof(GlideVertex)); + memset(&gv1, 0, sizeof(GlideVertex)); + memset(&gv2, 0, sizeof(GlideVertex)); + memset(&gv3, 0, sizeof(GlideVertex)); + + FillGlideVertex(gv0, qx0, qy0, 1.0f, 255, 255, 255, 255, s0, t0); + FillGlideVertex(gv1, qx1, qy0, 1.0f, 255, 255, 255, 255, s1, t0); + FillGlideVertex(gv2, qx1, qy1, 
1.0f, 255, 255, 255, 255, s1, t1); + FillGlideVertex(gv3, qx0, qy1, 1.0f, 255, 255, 255, 255, s0, t1); + + grDrawTriangle(&gv0, &gv1, &gv2); + grDrawTriangle(&gv0, &gv2, &gv3); + + // Restore state + grChromakeyMode(GR_CHROMAKEY_DISABLE); + grDepthBufferMode(GR_DEPTHBUFFER_WBUFFER); + return; + } + + // LFB (linear framebuffer) direct write for 2D images. + { + SDL_SetSurfaceColorKey(src, false, 0); + SDL_Surface* converted = SDL_ConvertSurface(src, SDL_PIXELFORMAT_RGB565); + if (hasCK) { + SDL_SetSurfaceColorKey(src, true, ck); + } + if (!converted) { + return; + } + + // For color-keyed surfaces, we need to identify which pixels are transparent. + // The palette may have duplicate black entries, so we mark the keyed pixels + // with a unique sentinel value (0xF81F = magenta in RGB565) before writing. + Uint16 sentinel = 0xF81F; // bright magenta - unlikely to appear naturally + if (hasCK && src->format == SDL_PIXELFORMAT_INDEX8) { + // Re-scan original paletted source to mark keyed pixels in converted surface + SDL_LockSurface(src); + SDL_LockSurface(converted); + for (int row = 0; row < imgH; ++row) { + Uint8* srcRow = static_cast(src->pixels) + row * src->pitch; + Uint16* dstRow = + reinterpret_cast(static_cast(converted->pixels) + row * converted->pitch); + for (int col = 0; col < imgW; ++col) { + if (srcRow[col] == (Uint8) ck) { + dstRow[col] = sentinel; + } + } + } + SDL_UnlockSurface(src); + SDL_UnlockSurface(converted); + } + + // Calculate destination region on the 640x480 framebuffer + int dstX = static_cast(x0); + int dstY = static_cast(y0); + int dstW = static_cast(x1 - x0); + int dstH = static_cast(y1 - y0); + + // Prepare source at the correct size + SDL_Surface* blitSrc = converted; + SDL_Surface* scaled = nullptr; + if (dstW != srcRect.w || dstH != srcRect.h) { + scaled = SDL_CreateSurface(dstW, dstH, SDL_PIXELFORMAT_RGB565); + if (scaled) { + SDL_Rect sr = srcRect; + SDL_Rect dr = {0, 0, dstW, dstH}; + SDL_BlitSurfaceScaled(converted, &sr, scaled, 
&dr, SDL_SCALEMODE_NEAREST); + blitSrc = scaled; + } + } + + Uint16* srcPixels; + int srcPitch; + if (blitSrc == converted) { + srcPixels = reinterpret_cast( + static_cast(converted->pixels) + srcRect.y * converted->pitch + srcRect.x * 2 + ); + srcPitch = converted->pitch; + } + else { + srcPixels = reinterpret_cast(blitSrc->pixels); + srcPitch = blitSrc->pitch; + } + + int writeW = blitSrc == converted ? srcRect.w : dstW; + int writeH = blitSrc == converted ? srcRect.h : dstH; + + if (!hasCK) { + grLfbWriteRegion( + GR_BUFFER_BACKBUFFER, + dstX, + dstY, + GR_LFB_SRC_FMT_565, + writeW, + writeH, +#ifdef GLIDE3 + FXFALSE, +#endif + srcPitch, + srcPixels + ); + } + else { + // Read framebuffer, composite skipping sentinel, write back + int clipX = std::max(0, dstX); + int clipY = std::max(0, dstY); + int clipW = std::min(writeW, m_width - clipX); + int clipH = std::min(writeH, m_height - clipY); + int srcOffX = clipX - dstX; + int srcOffY = clipY - dstY; + + if (clipW > 0 && clipH > 0) { + std::vector fbRegion(clipW * clipH); + grLfbReadRegion(GR_BUFFER_BACKBUFFER, clipX, clipY, clipW, clipH, clipW * 2, fbRegion.data()); + + int srcStride = srcPitch / 2; + for (int row = 0; row < clipH; ++row) { + for (int col = 0; col < clipW; ++col) { + Uint16 px = srcPixels[(srcOffY + row) * srcStride + (srcOffX + col)]; + if (px != sentinel) { + fbRegion[row * clipW + col] = px; + } + } + } + + grLfbWriteRegion( + GR_BUFFER_BACKBUFFER, + clipX, + clipY, + GR_LFB_SRC_FMT_565, + clipW, + clipH, +#ifdef GLIDE3 + FXFALSE, +#endif + clipW * 2, + fbRegion.data() + ); + } + } + + if (scaled) { + SDL_DestroySurface(scaled); + } + SDL_DestroySurface(converted); + } +} + +// --------------------------------------------------------------------------- +// Framebuffer readback +// --------------------------------------------------------------------------- + +void Direct3DRMGlideRenderer::Download(SDL_Surface* target) +{ + if (!target) { + return; + } + + int srcX = 
static_cast(m_viewportTransform.offsetX); + int srcY = static_cast(m_viewportTransform.offsetY); + int srcW = static_cast(m_virtualWidth * m_viewportTransform.scale); + int srcH = static_cast(m_virtualHeight * m_viewportTransform.scale); + + // Allocate temporary buffer for Glide LFB read (16-bit RGB565) + std::vector lfbBuffer(m_width * m_height); + + grLfbReadRegion(GR_BUFFER_BACKBUFFER, 0, 0, m_width, m_height, m_width * 2, lfbBuffer.data()); + + // Create a temporary SDL surface from the LFB data + SDL_Surface* glideSurface = + SDL_CreateSurfaceFrom(m_width, m_height, SDL_PIXELFORMAT_RGB565, lfbBuffer.data(), m_width * 2); + + if (glideSurface) { + SDL_Rect srcRect = {srcX, srcY, srcW, srcH}; + SDL_BlitSurfaceScaled(glideSurface, &srcRect, target, nullptr, SDL_SCALEMODE_LINEAR); + SDL_DestroySurface(glideSurface); + } +} diff --git a/miniwin/src/d3drm/backends/palettesw/renderer.cpp b/miniwin/src/d3drm/backends/palettesw/renderer.cpp new file mode 100644 index 00000000..a4575267 --- /dev/null +++ b/miniwin/src/d3drm/backends/palettesw/renderer.cpp @@ -0,0 +1,1479 @@ +#include "d3drmrenderer.h" +#include "d3drmrenderer_palettesw.h" +#include "ddsurface_impl.h" +#include "mathutils.h" +#include "meshutils.h" +#include "miniwin.h" + +#include +#include +#include +#include +#include + +struct PalVertexXY { + float x, y, z, w; + Uint8 brightness; // 0..LIGHT_LEVELS-1 + float u_over_w, v_over_w; + float one_over_w; +}; + +static constexpr int PERSP_STEP = 16; + +inline static D3DVECTOR PalSubtract(const D3DVECTOR& a, const D3DVECTOR& b) +{ + return {a.x - b.x, a.y - b.y, a.z - b.z}; +} + +inline static bool PalIsBackface(const D3DVECTOR& a, const D3DVECTOR& b, const D3DVECTOR& c) +{ + D3DVECTOR normal = CrossProduct(PalSubtract(b, a), PalSubtract(c, a)); + return DotProduct(normal, a) >= 0.0f; +} + +Direct3DRMPaletteSWRenderer::Direct3DRMPaletteSWRenderer(DWORD width, DWORD height) +{ + m_virtualWidth = width; + m_virtualHeight = height; + + memset(m_lightLUT, 0, 
sizeof(m_lightLUT)); + memset(m_blendLUT, 0, sizeof(m_blendLUT)); + ViewportTransform viewportTransform = {1.0f, 0.0f, 0.0f}; + Resize(width, height, viewportTransform); +} + +Direct3DRMPaletteSWRenderer::~Direct3DRMPaletteSWRenderer() +{ + SDL_DestroySurface(m_renderedImage); + if (m_flipPalette) { + SDL_DestroyPalette(m_flipPalette); + } +} + +static bool PalettesEqual(SDL_Palette* a, SDL_Palette* b) +{ + if (!a || !b || a->ncolors != b->ncolors) { + return false; + } + return memcmp(a->colors, b->colors, a->ncolors * sizeof(SDL_Color)) == 0; +} + +// --------------------------------------------------------------------------- +// Lighting LUT +// --------------------------------------------------------------------------- +// For each palette entry and brightness level, precompute the closest palette +// index. Brightness 0 = black, LIGHT_LEVELS-1 = full colour. +// This avoids per-pixel RGB maths entirely — the rasteriser just does: +// outPixel = m_lightLUT[texel * LIGHT_LEVELS + brightness] +// --------------------------------------------------------------------------- + +void Direct3DRMPaletteSWRenderer::BuildLightingLUT() +{ + // Use m_flipPalette (snapshot from Flip time) if available — that's the + // palette actually sent to the VGA DAC. Fall back to m_palette for the + // first frame before any Flip has occurred. + SDL_Palette* pal = m_flipPalette ? 
m_flipPalette : m_palette; + if (!pal) { + return; + } + + const SDL_Color* colors = pal->colors; + const int ncolors = pal->ncolors; + + for (int idx = 0; idx < 256; ++idx) { + int sr, sg, sb; + if (idx < ncolors) { + sr = colors[idx].r; + sg = colors[idx].g; + sb = colors[idx].b; + } + else { + sr = sg = sb = 0; + } + + for (int lev = 0; lev < LIGHT_LEVELS; ++lev) { + // Target colour at this brightness + int tr = (sr * lev) / (LIGHT_LEVELS - 1); + int tg = (sg * lev) / (LIGHT_LEVELS - 1); + int tb = (sb * lev) / (LIGHT_LEVELS - 1); + + // Find nearest palette entry (redmean perceptual distance) + int bestDist = INT_MAX; + Uint8 bestIdx = static_cast(idx); + for (int c = 0; c < ncolors; ++c) { + int dr = colors[c].r - tr; + int dg = colors[c].g - tg; + int db = colors[c].b - tb; + int rmean = (tr + colors[c].r) / 2; + int dist = ((512 + rmean) * dr * dr >> 8) + 4 * dg * dg + ((767 - rmean) * db * db >> 8); + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + + m_lightLUT[idx * LIGHT_LEVELS + lev] = bestIdx; + } + } + + m_lightLUTDirty = false; +} + +void Direct3DRMPaletteSWRenderer::BuildBlendLUT() +{ + SDL_Palette* pal = m_flipPalette ? 
m_flipPalette : m_palette; + if (!pal) { + memset(m_blendLUT, 0, sizeof(m_blendLUT)); + return; + } + + const SDL_Color* colors = pal->colors; + const int ncolors = pal->ncolors; + + for (int a = 0; a < 256; ++a) { + int ar, ag, ab; + if (a < ncolors) { + ar = colors[a].r; + ag = colors[a].g; + ab = colors[a].b; + } + else { + ar = ag = ab = 0; + } + + for (int b = 0; b < 256; ++b) { + int br, bg, bb; + if (b < ncolors) { + br = colors[b].r; + bg = colors[b].g; + bb = colors[b].b; + } + else { + br = bg = bb = 0; + } + + // 50/50 blend + int tr = (ar + br) >> 1; + int tg = (ag + bg) >> 1; + int tb = (ab + bb) >> 1; + + // Find nearest palette entry + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < ncolors; ++c) { + int dr = colors[c].r - tr; + int dg = colors[c].g - tg; + int db = colors[c].b - tb; + int rmean = (tr + colors[c].r) / 2; + int dist = ((512 + rmean) * dr * dr >> 8) + 4 * dg * dg + ((767 - rmean) * db * db >> 8); + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + + m_blendLUT[a * 256 + b] = bestIdx; + } + } +} + +void Direct3DRMPaletteSWRenderer::PushLights(const SceneLight* lights, size_t count) +{ + m_lights.assign(lights, lights + count); +} + +void Direct3DRMPaletteSWRenderer::SetFrustumPlanes(const Plane* frustumPlanes) +{ + memcpy(m_frustumPlanes, frustumPlanes, sizeof(m_frustumPlanes)); +} + +void Direct3DRMPaletteSWRenderer::SetProjection(const D3DRMMATRIX4D& projection, D3DVALUE front, D3DVALUE back) +{ + m_front = front; + m_back = back; + memcpy(m_projection, projection, sizeof(D3DRMMATRIX4D)); +} + +void Direct3DRMPaletteSWRenderer::ClearZBuffer() +{ + static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32-bit"); + const size_t size = m_zBuffer.size(); + uint32_t* dst = reinterpret_cast(m_zBuffer.data()); + for (size_t i = 0; i < size; ++i) { + dst[i] = 0x7F800000u; + } +} + +void Direct3DRMPaletteSWRenderer::ProjectVertex(const D3DVECTOR& v, 
D3DRMVECTOR4D& p) const +{ + float px = m_projection[0][0] * v.x + m_projection[1][0] * v.y + m_projection[2][0] * v.z + m_projection[3][0]; + float py = m_projection[0][1] * v.x + m_projection[1][1] * v.y + m_projection[2][1] * v.z + m_projection[3][1]; + float pz = m_projection[0][2] * v.x + m_projection[1][2] * v.y + m_projection[2][2] * v.z + m_projection[3][2]; + float pw = m_projection[0][3] * v.x + m_projection[1][3] * v.y + m_projection[2][3] * v.z + m_projection[3][3]; + + p.w = pw; + + // Perspective divide + if (pw != 0.0f) { + float invW = 1.0f / pw; + px *= invW; + py *= invW; + pz *= invW; + } + + // Map from NDC [-1,1] to screen coordinates + p.x = (px * 0.5f + 0.5f) * m_width; + p.y = (1.0f - (py * 0.5f + 0.5f)) * m_height; + p.z = pz; +} + +// --------------------------------------------------------------------------- +// Lighting — returns a brightness level 0..LIGHT_LEVELS-1 +// --------------------------------------------------------------------------- + +// Fast integer-based pow approximation for specular highlights. +// Repeated squaring: computes base^exp where exp is a positive integer. +// Good enough for 8-bit paletted lighting, avoids expensive FPU std::pow. 
+inline static float FastPow(float base, float exponent) +{ + if (base <= 0.0f) { + return 0.0f; + } + int iexp = static_cast(exponent + 0.5f); + if (iexp <= 0) { + return 1.0f; + } + float result = 1.0f; + float b = base; + while (iexp > 0) { + if (iexp & 1) { + result *= b; + } + b *= b; + iexp >>= 1; + } + return result; +} + +Uint8 Direct3DRMPaletteSWRenderer::ApplyLighting( + const D3DVECTOR& position, + const D3DVECTOR& normal, + const Appearance& appearance, + Uint8 texel +) +{ + (void) texel; // brightness is independent of the palette index + + float intensity = 0.0f; + + D3DVECTOR n = Normalize(TransformNormal(normal, m_normalMatrix)); + + for (const auto& light : m_lights) { + if (light.positional == 0.0f && light.directional == 0.0f) { + // Ambient + float lum = light.color.r * 0.299f + light.color.g * 0.587f + light.color.b * 0.114f; + intensity += lum; + continue; + } + + // Precompute luminance once per light (avoids redundant multiplies) + float lum = light.color.r * 0.299f + light.color.g * 0.587f + light.color.b * 0.114f; + + D3DVECTOR lightVec; + if (light.directional == 1.0f) { + lightVec = {-light.direction.x, -light.direction.y, -light.direction.z}; + } + else { + lightVec = {light.position.x - position.x, light.position.y - position.y, light.position.z - position.z}; + } + lightVec = Normalize(lightVec); + + float dotNL = DotProduct(n, lightVec); + if (dotNL > 0.0f) { + intensity += dotNL * lum; + + // Specular — use fast integer pow instead of std::pow + if (appearance.shininess > 0.0f && light.directional == 1.0f) { + D3DVECTOR viewVec = Normalize({-position.x, -position.y, -position.z}); + D3DVECTOR H = Normalize({lightVec.x + viewVec.x, lightVec.y + viewVec.y, lightVec.z + viewVec.z}); + float dotNH = std::max(DotProduct(n, H), 0.0f); + float spec = FastPow(dotNH, appearance.shininess); + intensity += spec * lum; + } + } + } + + intensity = std::min(intensity, 1.0f); + int level = static_cast(intensity * (LIGHT_LEVELS - 1) + 0.5f); + if 
(level < 0) { + level = 0; + } + if (level >= LIGHT_LEVELS) { + level = LIGHT_LEVELS - 1; + } + return static_cast(level); +} + +static D3DRMVERTEX PalSplitEdge(D3DRMVERTEX a, const D3DRMVERTEX& b, float plane) +{ + float t = (plane - a.position.z) / (b.position.z - a.position.z); + a.position.x += t * (b.position.x - a.position.x); + a.position.y += t * (b.position.y - a.position.y); + a.position.z = plane; + + a.texCoord.u += t * (b.texCoord.u - a.texCoord.u); + a.texCoord.v += t * (b.texCoord.v - a.texCoord.v); + + a.normal.x += t * (b.normal.x - a.normal.x); + a.normal.y += t * (b.normal.y - a.normal.y); + a.normal.z += t * (b.normal.z - a.normal.z); + + a.normal = Normalize(a.normal); + + return a; +} + +static bool PalIsTriangleOutsideViewCone( + const D3DVECTOR& v0, + const D3DVECTOR& v1, + const D3DVECTOR& v2, + const Plane* frustumPlanes +) +{ + for (int i = 0; i < 4; ++i) { + const Plane& plane = frustumPlanes[i]; + + float d0 = DotProduct(plane.normal, v0) + plane.d; + float d1 = DotProduct(plane.normal, v1) + plane.d; + float d2 = DotProduct(plane.normal, v2) + plane.d; + + if (d0 < 0 && d1 < 0 && d2 < 0) { + return true; + } + } + return false; +} + +void Direct3DRMPaletteSWRenderer::DrawTriangleClipped(const D3DRMVERTEX (&v)[3], const Appearance& appearance) +{ + bool in0 = v[0].position.z >= m_front; + bool in1 = v[1].position.z >= m_front; + bool in2 = v[2].position.z >= m_front; + + int insideCount = in0 + in1 + in2; + + if (insideCount == 0 || (v[0].position.z > m_back && v[1].position.z > m_back && v[2].position.z > m_back)) { + return; + } + if (PalIsTriangleOutsideViewCone(v[0].position, v[1].position, v[2].position, m_frustumPlanes)) { + return; + } + + if (insideCount == 3) { + DrawTriangleProjected(v[0], v[1], v[2], appearance); + } + else if (insideCount == 2) { + D3DRMVERTEX split; + if (!in0) { + split = PalSplitEdge(v[2], v[0], m_front); + DrawTriangleProjected(v[1], v[2], split, appearance); + DrawTriangleProjected(v[1], split, 
PalSplitEdge(v[1], v[0], m_front), appearance); + } + else if (!in1) { + split = PalSplitEdge(v[0], v[1], m_front); + DrawTriangleProjected(v[2], v[0], split, appearance); + DrawTriangleProjected(v[2], split, PalSplitEdge(v[2], v[1], m_front), appearance); + } + else { + split = PalSplitEdge(v[1], v[2], m_front); + DrawTriangleProjected(v[0], v[1], split, appearance); + DrawTriangleProjected(v[0], split, PalSplitEdge(v[0], v[2], m_front), appearance); + } + } + else if (in0) { + DrawTriangleProjected(v[0], PalSplitEdge(v[0], v[1], m_front), PalSplitEdge(v[0], v[2], m_front), appearance); + } + else if (in1) { + DrawTriangleProjected(PalSplitEdge(v[1], v[0], m_front), v[1], PalSplitEdge(v[1], v[2], m_front), appearance); + } + else { + DrawTriangleProjected(PalSplitEdge(v[2], v[0], m_front), PalSplitEdge(v[2], v[1], m_front), v[2], appearance); + } +} + +void Direct3DRMPaletteSWRenderer::DrawTriangleProjected( + const D3DRMVERTEX& v0, + const D3DRMVERTEX& v1, + const D3DRMVERTEX& v2, + const Appearance& appearance +) +{ + if (PalIsBackface(v0.position, v1.position, v2.position)) { + return; + } + + D3DRMVECTOR4D p0, p1, p2; + ProjectVertex(v0.position, p0); + ProjectVertex(v1.position, p1); + ProjectVertex(v2.position, p2); + + Uint8 b0 = ApplyLighting(v0.position, v0.normal, appearance, 0); + Uint8 b1 = b0, b2 = b0; + if (!appearance.flat) { + b1 = ApplyLighting(v1.position, v1.normal, appearance, 0); + b2 = ApplyLighting(v2.position, v2.normal, appearance, 0); + } + + Uint8* pixels = static_cast(m_renderedImage->pixels); + int pitch = m_renderedImage->pitch; + + PalVertexXY verts[3] = { + {p0.x, p0.y, p0.z, p0.w, b0, 0, 0, 0}, + {p1.x, p1.y, p1.z, p1.w, b1, 0, 0, 0}, + {p2.x, p2.y, p2.z, p2.w, b2, 0, 0, 0}, + }; + + Uint32 textureId = appearance.textureId; + int texturePitch = 0; + Uint8* texels = nullptr; + int texWidthScale = 0; + int texHeightScale = 0; + + if (textureId != NO_TEXTURE_ID) { + SDL_Surface* texture = m_textures[textureId].cached; + if (texture) { 
+ texturePitch = texture->pitch; + texels = static_cast(texture->pixels); + texWidthScale = texture->w - 1; + texHeightScale = texture->h - 1; + } + + verts[0].u_over_w = v0.texCoord.u / p0.w; + verts[0].v_over_w = v0.texCoord.v / p0.w; + verts[0].one_over_w = 1.0f / p0.w; + + verts[1].u_over_w = v1.texCoord.u / p1.w; + verts[1].v_over_w = v1.texCoord.v / p1.w; + verts[1].one_over_w = 1.0f / p1.w; + + verts[2].u_over_w = v2.texCoord.u / p2.w; + verts[2].v_over_w = v2.texCoord.v / p2.w; + verts[2].one_over_w = 1.0f / p2.w; + } + + // Sort verts + if (verts[0].y > verts[1].y) { + std::swap(verts[0], verts[1]); + } + if (verts[1].y > verts[2].y) { + std::swap(verts[1], verts[2]); + } + if (verts[0].y > verts[1].y) { + std::swap(verts[0], verts[1]); + } + + int minY = std::max(0, static_cast(std::ceil(verts[0].y))); + int maxY = std::min(m_height - 1, static_cast(std::floor(verts[2].y))); + + // For untextured triangles, find the nearest palette entry for the + // material colour so we can use the LUT. + Uint8 materialPalIdx = 0; + if (!texels && m_palette) { + Uint8 mr = appearance.color.r; + Uint8 mg = appearance.color.g; + Uint8 mb = appearance.color.b; + int bestDist = INT_MAX; + for (int c = 0; c < m_palette->ncolors; ++c) { + int dr = m_palette->colors[c].r - mr; + int dg = m_palette->colors[c].g - mg; + int db = m_palette->colors[c].b - mb; + int dist = dr * dr + dg * dg + db * db; + if (dist < bestDist) { + bestDist = dist; + materialPalIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + } + + Uint8 alpha = appearance.color.a; + + // --- Set up incremental edge stepping --- + // Long edge: verts[0] -> verts[2] (always the "right" side before swap) + float longDy = verts[2].y - verts[0].y; + float invLongDy = (longDy != 0.0f) ? 
1.0f / longDy : 0.0f; + // Long edge values at minY + float longT0 = (minY - verts[0].y) * invLongDy; + PalVertexXY longEdge; + longEdge.x = verts[0].x + longT0 * (verts[2].x - verts[0].x); + longEdge.z = verts[0].z + longT0 * (verts[2].z - verts[0].z); + longEdge.u_over_w = verts[0].u_over_w + longT0 * (verts[2].u_over_w - verts[0].u_over_w); + longEdge.v_over_w = verts[0].v_over_w + longT0 * (verts[2].v_over_w - verts[0].v_over_w); + longEdge.one_over_w = verts[0].one_over_w + longT0 * (verts[2].one_over_w - verts[0].one_over_w); + float longBri = verts[0].brightness + longT0 * (static_cast(verts[2].brightness) - verts[0].brightness); + // Long edge step per scanline + float longStepX = (verts[2].x - verts[0].x) * invLongDy; + float longStepZ = (verts[2].z - verts[0].z) * invLongDy; + float longStepBri = (static_cast(verts[2].brightness) - verts[0].brightness) * invLongDy; + float longStepUW = (verts[2].u_over_w - verts[0].u_over_w) * invLongDy; + float longStepVW = (verts[2].v_over_w - verts[0].v_over_w) * invLongDy; + float longStepOW = (verts[2].one_over_w - verts[0].one_over_w) * invLongDy; + + // Short edge: verts[0]->verts[1] then verts[1]->verts[2] + // We set up the first segment and re-init at the midpoint. + float shortBri; + auto setupShortEdge = [&](const PalVertexXY& a, + const PalVertexXY& b, + PalVertexXY& edge, + float& sBri, + float& stepX, + float& stepZ, + float& stepBri, + float& stepUW, + float& stepVW, + float& stepOW, + int startY) { + float dy = b.y - a.y; + float invDy = (dy != 0.0f) ? 
1.0f / dy : 0.0f; + float t0 = (startY - a.y) * invDy; + edge.x = a.x + t0 * (b.x - a.x); + edge.z = a.z + t0 * (b.z - a.z); + sBri = a.brightness + t0 * (static_cast(b.brightness) - a.brightness); + edge.u_over_w = a.u_over_w + t0 * (b.u_over_w - a.u_over_w); + edge.v_over_w = a.v_over_w + t0 * (b.v_over_w - a.v_over_w); + edge.one_over_w = a.one_over_w + t0 * (b.one_over_w - a.one_over_w); + stepX = (b.x - a.x) * invDy; + stepZ = (b.z - a.z) * invDy; + stepBri = (static_cast(b.brightness) - a.brightness) * invDy; + stepUW = (b.u_over_w - a.u_over_w) * invDy; + stepVW = (b.v_over_w - a.v_over_w) * invDy; + stepOW = (b.one_over_w - a.one_over_w) * invDy; + }; + + PalVertexXY shortEdge; + float shortStepX, shortStepZ, shortStepBri, shortStepUW, shortStepVW, shortStepOW; + int midY = static_cast(std::ceil(verts[1].y)); + bool pastMid = (minY >= midY); + if (pastMid) { + setupShortEdge( + verts[1], + verts[2], + shortEdge, + shortBri, + shortStepX, + shortStepZ, + shortStepBri, + shortStepUW, + shortStepVW, + shortStepOW, + minY + ); + } + else { + setupShortEdge( + verts[0], + verts[1], + shortEdge, + shortBri, + shortStepX, + shortStepZ, + shortStepBri, + shortStepUW, + shortStepVW, + shortStepOW, + minY + ); + } + + // Precompute material LUT row pointer for untextured triangles + const Uint8* materialLightRow = texels ? 
nullptr : &m_lightLUT[materialPalIdx * LIGHT_LEVELS]; + + for (int y = minY; y <= maxY; ++y) { + // Switch to second short edge segment at midpoint + if (!pastMid && y >= midY) { + pastMid = true; + setupShortEdge( + verts[1], + verts[2], + shortEdge, + shortBri, + shortStepX, + shortStepZ, + shortStepBri, + shortStepUW, + shortStepVW, + shortStepOW, + y + ); + } + + // Determine left/right from the two edges + float lx, lz, lBri, lUW, lVW, lOW; + float rx, rz, rBri, rUW, rVW, rOW; + if (shortEdge.x <= longEdge.x) { + lx = shortEdge.x; + lz = shortEdge.z; + lBri = shortBri; + lUW = shortEdge.u_over_w; + lVW = shortEdge.v_over_w; + lOW = shortEdge.one_over_w; + rx = longEdge.x; + rz = longEdge.z; + rBri = longBri; + rUW = longEdge.u_over_w; + rVW = longEdge.v_over_w; + rOW = longEdge.one_over_w; + } + else { + lx = longEdge.x; + lz = longEdge.z; + lBri = longBri; + lUW = longEdge.u_over_w; + lVW = longEdge.v_over_w; + lOW = longEdge.one_over_w; + rx = shortEdge.x; + rz = shortEdge.z; + rBri = shortBri; + rUW = shortEdge.u_over_w; + rVW = shortEdge.v_over_w; + rOW = shortEdge.one_over_w; + } + + int startX = std::max(0, static_cast(std::ceil(lx))); + int endX = std::min(m_width - 1, static_cast(std::floor(rx))); + + float span = rx - lx; + if (span <= 0.0f || startX > endX) { + // Step edges and continue + shortEdge.x += shortStepX; + shortEdge.z += shortStepZ; + shortBri += shortStepBri; + shortEdge.u_over_w += shortStepUW; + shortEdge.v_over_w += shortStepVW; + shortEdge.one_over_w += shortStepOW; + longEdge.x += longStepX; + longEdge.z += longStepZ; + longBri += longStepBri; + longEdge.u_over_w += longStepUW; + longEdge.v_over_w += longStepVW; + longEdge.one_over_w += longStepOW; + continue; + } + + float invSpan = 1.0f / span; + + // Precompute per-pixel step values + float zStep = (rz - lz) * invSpan; + float startT = (startX - lx) * invSpan; + float z = lz + startT * (rz - lz); + + // Integer brightness with 8-bit fractional part for stepping + int briFix = 
static_cast((lBri + startT * (rBri - lBri)) * 256.0f); + int briStepFix = static_cast((rBri - lBri) * invSpan * 256.0f); + + Uint8* row = pixels + y * pitch; + float* zPtr = &m_zBuffer[y * m_width + startX]; + + if (texels) { + // --- Textured scanline with periodic perspective correction --- + float uow = lUW + startT * (rUW - lUW); + float vow = lVW + startT * (rVW - lVW); + float oow = lOW + startT * (rOW - lOW); + float uowStep = (rUW - lUW) * invSpan; + float vowStep = (rVW - lVW) * invSpan; + float oowStep = (rOW - lOW) * invSpan; + + int x = startX; + while (x <= endX) { + // Perspective correction at this point + float inv_w0 = 1.0f / oow; + float u0 = uow * inv_w0; + float v0 = vow * inv_w0; + + int remaining = endX - x + 1; + int blockLen = (remaining > PERSP_STEP) ? PERSP_STEP : remaining; + + // Compute end-of-block perspective-correct UVs + float uowEnd = uow + uowStep * blockLen; + float vowEnd = vow + vowStep * blockLen; + float oowEnd = oow + oowStep * blockLen; + + float inv_w1 = 1.0f / oowEnd; + float u1 = uowEnd * inv_w1; + float v1 = vowEnd * inv_w1; + + // Affine step within this block + float invBlock = (blockLen > 1) ? 
(1.0f / blockLen) : 0.0f; + float uAffStep = (u1 - u0) * invBlock; + float vAffStep = (v1 - v0) * invBlock; + float uAff = u0; + float vAff = v0; + + float zLocal = z; + int briLocal = briFix; + float* zP = zPtr; + + for (int i = 0; i < blockLen; ++i, ++x) { + if (zLocal < *zP) { + int bri = briLocal >> 8; + if (bri < 0) { + bri = 0; + } + else if (bri >= LIGHT_LEVELS) { + bri = LIGHT_LEVELS - 1; + } + + // Fast UV tile: wrap to [0,1) + float uTile = uAff; + float vTile = vAff; + int ui = static_cast(uTile); + int vi = static_cast(vTile); + uTile -= ui; + vTile -= vi; + if (uTile < 0.0f) { + uTile += 1.0f; + } + if (vTile < 0.0f) { + vTile += 1.0f; + } + + int texX = static_cast(uTile * texWidthScale); + int texY = static_cast(vTile * texHeightScale); + + Uint8 texel = texels[texY * texturePitch + texX]; + + Uint8 palIdx = m_lightLUT[texel * LIGHT_LEVELS + bri]; + if (m_transparencyEnabled) { + row[x] = m_blendLUT[palIdx * 256 + row[x]]; + } + else { + *zP = zLocal; + row[x] = palIdx; + } + } + zLocal += zStep; + briLocal += briStepFix; + uAff += uAffStep; + vAff += vAffStep; + ++zP; + } + + z = zLocal; + briFix = briLocal; + zPtr = zP; + uow = uowEnd; + vow = vowEnd; + oow = oowEnd; + } + } + else { + // --- Untextured scanline --- + if (alpha == 0) { + // Fully transparent material, skip entire scanline + } + else { + for (int x = startX; x <= endX; ++x, ++zPtr, z += zStep, briFix += briStepFix) { + if (z >= *zPtr) { + continue; + } + + int bri = briFix >> 8; + if (bri < 0) { + bri = 0; + } + else if (bri >= LIGHT_LEVELS) { + bri = LIGHT_LEVELS - 1; + } + + Uint8 palIdx = materialLightRow[bri]; + + if (m_transparencyEnabled) { + row[x] = m_blendLUT[palIdx * 256 + row[x]]; + } + else { + *zPtr = z; + row[x] = palIdx; + } + } + } + } + + // Step both edges to next scanline + shortEdge.x += shortStepX; + shortEdge.z += shortStepZ; + shortBri += shortStepBri; + shortEdge.u_over_w += shortStepUW; + shortEdge.v_over_w += shortStepVW; + shortEdge.one_over_w += 
shortStepOW; + longEdge.x += longStepX; + longEdge.z += longStepZ; + longBri += longStepBri; + longEdge.u_over_w += longStepUW; + longEdge.v_over_w += longStepVW; + longEdge.one_over_w += longStepOW; + } +} + +struct PalCacheDestroyContext { + Direct3DRMPaletteSWRenderer* renderer; + Uint32 id; +}; + +void Direct3DRMPaletteSWRenderer::AddTextureDestroyCallback(Uint32 id, IDirect3DRMTexture* texture) +{ + auto* ctx = new PalCacheDestroyContext{this, id}; + texture->AddDestroyCallback( + [](IDirect3DRMObject* obj, void* arg) { + auto* ctx = static_cast(arg); + auto& cacheEntry = ctx->renderer->m_textures[ctx->id]; + if (cacheEntry.cached) { + // Only free surfaces we own (3D texture duplicates). + // UI textures point to the original surface — don't free those. + auto* origTexture = static_cast(cacheEntry.texture); + auto* origSurface = static_cast(origTexture->m_surface); + if (cacheEntry.cached != origSurface->m_surface) { + SDL_UnlockSurface(cacheEntry.cached); + SDL_DestroySurface(cacheEntry.cached); + } + cacheEntry.cached = nullptr; + cacheEntry.texture = nullptr; + } + delete ctx; + }, + ctx + ); +} + +// Build a 256-byte remap table from a texture's own palette to the game +// palette. For each source index, find the nearest colour in the game +// palette by Euclidean distance in RGB. +static void BuildPaletteRemap(Uint8* remap, SDL_Palette* srcPal, SDL_Palette* dstPal) +{ + if (!srcPal || !dstPal) { + // Identity if either palette is missing. 
+ for (int i = 0; i < 256; ++i) { + remap[i] = static_cast(i); + } + return; + } + + const SDL_Color* sc = srcPal->colors; + const SDL_Color* dc = dstPal->colors; + int dn = dstPal->ncolors; + + for (int i = 0; i < 256; ++i) { + if (i >= srcPal->ncolors) { + remap[i] = 0; + continue; + } + int sr = sc[i].r, sg = sc[i].g, sb = sc[i].b; + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < dn; ++c) { + int dr = dc[c].r - sr; + int dg = dc[c].g - sg; + int db = dc[c].b - sb; + // Redmean approximation for perceptual color distance. + // Weights red and blue channels based on the average red + // value of the two colors being compared. This better + // preserves hue than plain Euclidean RGB distance. + int rmean = (sr + dc[c].r) / 2; + int dist = ((512 + rmean) * dr * dr >> 8) + 4 * dg * dg + ((767 - rmean) * db * db >> 8); + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + remap[i] = bestIdx; + } +} + +// Apply a remap table to every pixel in an INDEX8 surface (in-place). +static void RemapSurfacePixels(SDL_Surface* surf, const Uint8* remap) +{ + Uint8* px = static_cast(surf->pixels); + int pitch = surf->pitch; + for (int y = 0; y < surf->h; ++y) { + Uint8* row = px + y * pitch; + for (int x = 0; x < surf->w; ++x) { + row[x] = remap[row[x]]; + } + } +} + +// Remap an already-duplicated surface's pixels from its own palette to the +// given target palette. Called from BeginFrame so the remap always uses the +// palette that will be active for this frame's Flip. 
+static void RemapSurfaceToTargetPalette(SDL_Surface* surf, SDL_Palette* targetPal)
+{
+    SDL_Palette* srcPal = SDL_GetSurfacePalette(surf);
+    if (!srcPal || !targetPal || srcPal == targetPal) {
+        // Nothing to translate: no palette on either side, or already shared.
+        return;
+    }
+
+    Uint8 remap[256];
+    BuildPaletteRemap(remap, srcPal, targetPal);
+
+    // Lock only if the caller has not already done so, and restore that state.
+    bool wasLocked = (surf->flags & SDL_SURFACE_LOCKED) != 0;
+    if (!wasLocked) {
+        SDL_LockSurface(surf);
+    }
+    RemapSurfacePixels(surf, remap);
+    if (!wasLocked) {
+        SDL_UnlockSurface(surf);
+    }
+
+    SDL_SetSurfacePalette(surf, targetPal);
+}
+
+// Returns the texture-cache slot index for `iTexture`, creating or refreshing
+// the entry as needed.
+//  - isUI: the entry aliases the texture's original surface (no copy).
+//  - otherwise: the entry owns a locked duplicate, remapped to m_flipPalette
+//    when one exists; it is rebuilt when the texture version changes or the
+//    duplicate was dropped.
+// scaleX / scaleY are not used by this implementation.
+Uint32 Direct3DRMPaletteSWRenderer::GetTextureId(IDirect3DRMTexture* iTexture, bool isUI, float scaleX, float scaleY)
+{
+    auto texture = static_cast(iTexture);
+    auto surface = static_cast(texture->m_surface);
+
+    // Check if already mapped
+    for (Uint32 i = 0; i < m_textures.size(); ++i) {
+        auto& texRef = m_textures[i];
+        if (texRef.texture == texture) {
+            // A cached pointer that differs from the original surface is a
+            // duplicate owned by this cache. The original surface is never
+            // ours to destroy (same rule as the destroy callback and
+            // BeginFrame).
+            const bool ownsCached = texRef.cached && texRef.cached != surface->m_surface;
+            if (isUI) {
+                // UI textures: always use the original surface directly.
+                // The game modifies these in-place (e.g. mosaic transition),
+                // so a cached duplicate would be stale.
+                if (ownsCached) {
+                    // Entry was previously used as a 3D texture: free the
+                    // duplicate instead of leaking it.
+                    SDL_UnlockSurface(texRef.cached);
+                    SDL_DestroySurface(texRef.cached);
+                }
+                texRef.cached = surface->m_surface;
+            }
+            else if (texRef.version != texture->m_version || !texRef.cached) {
+                if (ownsCached) {
+                    SDL_UnlockSurface(texRef.cached);
+                    SDL_DestroySurface(texRef.cached);
+                }
+                // 3D textures: duplicate and remap to the flip palette.
+                texRef.cached = SDL_DuplicateSurface(surface->m_surface);
+                SDL_LockSurface(texRef.cached);
+                if (m_flipPalette) {
+                    RemapSurfaceToTargetPalette(texRef.cached, m_flipPalette);
+                }
+                texRef.version = texture->m_version;
+            }
+            return i;
+        }
+    }
+
+    SDL_Surface* converted;
+    if (isUI) {
+        // Use the original surface directly — no duplicate.
+        converted = surface->m_surface;
+    }
+    else {
+        // 3D textures: duplicate and remap to the flip palette.
+        converted = SDL_DuplicateSurface(surface->m_surface);
+        SDL_LockSurface(converted);
+        if (m_flipPalette) {
+            RemapSurfaceToTargetPalette(converted, m_flipPalette);
+        }
+    }
+
+    // Reuse freed slot
+    for (Uint32 i = 0; i < m_textures.size(); ++i) {
+        auto& texRef = m_textures[i];
+        if (!texRef.texture) {
+            texRef = {texture, texture->m_version, converted};
+            AddTextureDestroyCallback(i, texture);
+            return i;
+        }
+    }
+
+    m_textures.push_back({texture, texture->m_version, converted});
+    AddTextureDestroyCallback(static_cast(m_textures.size() - 1), texture);
+    return static_cast(m_textures.size() - 1);
+}
+
+// Builds the renderer-side copy of a mesh group. Flat-shaded groups
+// (D3DRMRENDER_FLAT / D3DRMRENDER_UNLITFLAT) go through FlattenSurfaces;
+// all others are copied verbatim.
+static PaletteMeshCache PalUploadMesh(const MeshGroup& meshGroup)
+{
+    PaletteMeshCache cache{&meshGroup, meshGroup.version};
+    cache.flat = meshGroup.quality == D3DRMRENDER_FLAT || meshGroup.quality == D3DRMRENDER_UNLITFLAT;
+
+    if (cache.flat) {
+        FlattenSurfaces(
+            meshGroup.vertices.data(),
+            meshGroup.vertices.size(),
+            meshGroup.indices.data(),
+            meshGroup.indices.size(),
+            meshGroup.texture != nullptr,
+            cache.vertices,
+            cache.indices
+        );
+    }
+    else {
+        cache.vertices.assign(meshGroup.vertices.begin(), meshGroup.vertices.end());
+        cache.indices.assign(meshGroup.indices.begin(), meshGroup.indices.end());
+    }
+
+    return cache;
+}
+
+// Registers a destroy callback on `mesh` that empties mesh-cache slot `id`
+// so it can be reused. The context is deleted by the callback.
+void Direct3DRMPaletteSWRenderer::AddMeshDestroyCallback(Uint32 id, IDirect3DRMMesh* mesh)
+{
+    auto* ctx = new PalCacheDestroyContext{this, id};
+    mesh->AddDestroyCallback(
+        [](IDirect3DRMObject* obj, void* arg) {
+            auto* ctx = static_cast(arg);
+            auto& cacheEntry = ctx->renderer->m_meshes[ctx->id];
+            if (cacheEntry.meshGroup) {
+                cacheEntry.meshGroup = nullptr;
+                cacheEntry.vertices.clear();
+                cacheEntry.indices.clear();
+            }
+            delete ctx;
+        },
+        ctx
+    );
+}
+
+// Returns the mesh-cache slot index for `meshGroup`, re-uploading it when its
+// version changed and allocating (or reusing) a slot when it is not cached yet.
+Uint32 Direct3DRMPaletteSWRenderer::GetMeshId(IDirect3DRMMesh* mesh, const MeshGroup* meshGroup)
+{
+    for (Uint32 i = 0; i < m_meshes.size(); ++i) {
+        auto& cache = m_meshes[i];
+        if (cache.meshGroup == meshGroup) {
+            if (cache.version != meshGroup->version) {
+                cache =
std::move(PalUploadMesh(*meshGroup)); + } + return i; + } + } + + auto newCache = PalUploadMesh(*meshGroup); + + for (Uint32 i = 0; i < m_meshes.size(); ++i) { + auto& cache = m_meshes[i]; + if (!cache.meshGroup) { + cache = std::move(newCache); + AddMeshDestroyCallback(i, mesh); + return i; + } + } + + m_meshes.push_back(std::move(newCache)); + AddMeshDestroyCallback((Uint32) (m_meshes.size() - 1), mesh); + return (Uint32) (m_meshes.size() - 1); +} + +HRESULT Direct3DRMPaletteSWRenderer::BeginFrame() +{ + if (!m_renderedImage || !SDL_LockSurface(m_renderedImage)) { + return DDERR_GENERIC; + } + + // Rebuild lighting LUT if palette changed + if (m_lightLUTDirty) { + m_palette = SDL_GetSurfacePalette(m_renderedImage); + BuildLightingLUT(); + BuildBlendLUT(); + } + + // Use the palette snapshot from the previous Flip (m_flipPalette) for + // texture remapping. Only remap when the flip palette actually changes + // (i.e. on scene transitions), not every frame. + if (m_flipPalette && m_flipPaletteDirty) { + m_flipPaletteDirty = false; + + int grassGreens = 0; + for (int i = 0; i < m_flipPalette->ncolors; ++i) { + SDL_Color c = m_flipPalette->colors[i]; + if (c.g >= 60 && c.g <= 125 && c.r >= 35 && c.r <= 95 && c.b >= 20 && c.b <= 65 && c.g > c.r) { + grassGreens++; + } + } + int invalidated = 0; + + // Invalidate all cached 3D textures so they get re-remapped + // against the new palette on next use in GetTextureId. 
+ for (auto& texRef : m_textures) { + if (!texRef.texture || !texRef.cached) { + continue; + } + auto* origSurface = + static_cast(static_cast(texRef.texture)->m_surface); + if (texRef.cached == origSurface->m_surface) { + continue; + } + SDL_UnlockSurface(texRef.cached); + SDL_DestroySurface(texRef.cached); + texRef.cached = nullptr; + texRef.version = 0; + invalidated++; + } + + // Rebuild lighting/blend LUTs for the new palette + BuildLightingLUT(); + BuildBlendLUT(); + } + + ClearZBuffer(); + m_transparencyEnabled = false; + return DD_OK; +} + +void Direct3DRMPaletteSWRenderer::EnableTransparency() +{ + m_transparencyEnabled = true; +} + +void Direct3DRMPaletteSWRenderer::SubmitDraw( + DWORD meshId, + const D3DRMMATRIX4D& modelViewMatrix, + const D3DRMMATRIX4D& worldMatrix, + const D3DRMMATRIX4D& viewMatrix, + const Matrix3x3& normalMatrix, + const Appearance& appearance +) +{ + memcpy(m_normalMatrix, normalMatrix, sizeof(Matrix3x3)); + + auto& mesh = m_meshes[meshId]; + + // Pre-transform all vertex positions and normals + m_transformedVerts.clear(); + m_transformedVerts.reserve(mesh.vertices.size()); + for (const auto& src : mesh.vertices) { + D3DRMVERTEX& dst = m_transformedVerts.emplace_back(); + dst.position = TransformPoint(src.position, modelViewMatrix); + dst.normal = src.normal; + dst.texCoord = src.texCoord; + } + + // Assemble triangles using index buffer + for (size_t i = 0; i + 2 < mesh.indices.size(); i += 3) { + DrawTriangleClipped( + {m_transformedVerts[mesh.indices[i]], + m_transformedVerts[mesh.indices[i + 1]], + m_transformedVerts[mesh.indices[i + 2]]}, + appearance + ); + } +} + +HRESULT Direct3DRMPaletteSWRenderer::FinalizeFrame() +{ + SDL_UnlockSurface(m_renderedImage); + + return DD_OK; +} + +void Direct3DRMPaletteSWRenderer::Resize(int width, int height, const ViewportTransform& viewportTransform) +{ + m_viewportTransform = viewportTransform; + float aspect = static_cast(width) / height; + float virtualAspect = 
static_cast(m_virtualWidth) / m_virtualHeight;
+
+    // Cap to the virtual canvas for performance: the constrained dimension is
+    // clamped to the virtual size and the other is derived from the aspect.
+    if (aspect > virtualAspect) {
+        m_height = std::min(height, (int) m_virtualHeight);
+        m_width = static_cast(m_height * aspect);
+    }
+    else {
+        m_width = std::min(width, (int) m_virtualWidth);
+        m_height = static_cast(m_width / aspect);
+    }
+
+    // Uniform scale that fits the virtual canvas, centred by the offsets.
+    m_viewportTransform.scale =
+        std::min(static_cast(m_width) / m_virtualWidth, static_cast(m_height) / m_virtualHeight);
+
+    m_viewportTransform.offsetX = (m_width - (m_virtualWidth * m_viewportTransform.scale)) / 2.0f;
+    m_viewportTransform.offsetY = (m_height - (m_virtualHeight * m_viewportTransform.scale)) / 2.0f;
+
+    // Recreate the INDEX8 render target at the new size.
+    if (m_renderedImage) {
+        SDL_DestroySurface(m_renderedImage);
+    }
+    m_renderedImage = SDL_CreateSurface(m_width, m_height, SDL_PIXELFORMAT_INDEX8);
+
+    // If we already have a palette, attach it to the new surface
+    if (m_palette) {
+        SDL_SetSurfacePalette(m_renderedImage, m_palette);
+    }
+
+    // One depth value per render-target pixel.
+    m_zBuffer.resize(m_width * m_height);
+}
+
+// Fills the whole render target with the palette entry closest to (r, g, b),
+// using squared RGB distance. Without a palette, index 0 is used.
+void Direct3DRMPaletteSWRenderer::Clear(float r, float g, float b)
+{
+    if (!m_palette) {
+        SDL_FillSurfaceRect(m_renderedImage, nullptr, 0);
+        return;
+    }
+
+    // Find nearest palette entry
+    // (r, g, b are scaled by 255; presumably they arrive in [0, 1] — confirm).
+    Uint8 tr = static_cast(r * 255);
+    Uint8 tg = static_cast(g * 255);
+    Uint8 tb = static_cast(b * 255);
+    int bestDist = INT_MAX;
+    Uint8 bestIdx = 0;
+    for (int c = 0; c < m_palette->ncolors; ++c) {
+        int dr = m_palette->colors[c].r - tr;
+        int dg = m_palette->colors[c].g - tg;
+        int db = m_palette->colors[c].b - tb;
+        int dist = dr * dr + dg * dg + db * db;
+        if (dist < bestDist) {
+            bestDist = dist;
+            bestIdx = static_cast(c);
+            if (dist == 0) {
+                // Exact match, stop searching.
+                break;
+            }
+        }
+    }
+
+    SDL_FillSurfaceRect(m_renderedImage, nullptr, bestIdx);
+}
+
+// Presents m_renderedImage on the window surface of DDWindow and then
+// snapshots the displayed palette into m_flipPalette. Does nothing when the
+// render target or the window surface has no pixels.
+void Direct3DRMPaletteSWRenderer::Flip()
+{
+    if (!m_renderedImage || !m_renderedImage->pixels) {
+        return;
+    }
+
+    SDL_Surface* winSurface = SDL_GetWindowSurface(DDWindow);
+    if (!winSurface) {
+        return;
+    }
+    if (!winSurface->pixels) {
+        return;
+    }
+
+    if (winSurface->format == SDL_PIXELFORMAT_INDEX8) {
+        // Window surface is paletted — copy indices directly and set
+        // the palette on the destination so the DAC/display picks it up.
+        if (m_palette) {
+            SDL_SetSurfacePalette(winSurface, m_palette);
+        }
+
+        Uint8* src = static_cast(m_renderedImage->pixels);
+        Uint8* dst = static_cast(winSurface->pixels);
+        int srcPitch = m_renderedImage->pitch;
+        int dstPitch = winSurface->pitch;
+
+        if (m_width * 2 <= winSurface->w && m_height * 2 <= winSurface->h) {
+            // 2x nearest-neighbor upscale (half-res rendering):
+            // each source pixel becomes a 2x2 block in the window surface.
+            for (int row = 0; row < m_height; ++row) {
+                Uint8* srcRow = src + row * srcPitch;
+                Uint8* dstRow0 = dst + (row * 2) * dstPitch;
+                Uint8* dstRow1 = dstRow0 + dstPitch;
+                for (int col = 0; col < m_width; ++col) {
+                    Uint8 px = srcRow[col];
+                    dstRow0[col * 2] = px;
+                    dstRow0[col * 2 + 1] = px;
+                    dstRow1[col * 2] = px;
+                    dstRow1[col * 2 + 1] = px;
+                }
+            }
+        }
+        else {
+            // Unscaled copy, clipped to the smaller of the two surfaces.
+            int copyH = std::min(m_height, winSurface->h);
+            int copyW = std::min(m_width, winSurface->w);
+            if (srcPitch == dstPitch && copyW == m_width) {
+                // Identical row layout: one contiguous copy.
+                memcpy(dst, src, static_cast(srcPitch) * copyH);
+            }
+            else {
+                for (int row = 0; row < copyH; ++row) {
+                    memcpy(dst + row * dstPitch, src + row * srcPitch, copyW);
+                }
+            }
+        }
+    }
+    else {
+        // Window surface is not paletted — let SDL convert INDEX8 → dest format.
+        // Use scaled blit to handle fullscreen on high-res displays.
+        // Nothing is blitted while no palette has been set.
+        if (m_palette) {
+            SDL_SetSurfacePalette(m_renderedImage, m_palette);
+            SDL_BlitSurfaceScaled(m_renderedImage, nullptr, winSurface, nullptr, SDL_SCALEMODE_NEAREST);
+        }
+    }
+
+    SDL_UpdateWindowSurface(DDWindow);
+
+    // Snapshot the displayed palette into m_flipPalette; BeginFrame uses the
+    // snapshot for texture remapping and rebuilds the LUTs when it changed.
+    SDL_Palette* displayedPal =
+        (winSurface->format == SDL_PIXELFORMAT_INDEX8) ?
SDL_GetSurfacePalette(winSurface) : m_palette;
+    if (displayedPal) {
+        if (!m_flipPalette) {
+            m_flipPalette = SDL_CreatePalette(256);
+        }
+        if (!PalettesEqual(displayedPal, m_flipPalette)) {
+            SDL_SetPaletteColors(m_flipPalette, displayedPal->colors, 0, displayedPal->ncolors);
+            m_flipPaletteDirty = true;
+        }
+    }
+}
+
+// Draws a 2D image (or a solid fill when textureId == NO_TEXTURE_ID) into the
+// render target. dstRect is given in virtual-canvas coordinates and is mapped
+// through m_viewportTransform; the result is clipped to the render target.
+// Pixels are copied as raw palette indices. Pixels equal to the surface's
+// colour key, if it has one, are skipped.
+void Direct3DRMPaletteSWRenderer::Draw2DImage(
+    Uint32 textureId,
+    const SDL_Rect& srcRect,
+    const SDL_Rect& dstRect,
+    FColor color
+)
+{
+    SDL_Rect centeredRect = {
+        static_cast(dstRect.x * m_viewportTransform.scale + m_viewportTransform.offsetX),
+        static_cast(dstRect.y * m_viewportTransform.scale + m_viewportTransform.offsetY),
+        static_cast(dstRect.w * m_viewportTransform.scale),
+        static_cast(dstRect.h * m_viewportTransform.scale),
+    };
+
+    if (textureId == NO_TEXTURE_ID) {
+        // Fill with nearest palette colour
+        if (m_palette) {
+            Uint8 tr = static_cast(color.r * 255);
+            Uint8 tg = static_cast(color.g * 255);
+            Uint8 tb = static_cast(color.b * 255);
+            int bestDist = INT_MAX;
+            Uint8 bestIdx = 0;
+            for (int c = 0; c < m_palette->ncolors; ++c) {
+                int dr = m_palette->colors[c].r - tr;
+                int dg = m_palette->colors[c].g - tg;
+                int db = m_palette->colors[c].b - tb;
+                int dist = dr * dr + dg * dg + db * db;
+                if (dist < bestDist) {
+                    bestDist = dist;
+                    bestIdx = static_cast(c);
+                    if (dist == 0) {
+                        break;
+                    }
+                }
+            }
+            SDL_FillSurfaceRect(m_renderedImage, &centeredRect, bestIdx);
+        }
+        else {
+            SDL_FillSurfaceRect(m_renderedImage, &centeredRect, 0);
+        }
+        return;
+    }
+
+    // Raw INDEX8 blit — copy palette indices directly, no SDL palette
+    // remapping. This is the hot path for 2D (video, UI overlays).
+    SDL_Surface* surface = m_textures[textureId].cached;
+    if (!surface || !m_renderedImage) {
+        // The cached surface can be null: BeginFrame and the texture destroy
+        // callback both reset it. Nothing to draw in that case.
+        return;
+    }
+
+    // The surface's colour key is honoured whenever one is set.
+    // NOTE(review): this function receives no flag telling whether the caller
+    // requested keying (DDBLT_KEYSRC / DDBLTFAST_SRCCOLORKEY), so a stale
+    // colour key on a surface (e.g. SMK video) is still applied here. Confirm
+    // the caller clears it when keying is not wanted.
+    Uint32 colorKey = 0;
+    bool hasColorKey = SDL_GetSurfaceColorKey(surface, &colorKey);
+
+    // The source is read in the unlocked state; the lock is restored below.
+    bool wasLocked = (surface->flags & SDL_SURFACE_LOCKED) != 0;
+    if (wasLocked) {
+        SDL_UnlockSurface(surface);
+    }
+    Uint8* src = static_cast(surface->pixels);
+    Uint8* dst = static_cast(m_renderedImage->pixels);
+    int srcPitch = surface->pitch;
+    int dstPitch = m_renderedImage->pitch;
+
+    // Destination rectangle clipped to the render target.
+    int dstX0 = std::max(0, centeredRect.x);
+    int dstY0 = std::max(0, centeredRect.y);
+    int dstX1 = std::min(m_width, centeredRect.x + centeredRect.w);
+    int dstY1 = std::min(m_height, centeredRect.y + centeredRect.h);
+
+    Uint8 ckByte = static_cast(colorKey);
+
+    if (!hasColorKey && centeredRect.w == srcRect.w && centeredRect.h == srcRect.h) {
+        // 1:1 opaque copy — fast memcpy per scanline
+        int copyW = dstX1 - dstX0;
+        int copyH = dstY1 - dstY0;
+        if (copyW > 0 && copyH > 0) {
+            int srcStartX = srcRect.x + (dstX0 - centeredRect.x);
+            int srcStartY = srcRect.y + (dstY0 - centeredRect.y);
+            for (int row = 0; row < copyH; ++row) {
+                memcpy(dst + (dstY0 + row) * dstPitch + dstX0, src + (srcStartY + row) * srcPitch + srcStartX, copyW);
+            }
+        }
+    }
+    else if (centeredRect.w == srcRect.w && centeredRect.h == srcRect.h) {
+        // 1:1 copy with color key
+        int copyW = dstX1 - dstX0;
+        int copyH = dstY1 - dstY0;
+        int srcStartX = srcRect.x + (dstX0 - centeredRect.x);
+        int srcStartY = srcRect.y + (dstY0 - centeredRect.y);
+        for (int row = 0; row < copyH; ++row) {
+            Uint8* srcRow = src + (srcStartY + row) * srcPitch + srcStartX;
+            Uint8* dstRow = dst + (dstY0 + row) * dstPitch + dstX0;
+            for (int col = 0; col < copyW; ++col) {
+                Uint8 px = srcRow[col];
+                if (px != ckByte) {
+                    dstRow[col] = px;
+                }
+            }
+        }
+    }
+    else if (!hasColorKey) {
+        // Scaled blit, no color key
+        // (nearest-neighbour: each destination pixel maps back to a source pixel).
+        for (int dy = dstY0; dy < dstY1; ++dy) {
+            int sy = srcRect.y + (dy - centeredRect.y) * srcRect.h / centeredRect.h;
+            Uint8* dstRow = dst + dy * dstPitch;
+            Uint8* srcRow = src + sy * srcPitch;
+            for (int dx = dstX0; dx < dstX1; ++dx) {
+                int sx = srcRect.x + (dx - centeredRect.x) * srcRect.w / centeredRect.w;
+                dstRow[dx] = srcRow[sx];
+            }
+        }
+    }
+    else {
+        // Scaled blit with color key
+        for (int dy = dstY0; dy < dstY1; ++dy) {
+            int sy = srcRect.y + (dy - centeredRect.y) * srcRect.h / centeredRect.h;
+            Uint8* dstRow = dst + dy * dstPitch;
+            Uint8* srcRow = src + sy * srcPitch;
+            for (int dx = dstX0; dx < dstX1; ++dx) {
+                int sx = srcRect.x + (dx - centeredRect.x) * srcRect.w / centeredRect.w;
+                Uint8 px = srcRow[sx];
+                if (px != ckByte) {
+                    dstRow[dx] = px;
+                }
+            }
+        }
+    }
+    if (wasLocked) {
+        SDL_LockSurface(surface);
+    }
+}
+
+// Dithering does not apply to this renderer; the argument is ignored.
+void Direct3DRMPaletteSWRenderer::SetDither(bool dither)
+{
+    (void) dither;
+}
+
+// Stores the active palette, flags the lighting LUT for rebuild and attaches
+// the palette to the render target when one exists.
+void Direct3DRMPaletteSWRenderer::SetPalette(SDL_Palette* palette)
+{
+    m_palette = palette;
+    m_lightLUTDirty = true;
+    if (m_renderedImage) {
+        SDL_SetSurfacePalette(m_renderedImage, palette);
+    }
+}
+
+void Direct3DRMPaletteSWRenderer::Download(SDL_Surface* target)
+{
+    if (!m_renderedImage || !target) {
+        return;
+    }
+
+    // Extract the viewport region (excluding pillarbox/letterbox borders)
+    // and scale it to fill the target, matching the software renderer.
+ SDL_Rect srcRect = { + static_cast(m_viewportTransform.offsetX), + static_cast(m_viewportTransform.offsetY), + static_cast(m_virtualWidth * m_viewportTransform.scale), + static_cast(m_virtualHeight * m_viewportTransform.scale), + }; + + if (m_palette) { + SDL_SetSurfacePalette(m_renderedImage, m_palette); + } + SDL_BlitSurfaceScaled(m_renderedImage, &srcRect, target, nullptr, SDL_SCALEMODE_NEAREST); +} diff --git a/miniwin/src/d3drm/backends/software/renderer.cpp b/miniwin/src/d3drm/backends/software/renderer.cpp index b945fbc9..796cfc92 100644 --- a/miniwin/src/d3drm/backends/software/renderer.cpp +++ b/miniwin/src/d3drm/backends/software/renderer.cpp @@ -64,14 +64,14 @@ void Direct3DRMSoftwareRenderer::ClearZBuffer() const float inf = std::numeric_limits::infinity(); size_t i = 0; -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) +#if (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && defined(__SSE2__) if (SDL_HasSSE2()) { __m128 inf4 = _mm_set1_ps(inf); for (; i + 4 <= size; i += 4) { _mm_storeu_ps(&m_zBuffer[i], inf4); } } -#if defined(__i386__) || defined(_M_IX86) +#if (defined(__i386__) || defined(_M_IX86)) && defined(__MMX__) else if (SDL_HasMMX()) { const __m64 mm_inf = _mm_set_pi32(0x7F800000, 0x7F800000); for (; i + 2 <= size; i += 2) { @@ -826,6 +826,13 @@ void Direct3DRMSoftwareRenderer::SetDither(bool dither) { } +void Direct3DRMSoftwareRenderer::SetPalette(SDL_Palette* palette) +{ + if (m_renderedImage) { + SDL_SetSurfacePalette(m_renderedImage, palette); + } +} + void Direct3DRMSoftwareRenderer::Download(SDL_Surface* target) { SDL_Rect srcRect = { diff --git a/miniwin/src/d3drm/d3drmrenderer.cpp b/miniwin/src/d3drm/d3drmrenderer.cpp index b30c5b23..d359fb16 100644 --- a/miniwin/src/d3drm/d3drmrenderer.cpp +++ b/miniwin/src/d3drm/d3drmrenderer.cpp @@ -20,9 +20,15 @@ #ifdef USE_SOFTWARE_RENDER #include "d3drmrenderer_software.h" #endif +#ifdef USE_PALETTE_SW_RENDER +#include 
"d3drmrenderer_palettesw.h" +#endif #ifdef USE_GXM #include "d3drmrenderer_gxm.h" #endif +#ifdef USE_GLIDE +#include "d3drmrenderer_glide.h" +#endif Direct3DRMRenderer* CreateDirect3DRMRenderer( const IDirect3DMiniwin* d3d, @@ -74,6 +80,16 @@ Direct3DRMRenderer* CreateDirect3DRMRenderer( if (SDL_memcmp(guid, &GXM_GUID, sizeof(GUID)) == 0) { return GXMRenderer::Create(DDSDesc.dwWidth, DDSDesc.dwHeight, d3d->GetMSAASamples()); } +#endif +#ifdef USE_GLIDE + if (SDL_memcmp(guid, &GLIDE_GUID, sizeof(GUID)) == 0) { + return new Direct3DRMGlideRenderer(DDSDesc.dwWidth, DDSDesc.dwHeight); + } +#endif +#ifdef USE_PALETTE_SW_RENDER + if (SDL_memcmp(guid, &PALETTE_SW_GUID, sizeof(GUID)) == 0) { + return new Direct3DRMPaletteSWRenderer(DDSDesc.dwWidth, DDSDesc.dwHeight); + } #endif return nullptr; } @@ -101,6 +117,12 @@ void Direct3DRMRenderer_EnumDevices(const IDirect3DMiniwin* d3d, LPD3DENUMDEVICE #ifdef USE_SOFTWARE_RENDER Direct3DRMSoftware_EnumDevice(cb, ctx); #endif +#ifdef USE_PALETTE_SW_RENDER + Direct3DRMPaletteSW_EnumDevice(cb, ctx); +#endif +#ifdef USE_GLIDE + Direct3DRMGlide_EnumDevice(cb, ctx); +#endif #ifdef USE_GXM GXMRenderer_EnumDevice(cb, ctx); #endif diff --git a/miniwin/src/ddraw/ddraw.cpp b/miniwin/src/ddraw/ddraw.cpp index b28a105b..c79b334f 100644 --- a/miniwin/src/ddraw/ddraw.cpp +++ b/miniwin/src/ddraw/ddraw.cpp @@ -245,7 +245,12 @@ HRESULT DirectDrawImpl::GetDisplayMode(LPDDSURFACEDESC lpDDSurfaceDesc) #ifdef MINIWIN_PIXELFORMAT format = MINIWIN_PIXELFORMAT; #else - format = mode->format; + if (m_virtualBPP == 8 || (m_frameBuffer && m_frameBuffer->IsIndex8())) { + format = SDL_PIXELFORMAT_INDEX8; + } + else { + format = mode->format; + } #endif const SDL_PixelFormatDetails* details = SDL_GetPixelFormatDetails(format); @@ -308,6 +313,7 @@ HRESULT DirectDrawImpl::SetDisplayMode(DWORD dwWidth, DWORD dwHeight, DWORD dwBP { m_virtualWidth = dwWidth; m_virtualHeight = dwHeight; + m_virtualBPP = dwBPP; return DD_OK; } diff --git 
a/miniwin/src/ddraw/ddsurface.cpp b/miniwin/src/ddraw/ddsurface.cpp index 58ee938a..04235a28 100644 --- a/miniwin/src/ddraw/ddsurface.cpp +++ b/miniwin/src/ddraw/ddsurface.cpp @@ -53,13 +53,20 @@ HRESULT DirectDrawSurfaceImpl::Blt( ) { if ((dwFlags & DDBLT_COLORFILL) == DDBLT_COLORFILL) { - Uint8 a = (lpDDBltFx->dwFillColor >> 24) & 0xFF; - Uint8 r = (lpDDBltFx->dwFillColor >> 16) & 0xFF; - Uint8 g = (lpDDBltFx->dwFillColor >> 8) & 0xFF; - Uint8 b = lpDDBltFx->dwFillColor & 0xFF; + Uint32 color; + if (m_surface->format == SDL_PIXELFORMAT_INDEX8) { + // For INDEX8 surfaces the fill color is a palette index, not RGBA. + color = lpDDBltFx->dwFillColor & 0xFF; + } + else { + Uint8 a = (lpDDBltFx->dwFillColor >> 24) & 0xFF; + Uint8 r = (lpDDBltFx->dwFillColor >> 16) & 0xFF; + Uint8 g = (lpDDBltFx->dwFillColor >> 8) & 0xFF; + Uint8 b = lpDDBltFx->dwFillColor & 0xFF; - const SDL_PixelFormatDetails* details = SDL_GetPixelFormatDetails(m_surface->format); - Uint32 color = SDL_MapRGBA(details, nullptr, r, g, b, a); + const SDL_PixelFormatDetails* details = SDL_GetPixelFormatDetails(m_surface->format); + color = SDL_MapRGBA(details, nullptr, r, g, b, a); + } if (lpDestRect) { SDL_Rect dstRect = ConvertRect(lpDestRect); SDL_FillSurfaceRect(m_surface, &dstRect, color); diff --git a/miniwin/src/ddraw/framebuffer.cpp b/miniwin/src/ddraw/framebuffer.cpp index 82022649..067596cd 100644 --- a/miniwin/src/ddraw/framebuffer.cpp +++ b/miniwin/src/ddraw/framebuffer.cpp @@ -9,7 +9,11 @@ FrameBufferImpl::FrameBufferImpl(DWORD virtualWidth, DWORD virtualHeight) : m_virtualWidth(virtualWidth), m_virtualHeight(virtualHeight) { +#ifdef __DJGPP__ + m_transferBuffer = new DirectDrawSurfaceImpl(m_virtualWidth, m_virtualHeight, SDL_PIXELFORMAT_INDEX8); +#else m_transferBuffer = new DirectDrawSurfaceImpl(m_virtualWidth, m_virtualHeight, SDL_PIXELFORMAT_RGBA32); +#endif } FrameBufferImpl::~FrameBufferImpl() @@ -49,7 +53,7 @@ HRESULT FrameBufferImpl::Blt( return DDERR_GENERIC; } - if 
(dynamic_cast(lpDDSrcSurface) == this) { + if (dynamic_cast(lpDDSrcSurface)) { return Flip(nullptr, DDFLIP_WAIT); } @@ -103,7 +107,11 @@ HRESULT FrameBufferImpl::BltFast( int height = lpSrcRect ? (lpSrcRect->bottom - lpSrcRect->top) : surface->m_surface->h; RECT destRect = {(int) dwX, (int) dwY, (int) (dwX + width), (int) (dwY + height)}; - return Blt(&destRect, lpDDSrcSurface, lpSrcRect, DDBLT_NONE, nullptr); + DDBltFlags flags = DDBLT_NONE; + if ((dwTrans & DDBLTFAST_SRCCOLORKEY) == DDBLTFAST_SRCCOLORKEY) { + flags = flags | DDBLT_KEYSRC; + } + return Blt(&destRect, lpDDSrcSurface, lpSrcRect, flags, nullptr); } HRESULT FrameBufferImpl::Flip(LPDIRECTDRAWSURFACE lpDDSurfaceTargetOverride, DDFlipFlags dwFlags) @@ -210,8 +218,13 @@ HRESULT FrameBufferImpl::SetColorKey(DDColorKeyFlags dwFlags, LPDDCOLORKEY lpDDC HRESULT FrameBufferImpl::SetPalette(LPDIRECTDRAWPALETTE lpDDPalette) { - if (m_transferBuffer->m_surface->format != SDL_PIXELFORMAT_INDEX8) { - MINIWIN_NOT_IMPLEMENTED(); + // If the transfer buffer is not INDEX8 yet, recreate it — but only when + // the renderer actually works with paletted surfaces (palette SW / DOS). + // GL-based renderers use RGBA32 transfer buffers and convert on upload. 
+ if (m_transferBuffer->m_surface->format != SDL_PIXELFORMAT_INDEX8 && DDRenderer && + DDRenderer->UsesPalettedSurfaces()) { + m_transferBuffer->Release(); + m_transferBuffer = new DirectDrawSurfaceImpl(m_virtualWidth, m_virtualHeight, SDL_PIXELFORMAT_INDEX8); } lpDDPalette->AddRef(); @@ -222,6 +235,11 @@ HRESULT FrameBufferImpl::SetPalette(LPDIRECTDRAWPALETTE lpDDPalette) m_palette = lpDDPalette; SDL_SetSurfacePalette(m_transferBuffer->m_surface, ((DirectDrawPaletteImpl*) m_palette)->m_palette); + + if (DDRenderer) { + DDRenderer->SetPalette(((DirectDrawPaletteImpl*) m_palette)->m_palette); + } + return DD_OK; } diff --git a/miniwin/src/internal/d3drmrenderer.h b/miniwin/src/internal/d3drmrenderer.h index 7c19a8b4..6ddeaeb1 100644 --- a/miniwin/src/internal/d3drmrenderer.h +++ b/miniwin/src/internal/d3drmrenderer.h @@ -55,6 +55,8 @@ class Direct3DRMRenderer : public IDirect3DDevice2 { virtual void Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) = 0; virtual void Download(SDL_Surface* target) = 0; virtual void SetDither(bool dither) = 0; + virtual void SetPalette(SDL_Palette* palette) {} + virtual bool UsesPalettedSurfaces() const { return false; } protected: int m_width, m_height; diff --git a/miniwin/src/internal/d3drmrenderer_glide.h b/miniwin/src/internal/d3drmrenderer_glide.h new file mode 100644 index 00000000..9721f145 --- /dev/null +++ b/miniwin/src/internal/d3drmrenderer_glide.h @@ -0,0 +1,110 @@ +#pragma once + +#include "d3drmrenderer.h" +#include "d3drmtexture_impl.h" +#include "ddraw_impl.h" + +extern "C" +{ +#include +} + +#ifdef GLIDE3 +// Glide 3 uses custom vertex layout - define our own struct +struct GlideVertex { + float x, y; // screen coords + float ooz; // 65535/Z (for Z-buffering) + float oow; // 1/w (for perspective correction) + float r, g, b, a; // color (0-255) + float sow, tow; // texture coords (s/w, t/w) +}; +#define GR_WDEPTHVALUE_FARTHEST 0xFFFF +#else +typedef GrVertex GlideVertex; 
+#endif + +#include + +// clang-format off +DEFINE_GUID(GLIDE_GUID, 0x682656F3, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08); +// clang-format on + +struct GlideTextureEntry { + IDirect3DRMTexture* texture; + Uint32 version; + GrTexInfo info; + FxU32 startAddress; + int texW; // actual power-of-2 width uploaded to Glide + int texH; // actual power-of-2 height uploaded to Glide +}; + +struct GlideMeshEntry { + const MeshGroup* meshGroup; + Uint32 version; + bool flat; + // Cached flattened geometry (only populated for flat-shaded meshes) + std::vector flatVertices; + std::vector flatIndices; +}; + +class Direct3DRMGlideRenderer : public Direct3DRMRenderer { +public: + Direct3DRMGlideRenderer(int width, int height); + ~Direct3DRMGlideRenderer() override; + + void PushLights(const SceneLight* lights, size_t count) override; + void SetProjection(const D3DRMMATRIX4D& projection, D3DVALUE front, D3DVALUE back) override; + void SetFrustumPlanes(const Plane* frustumPlanes) override; + Uint32 GetTextureId(IDirect3DRMTexture* texture, bool isUI = false, float scaleX = 0, float scaleY = 0) override; + Uint32 GetMeshId(IDirect3DRMMesh* mesh, const MeshGroup* meshGroup) override; + HRESULT BeginFrame() override; + void EnableTransparency() override; + void SubmitDraw( + DWORD meshId, + const D3DRMMATRIX4D& modelViewMatrix, + const D3DRMMATRIX4D& worldMatrix, + const D3DRMMATRIX4D& viewMatrix, + const Matrix3x3& normalMatrix, + const Appearance& appearance + ) override; + HRESULT FinalizeFrame() override; + void Resize(int width, int height, const ViewportTransform& viewportTransform) override; + void Clear(float r, float g, float b) override; + void Flip() override; + void Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) override; + void Download(SDL_Surface* target) override; + void SetDither(bool dither) override; + void SetPalette(SDL_Palette* palette) override; + +private: + std::vector m_textureCache; + 
std::vector m_meshCache; + std::vector m_lights; + D3DRMMATRIX4D m_projection; + D3DVALUE m_frontClip; + D3DVALUE m_backClip; + Plane m_frustumPlanes[6]; + std::vector m_transformedVertices; + std::vector m_litColors; + bool m_transparencyEnabled; + FxU32 m_nextTextureAddress; + SDL_Palette* m_palette = nullptr; + bool m_paletteUploaded = false; +#ifdef GLIDE3 + FxU32 m_glideContext = 0; +#endif +}; + +inline static void Direct3DRMGlide_EnumDevice(LPD3DENUMDEVICESCALLBACK cb, void* ctx) +{ + D3DDEVICEDESC halDesc = {}; + D3DDEVICEDESC helDesc = {}; + halDesc.dcmColorModel = D3DCOLOR_RGB; + halDesc.dwFlags = D3DDD_DEVICEZBUFFERBITDEPTH; + halDesc.dwDeviceZBufferBitDepth = DDBD_16; + halDesc.dwDeviceRenderBitDepth = DDBD_16; + halDesc.dpcTriCaps.dwTextureCaps = D3DPTEXTURECAPS_PERSPECTIVE; + halDesc.dpcTriCaps.dwShadeCaps = D3DPSHADECAPS_ALPHAFLATBLEND; + halDesc.dpcTriCaps.dwTextureFilterCaps = D3DPTFILTERCAPS_LINEAR; + EnumDevice(cb, ctx, "3dfx Glide", &halDesc, &helDesc, GLIDE_GUID); +} diff --git a/miniwin/src/internal/d3drmrenderer_palettesw.h b/miniwin/src/internal/d3drmrenderer_palettesw.h new file mode 100644 index 00000000..fbebe560 --- /dev/null +++ b/miniwin/src/internal/d3drmrenderer_palettesw.h @@ -0,0 +1,116 @@ +#pragma once + +#include "d3drmrenderer.h" +#include "d3drmtexture_impl.h" +#include "ddraw_impl.h" + +#include +#include +#include + +DEFINE_GUID(PALETTE_SW_GUID, 0x682656F3, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07); + +struct PaletteTextureCache { + Direct3DRMTextureImpl* texture; + Uint8 version; + SDL_Surface* cached; +}; + +struct PaletteMeshCache { + const MeshGroup* meshGroup; + int version; + bool flat; + std::vector vertices; + std::vector indices; +}; + +class Direct3DRMPaletteSWRenderer : public Direct3DRMRenderer { +public: + Direct3DRMPaletteSWRenderer(DWORD width, DWORD height); + ~Direct3DRMPaletteSWRenderer() override; + void PushLights(const SceneLight* vertices, size_t count) override; + Uint32 
GetTextureId(IDirect3DRMTexture* texture, bool isUI, float scaleX, float scaleY) override; + Uint32 GetMeshId(IDirect3DRMMesh* mesh, const MeshGroup* meshGroup) override; + void SetProjection(const D3DRMMATRIX4D& projection, D3DVALUE front, D3DVALUE back) override; + void SetFrustumPlanes(const Plane* frustumPlanes) override; + HRESULT BeginFrame() override; + void EnableTransparency() override; + void SubmitDraw( + DWORD meshId, + const D3DRMMATRIX4D& modelViewMatrix, + const D3DRMMATRIX4D& worldMatrix, + const D3DRMMATRIX4D& viewMatrix, + const Matrix3x3& normalMatrix, + const Appearance& appearance + ) override; + HRESULT FinalizeFrame() override; + void Resize(int width, int height, const ViewportTransform& viewportTransform) override; + void Clear(float r, float g, float b) override; + void Flip() override; + void Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) override; + void Download(SDL_Surface* target) override; + void SetDither(bool dither) override; + void SetPalette(SDL_Palette* palette) override; + bool UsesPalettedSurfaces() const override { return true; } + +private: + void ClearZBuffer(); + void DrawTriangleProjected( + const D3DRMVERTEX& v0, + const D3DRMVERTEX& v1, + const D3DRMVERTEX& v2, + const Appearance& appearance + ); + void DrawTriangleClipped(const D3DRMVERTEX (&v)[3], const Appearance& appearance); + void ProjectVertex(const D3DVECTOR& v, D3DRMVECTOR4D& p) const; + Uint8 ApplyLighting(const D3DVECTOR& position, const D3DVECTOR& normal, const Appearance& appearance, Uint8 texel); + void BuildLightingLUT(); + void BuildBlendLUT(); + void AddTextureDestroyCallback(Uint32 id, IDirect3DRMTexture* texture); + void AddMeshDestroyCallback(Uint32 id, IDirect3DRMMesh* mesh); + + SDL_Surface* m_renderedImage = nullptr; + SDL_Palette* m_palette = nullptr; + SDL_Palette* m_flipPalette = nullptr; // Palette snapshot taken at Flip time (the correct one) + bool m_flipPaletteDirty = false; + std::vector 
m_lights; + std::vector m_textures; + std::vector m_meshes; + D3DVALUE m_front; + D3DVALUE m_back; + Matrix3x3 m_normalMatrix; + D3DRMMATRIX4D m_projection; + std::vector m_zBuffer; + std::vector m_transformedVerts; + Plane m_frustumPlanes[6]; + + // Lighting LUT: for each of 256 palette entries x 32 brightness levels, + // store the best-matching palette index. + // Usage: m_lightLUT[paletteIndex * 32 + brightnessLevel] + static constexpr int LIGHT_LEVELS = 32; + Uint8 m_lightLUT[256 * LIGHT_LEVELS]; + + // Blend LUT: for any two palette indices, the pre-computed 50/50 blend + // result mapped to the nearest palette colour. + // Usage: m_blendLUT[srcIndex * 256 + dstIndex] + Uint8 m_blendLUT[256 * 256]; + + bool m_lightLUTDirty = true; + bool m_transparencyEnabled = false; +}; + +inline static void Direct3DRMPaletteSW_EnumDevice(LPD3DENUMDEVICESCALLBACK cb, void* ctx) +{ + D3DDEVICEDESC halDesc = {}; + + D3DDEVICEDESC helDesc = {}; + helDesc.dcmColorModel = D3DCOLOR_RGB; + helDesc.dwFlags = D3DDD_DEVICEZBUFFERBITDEPTH; + helDesc.dwDeviceZBufferBitDepth = DDBD_16; + helDesc.dwDeviceRenderBitDepth = DDBD_8; + helDesc.dpcTriCaps.dwTextureCaps = D3DPTEXTURECAPS_PERSPECTIVE; + helDesc.dpcTriCaps.dwShadeCaps = D3DPSHADECAPS_ALPHAFLATBLEND; + helDesc.dpcTriCaps.dwTextureFilterCaps = D3DPTFILTERCAPS_LINEAR; + + EnumDevice(cb, ctx, "Miniwin Paletted Software", &halDesc, &helDesc, PALETTE_SW_GUID); +} diff --git a/miniwin/src/internal/d3drmrenderer_software.h b/miniwin/src/internal/d3drmrenderer_software.h index 0c422597..1821b492 100644 --- a/miniwin/src/internal/d3drmrenderer_software.h +++ b/miniwin/src/internal/d3drmrenderer_software.h @@ -50,6 +50,7 @@ class Direct3DRMSoftwareRenderer : public Direct3DRMRenderer { void Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) override; void Download(SDL_Surface* target) override; void SetDither(bool dither) override; + void SetPalette(SDL_Palette* palette) override; private: void 
ClearZBuffer(); diff --git a/miniwin/src/internal/ddraw_impl.h b/miniwin/src/internal/ddraw_impl.h index 0da2d883..8f2ffe2e 100644 --- a/miniwin/src/internal/ddraw_impl.h +++ b/miniwin/src/internal/ddraw_impl.h @@ -61,9 +61,10 @@ struct DirectDrawImpl : public IDirectDraw2, public IDirect3D2, public IDirect3D float GetAnisotropic() const override { return m_anisotropic; } private: - FrameBufferImpl* m_frameBuffer; + FrameBufferImpl* m_frameBuffer = nullptr; int m_virtualWidth = 0; int m_virtualHeight = 0; + int m_virtualBPP = 0; DWORD m_msaaSamples = 0; float m_anisotropic = 0.0f; }; diff --git a/miniwin/src/internal/framebuffer_impl.h b/miniwin/src/internal/framebuffer_impl.h index 47805e69..76693f90 100644 --- a/miniwin/src/internal/framebuffer_impl.h +++ b/miniwin/src/internal/framebuffer_impl.h @@ -36,6 +36,8 @@ struct FrameBufferImpl : public IDirectDrawSurface3 { HRESULT SetPalette(LPDIRECTDRAWPALETTE lpDDPalette) override; HRESULT Unlock(LPVOID lpSurfaceData) override; + bool IsIndex8() const { return m_transferBuffer->m_surface->format == SDL_PIXELFORMAT_INDEX8; } + private: uint32_t m_virtualWidth; uint32_t m_virtualHeight;