diff --git a/.github/actions/setup-djgpp-toolchain/action.yml b/.github/actions/setup-djgpp-toolchain/action.yml new file mode 100644 index 00000000..93a6fe2b --- /dev/null +++ b/.github/actions/setup-djgpp-toolchain/action.yml @@ -0,0 +1,71 @@ +name: 'Setup DJGPP toolchain' +description: 'Download DJGPP and setup CMake toolchain' +runs: + using: 'composite' + steps: + - name: 'Calculate variables' + id: calc + shell: sh + run: | + version="12.2.0" + case "${{ runner.os }}-${{ runner.arch }}" in + "Linux-X86") + archive="djgpp-linux32-gcc1220.tar.bz2" + ;; + "Linux-X64") + archive="djgpp-linux64-gcc1220.tar.bz2" + ;; + "macOS-X86" | "macOS-X64" | "macOS-ARM64") + archive="djgpp-osx-gcc1220.tar.bz2" + ;; + "Windows-X86" | "Windows-X64") + archive="djgpp-mingw-gcc1220.zip" + ;; + *) + echo "Unsupported ${{ runner.os }}-${{ runner.arch }}" + exit 1; + ;; + esac + echo "url=https://github.com/andrewwutw/build-djgpp/releases/download/v3.4/${archive}" >> ${GITHUB_OUTPUT} + echo "archive=${archive}" >> ${GITHUB_OUTPUT} + echo "version=${version}" >> ${GITHUB_OUTPUT} + echo "cache-key=${archive}-${version}-${{ runner.os }}-${{ runner.arch }}" >> ${GITHUB_OUTPUT} + - name: 'Restore cached ${{ steps.calc.outputs.archive }}' + id: cache-restore + uses: actions/cache/restore@v5 + with: + path: '${{ runner.temp }}/${{ steps.calc.outputs.archive }}' + key: ${{ steps.calc.outputs.cache-key }} + - name: 'Download DJGPP ${{ steps.calc.outputs.version }} for ${{ runner.os }} (${{ runner.arch }})' + if: ${{ !steps.cache-restore.outputs.cache-hit || steps.cache-restore.outputs.cache-hit == 'false' }} + shell: pwsh + run: | + Invoke-WebRequest "${{ steps.calc.outputs.url }}" -OutFile "${{ runner.temp }}/${{ steps.calc.outputs.archive }}" + - name: 'Cache ${{ steps.calc.outputs.archive }}' + if: ${{ !steps.cache-restore.outputs.cache-hit || steps.cache-restore.outputs.cache-hit == 'false' }} + uses: actions/cache/save@v5 + with: + path: '${{ runner.temp }}/${{ 
steps.calc.outputs.archive }}' + key: ${{ steps.calc.outputs.cache-key }} + - name: 'Extract DJGPP archive' + shell: pwsh + run: | + $archive = "${{ steps.calc.outputs.archive }}"; + if ($archive.EndsWith(".bz2")) { + # Remove ".bz2" suffix + $tar_archive = $archive.Substring(0, $archive.Length - 4) + 7z "-o${{ runner.temp }}" x "${{ runner.temp }}/${{ steps.calc.outputs.archive }}" + 7z "-o${{ runner.temp }}" x "${{ runner.temp }}/$tar_archive" + } else { + 7z "-o${{ runner.temp }}" x "${{ runner.temp }}/${{ steps.calc.outputs.archive }}" + } + - name: 'Install Linux dependency' + if: ${{ runner.os == 'Linux' }} + shell: sh + run: | + sudo apt-get install -y libfl-dev + - name: 'Set output variables' + id: final + shell: pwsh + run: | + echo "${{ runner.temp }}/djgpp/bin" >> $env:GITHUB_PATH diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2cbf8deb..b8f338ec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: name: 'clang-format' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Run clang-format run: | find CONFIG LEGO1 ISLE miniwin -iname '*.h' -o -iname '*.cpp' | xargs \ @@ -48,10 +48,11 @@ jobs: - { name: 'iOS', os: 'macos-15', generator: 'Xcode', dx5: false, config: false, brew: true, werror: true, clang-tidy: false, cmake-args: '-DCMAKE_SYSTEM_NAME=iOS', ios: true } - { name: 'Emscripten', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, emsdk: true, werror: true, clang-tidy: false, cmake-wrapper: 'emcmake' } - { name: 'Nintendo 3DS', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, n3ds: true, werror: true, clang-tidy: false, container: 'devkitpro/devkitarm:latest', cmake-args: '-DCMAKE_TOOLCHAIN_FILE=/opt/devkitpro/cmake/3DS.cmake' } - - { name: 'Nintendo Switch', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, nx: true, werror: true, clang-tidy: false, container: 
'devkitpro/devkita64:latest', cmake-args: '-DCMAKE_TOOLCHAIN_FILE=/opt/devkitpro/cmake/Switch.cmake' } + - { name: 'Nintendo Switch', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, nx: true, werror: true, clang-tidy: false, container: 'devkitpro/devkita64:latest', cmake-args: '-DCMAKE_TOOLCHAIN_FILE=/opt/devkitpro/cmake/Switch.cmake' } - { name: 'Xbox One', os: 'windows-latest', generator: 'Visual Studio 17 2022', dx5: false, config: false, msvc: true, werror: false, clang-tidy: false, vc-arch: 'amd64', cmake-args: '-DCMAKE_SYSTEM_NAME=WindowsStore -DCMAKE_SYSTEM_VERSION=10.0.26100.0', xbox-one: true} - { name: 'Android', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, android: true, werror: true, clang-tidy: false,} - { name: 'Vita', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, vita: true, werror: true, clang-tidy: false, cmake-args: '--toolchain /usr/local/vitasdk/share/vita.toolchain.cmake'} + - { name: 'DOS', os: 'ubuntu-latest', generator: 'Ninja', dx5: false, config: false, dos: true, werror: true, clang-tidy: false, cmake-args: '--toolchain $GITHUB_WORKSPACE/CMake/i586-pc-msdosdjgpp.cmake'} steps: - name: Setup vcvars if: ${{ !!matrix.msvc }} @@ -134,7 +135,7 @@ jobs: echo "$VITASDK/bin" >> $GITHUB_PATH ./install-all.sh - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Checkout LFS if: ${{ matrix.build-assets }} @@ -143,11 +144,15 @@ jobs: - name: Setup Java (Android) if: ${{ matrix.android }} - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: '17' + - name: 'Set up DJGPP toolchain' + uses: ./.github/actions/setup-djgpp-toolchain + if: ${{ matrix.dos }} + - name: Get CMake (Android) if: ${{ matrix.android }} uses: lukka/get-cmake@latest @@ -271,7 +276,7 @@ jobs: run: (cd build/assets && zip -r ../dist/isle-assets.zip .) 
- name: Upload Build Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: '${{ matrix.name }}' path: | @@ -287,7 +292,7 @@ jobs: name: 'FreeBSD' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build on FreeBSD uses: vmactions/freebsd-vm@v1 @@ -307,7 +312,7 @@ jobs: cd build && cpack . - name: Upload Build Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: 'FreeBSD' path: build/dist/isle-* @@ -331,7 +336,7 @@ jobs: options: --privileged steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build Flatpak uses: flatpak/flatpak-github-actions/flatpak-builder@v6 @@ -345,7 +350,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install LLVM and Clang uses: KyleMayes/install-llvm-action@v1 @@ -389,14 +394,14 @@ jobs: - freebsd steps: - name: Download All Artifacts - uses: actions/download-artifact@main + uses: actions/download-artifact@v8 with: pattern: "*" path: Release merge-multiple: true - name: Checkout uploadtool - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: 'probonopd/uploadtool' path: 'uploadtool' diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ca316590..6fe8ab2e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -48,7 +48,7 @@ jobs: labels: ${{ steps.meta.outputs.labels }} - name: Generate artifact attestation - uses: actions/attest-build-provenance@v2 + uses: actions/attest-build-provenance@v4 with: subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} subject-digest: ${{ steps.push.outputs.digest }} diff --git a/3rdparty/CMakeLists.txt 
b/3rdparty/CMakeLists.txt index cb3918b7..e95415da 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -5,8 +5,8 @@ if(DOWNLOAD_DEPENDENCIES) include(FetchContent) FetchContent_Declare( miniaudio - URL https://github.com/mackron/miniaudio/archive/refs/tags/0.11.24.tar.gz - URL_MD5 19e8eb21223c56a4a2d167d04decddc9 + URL https://github.com/mackron/miniaudio/archive/refs/tags/0.11.25.tar.gz + URL_HASH MD5=6fae6da8f30afb3ddcba26fcaa64f540 ) block() set(BUILD_SHARED_LIBS OFF) @@ -34,6 +34,12 @@ target_compile_definitions(miniaudio PUBLIC MA_NO_RUNTIME_LINKING ) +if(DJGPP) + # DOS is single-threaded so we provide non-atomic __atomic_*_8 stubs + # (see CMakeLists.txt top-level comment about -march=i486). + target_sources(miniaudio PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/djgpp_atomic64.c") +endif() + if(DOWNLOAD_DEPENDENCIES) include(FetchContent) FetchContent_Declare( diff --git a/3rdparty/djgpp_atomic64.c b/3rdparty/djgpp_atomic64.c new file mode 100644 index 00000000..011e8c6a --- /dev/null +++ b/3rdparty/djgpp_atomic64.c @@ -0,0 +1,109 @@ +/* + * Non-atomic 64-bit __atomic_*_8 stubs for DJGPP / DOS. + * + * DOS is single-threaded so real atomics are unnecessary. GCC emits calls to + * these helper functions when targeting i486 (or when __i586__ is undefined) + * because the ISA lacks a native 64-bit atomic instruction. Normally libatomic + * provides them, but DJGPP doesn't ship libatomic. + * + * Every function simply performs a plain (non-atomic) load/store/exchange/CAS + * which is perfectly safe in a single-threaded environment. 
+ */ + +#include <stdint.h> +#include <string.h> + +uint64_t __atomic_load_8(const volatile void *ptr, int memorder) +{ + (void)memorder; + uint64_t val; + memcpy(&val, (const void *)ptr, sizeof(val)); + return val; +} + +void __atomic_store_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + memcpy((void *)ptr, &val, sizeof(val)); +} + +uint64_t __atomic_exchange_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + memcpy((void *)ptr, &val, sizeof(val)); + return old; +} + +int __atomic_compare_exchange_8( + volatile void *ptr, + void *expected, + uint64_t desired, + int success_memorder, + int failure_memorder +) +{ + (void)success_memorder; + (void)failure_memorder; + uint64_t current; + memcpy(&current, (void *)ptr, sizeof(current)); + uint64_t exp; + memcpy(&exp, expected, sizeof(exp)); + if (current == exp) { + memcpy((void *)ptr, &desired, sizeof(desired)); + return 1; + } + memcpy(expected, &current, sizeof(current)); + return 0; +} + +uint64_t __atomic_fetch_add_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old + val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_sub_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old - val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_and_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old & val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_or_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old | val; + 
memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} + +uint64_t __atomic_fetch_xor_8(volatile void *ptr, uint64_t val, int memorder) +{ + (void)memorder; + uint64_t old; + memcpy(&old, (void *)ptr, sizeof(old)); + uint64_t new_val = old ^ val; + memcpy((void *)ptr, &new_val, sizeof(new_val)); + return old; +} diff --git a/3rdparty/miniaudio b/3rdparty/miniaudio index 13d161bc..9634bedb 160000 --- a/3rdparty/miniaudio +++ b/3rdparty/miniaudio @@ -1 +1 @@ -Subproject commit 13d161bc8d856ad61ae46b798bbeffc0f49808e8 +Subproject commit 9634bedb5b5a2ca38c1ee7108a9358a4e233f14d diff --git a/CMake/djgpp-platform-overrides.cmake b/CMake/djgpp-platform-overrides.cmake new file mode 100644 index 00000000..c919f843 --- /dev/null +++ b/CMake/djgpp-platform-overrides.cmake @@ -0,0 +1,17 @@ +# DJGPP platform overrides for DOS +# +# CMake's built-in Platform/DOS.cmake assumes OpenWatcom naming conventions +# (no prefix, .lib suffix, CMAKE_LINK_LIBRARY_SUFFIX=".lib"). DJGPP uses +# standard Unix/GCC conventions for its system libraries (lib prefix, .a +# suffix — e.g. libm.a). +# +# This file is loaded via CMAKE_USER_MAKE_RULES_OVERRIDE in the toolchain +# file, which runs *after* the platform module has set its defaults, giving +# us the final say on these variables. + +set(CMAKE_STATIC_LIBRARY_PREFIX "lib") +set(CMAKE_STATIC_LIBRARY_SUFFIX ".a") +set(CMAKE_LINK_LIBRARY_SUFFIX "") +set(CMAKE_FIND_LIBRARY_PREFIXES "lib" "") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".a" ".lib") +set(CMAKE_EXECUTABLE_SUFFIX ".exe") \ No newline at end of file diff --git a/CMake/i586-pc-msdosdjgpp.cmake b/CMake/i586-pc-msdosdjgpp.cmake new file mode 100644 index 00000000..8a4e765f --- /dev/null +++ b/CMake/i586-pc-msdosdjgpp.cmake @@ -0,0 +1,82 @@ +set(CMAKE_SYSTEM_NAME DOS) + +set(DJGPP TRUE) + +# CMake's Platform/DOS.cmake assumes OpenWatcom naming conventions (no prefix, +# .lib suffix). DJGPP uses standard Unix/GCC conventions for its system +# libraries (lib prefix, .a suffix — e.g. 
libm.a), so we override the platform +# defaults via CMAKE_USER_MAKE_RULES_OVERRIDE, which runs *after* the platform +# module has set its defaults, giving us the final say on these variables. +# The path must be cached because CMake re-parses the toolchain file during +# try_compile, where CMAKE_CURRENT_LIST_DIR may point elsewhere. +set(DJGPP_PLATFORM_OVERRIDES "${CMAKE_CURRENT_LIST_DIR}/djgpp-platform-overrides.cmake" CACHE FILEPATH "" FORCE) +set(CMAKE_USER_MAKE_RULES_OVERRIDE "${DJGPP_PLATFORM_OVERRIDES}") + +set(CMAKE_STATIC_LIBRARY_PREFIX "lib") +set(CMAKE_STATIC_LIBRARY_SUFFIX ".a") +set(CMAKE_SHARED_LIBRARY_PREFIX "") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dll") +set(CMAKE_IMPORT_LIBRARY_PREFIX "lib") +set(CMAKE_IMPORT_LIBRARY_SUFFIX ".a") +set(CMAKE_EXECUTABLE_SUFFIX ".exe") +set(CMAKE_LINK_LIBRARY_SUFFIX "") +set(CMAKE_DL_LIBS "") + +set(CMAKE_FIND_LIBRARY_PREFIXES "lib") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") + +# +# CMake toolchain file for DJGPP. Usage: +# +# 1. Download and extract DJGPP +# 2. Add directory containing i586-pc-msdosdjgpp-gcc to PATH environment variable +# 3. When configuring your CMake project, specify the toolchain file like this: +# +# cmake -DCMAKE_TOOLCHAIN_FILE=path/to/i586-pc-msdosdjgpp.cmake ... 
+# + +# specify the cross compiler +find_program(CMAKE_C_COMPILER NAMES "i586-pc-msdosdjgpp-gcc" "i386-pc-msdosdjgpp-gcc" REQUIRED) +find_program(CMAKE_CXX_COMPILER NAMES "i586-pc-msdosdjgpp-g++" "i386-pc-msdosdjgpp-g++" REQUIRED) + +execute_process(COMMAND "${CMAKE_C_COMPILER}" -print-search-dirs + RESULT_VARIABLE CC_SEARCH_DIRS_RESULT + OUTPUT_VARIABLE CC_SEARCH_DIRS_OUTPUT) + +if(CC_SEARCH_DIRS_RESULT) + message(FATAL_ERROR "Could not determine search dirs") +endif() + +string(REGEX MATCH ".*libraries: (.*).*" CC_SD_LIBS "${CC_SEARCH_DIRS_OUTPUT}") +string(STRIP "${CMAKE_MATCH_1}" CC_SEARCH_DIRS) +string(REPLACE ":" ";" CC_SEARCH_DIRS "${CC_SEARCH_DIRS}") + +foreach(CC_SEARCH_DIR ${CC_SEARCH_DIRS}) + if(CC_SEARCH_DIR MATCHES "=.*") + string(REGEX MATCH "=(.*)" CC_LIB "${CC_SEARCH_DIR}") + set(CC_SEARCH_DIR "${CMAKE_MATCH_1}") + endif() + if(IS_DIRECTORY "${CC_SEARCH_DIR}") + if(IS_DIRECTORY "${CC_SEARCH_DIR}/../include" OR IS_DIRECTORY "${CC_SEARCH_DIR}/../lib" OR IS_DIRECTORY "${CC_SEARCH_DIR}/../bin") + list(APPEND CC_ROOTS "${CC_SEARCH_DIR}/..") + else() + list(APPEND CC_ROOTS "${CC_SEARCH_DIR}") + endif() + endif() +endforeach() + +list(APPEND CMAKE_FIND_ROOT_PATH ${CC_ROOTS}) + +# search for programs in the host directories +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + +# for libraries, headers and packages in the target directories +if(NOT DEFINED CACHE{CMAKE_FIND_ROOT_PATH_MODE_LIBRARY}) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +endif() +if(NOT DEFINED CACHE{CMAKE_FIND_ROOT_PATH_MODE_INCLUDE}) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endif() +if(NOT DEFINED CACHE{CMAKE_FIND_ROOT_PATH_MODE_PACKAGE}) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) +endif() \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f3f6dbd7..c1bea1c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,15 +86,24 @@ option(ISLE_WERROR "Treat warnings as errors" OFF) cmake_dependent_option(ISLE_USE_DX5 "Build with internal DirectX 5 SDK" 
"${NOT_MINGW}" "WIN32;CMAKE_SIZEOF_VOID_P EQUAL 4" OFF) cmake_dependent_option(ISLE_MINIWIN "Use miniwin" ON "NOT ISLE_USE_DX5" OFF) cmake_dependent_option(ISLE_EXTENSIONS "Use extensions" ON "NOT ISLE_USE_DX5;NOT WINDOWS_STORE" OFF) -cmake_dependent_option(ISLE_USE_LWS "Use libwebsockets for native multiplayer" ON "ISLE_EXTENSIONS;NOT EMSCRIPTEN;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT VITA" OFF) -cmake_dependent_option(ISLE_BUILD_CONFIG "Build CONFIG.EXE application" ON "MSVC OR ISLE_MINIWIN;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT WINDOWS_STORE;NOT VITA" OFF) +cmake_dependent_option(ISLE_USE_LWS "Use libwebsockets for native multiplayer" ON "ISLE_EXTENSIONS;NOT DOS;NOT EMSCRIPTEN;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT VITA" OFF) +cmake_dependent_option(ISLE_BUILD_CONFIG "Build CONFIG.EXE application" ON "MSVC OR ISLE_MINIWIN;NOT DOS;NOT NINTENDO_3DS;NOT NINTENDO_SWITCH;NOT WINDOWS_STORE;NOT VITA" OFF) cmake_dependent_option(ISLE_COMPILE_SHADERS "Compile shaders" ON "SDL_SHADERCROSS_BIN;TARGET Python3::Interpreter" OFF) -cmake_dependent_option(CMAKE_POSITION_INDEPENDENT_CODE "Build with -fPIC" ON "NOT VITA" OFF) +cmake_dependent_option(CMAKE_POSITION_INDEPENDENT_CODE "Build with -fPIC" ON "NOT DOS;NOT VITA" OFF) option(ENABLE_CLANG_TIDY "Enable clang-tidy") option(DOWNLOAD_DEPENDENCIES "Download dependencies" ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" CACHE PATH "Directory where to put executables and dll") set(ISLE_EMSCRIPTEN_HOST "" CACHE STRING "Host URL for Emscripten streaming (e.g., https://test.com)") -cmake_dependent_option(BUILD_SHARED_LIBS "Build lego1 as a shared library" ON "NOT EMSCRIPTEN;NOT VITA" OFF) +cmake_dependent_option(BUILD_SHARED_LIBS "Build lego1 as a shared library" ON "NOT DOS;NOT EMSCRIPTEN;NOT VITA" OFF) + +if(DOS) + # DJGPP targets i386 by default. We use i486 rather than i586 because i586 + # enables cmpxchg8b which GCC uses for 64-bit atomics (lock cmpxchg8b) — + # an instruction DOSBox does not support. 
The missing __atomic_*_8 helpers + # (normally in libatomic, which DJGPP doesn't ship) are provided as simple + # non-atomic stubs in 3rdparty/djgpp_atomic64.c since DOS is single-threaded. + add_compile_options(-march=i486) +endif() message(STATUS "Isle app: ${ISLE_BUILD_APP}") message(STATUS "Config app: ${ISLE_BUILD_CONFIG}") diff --git a/ISLE/isleapp.cpp b/ISLE/isleapp.cpp index 52633a00..e077567b 100644 --- a/ISLE/isleapp.cpp +++ b/ISLE/isleapp.cpp @@ -155,11 +155,19 @@ IsleApp::IsleApp() m_using8bit = FALSE; m_using16bit = TRUE; m_hasLightSupport = FALSE; +#ifdef __DJGPP__ + m_drawCursor = TRUE; +#else m_drawCursor = FALSE; +#endif m_use3dSound = TRUE; m_useMusic = TRUE; m_wideViewAngle = TRUE; +#ifdef __DJGPP__ + m_islandQuality = 1; +#else m_islandQuality = 2; +#endif m_islandTexture = 1; m_gameStarted = FALSE; m_frameDelta = 10; @@ -191,14 +199,22 @@ IsleApp::IsleApp() m_mediaPath = NULL; m_iniPath = NULL; m_maxLod = RealtimeView::GetUserMaxLOD(); +#ifdef __DJGPP__ + m_maxLod = 1.0f; +#endif m_maxAllowedExtras = m_islandQuality <= 1 ? 
10 : 20; m_transitionType = MxTransitionManager::e_mosaic; m_cursorSensitivity = 4; m_touchScheme = LegoInputManager::e_gamepad; m_haptic = TRUE; m_wasd = FALSE; +#ifdef __DJGPP__ + m_xRes = 320; + m_yRes = 200; +#else m_xRes = 640; m_yRes = 480; +#endif m_exclusiveXRes = m_xRes; m_exclusiveYRes = m_yRes; m_exclusiveFrameRate = 60.00f; @@ -243,6 +259,10 @@ void IsleApp::Close() TransitionManager()->SetWaitIndicator(NULL); Lego()->Resume(); + if (BackgroundAudioManager()) { + BackgroundAudioManager()->Stop(); + } + while (Streamer()->Close(NULL) == SUCCESS) { } @@ -324,8 +344,16 @@ SDL_AppResult SDL_AppInit(void** appstate, int argc, char** argv) SDL_SetHint(SDL_HINT_MOUSE_TOUCH_EVENTS, "0"); SDL_SetHint(SDL_HINT_TOUCH_MOUSE_EVENTS, "0"); +#ifdef __DJGPP__ + SDL_SetHint("SDL_DOS_ALLOW_DIRECT_FRAMEBUFFER", "1"); +#endif - if (!SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_GAMEPAD | SDL_INIT_HAPTIC)) { + Uint32 initFlags = SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_GAMEPAD; +#ifndef __DJGPP__ + initFlags |= SDL_INIT_HAPTIC; +#endif + + if (!SDL_Init(initFlags)) { char buffer[256]; SDL_snprintf( buffer, @@ -717,11 +745,17 @@ SDL_AppResult SDL_AppEvent(void* appstate, SDL_Event* event) g_lastMouseX = event->motion.x; g_lastMouseY = event->motion.y; +#ifdef __DJGPP__ + if (VideoManager()) { + VideoManager()->MoveCursor(Min((MxS32) g_lastMouseX, 639), Min((MxS32) g_lastMouseY, 479)); + } +#else SDL_ShowCursor(); g_isle->SetDrawCursor(FALSE); if (VideoManager()) { VideoManager()->SetCursorBitmap(NULL); } +#endif break; case SDL_EVENT_FINGER_MOTION: { g_mousemoved = TRUE; @@ -924,6 +958,22 @@ MxResult IsleApp::SetupWindow() return FAILURE; } +#if defined(MINIWIN) + // MINIWIN: window/VESA mode matches the game's rendering resolution. + g_targetWidth = m_xRes; + g_targetHeight = m_yRes; +#else + // DX5: fullscreen uses exclusive resolution for display mode switching. 
+ if (m_fullScreen) { + g_targetWidth = m_exclusiveXRes; + g_targetHeight = m_exclusiveYRes; + } + else { + g_targetWidth = m_xRes; + g_targetHeight = m_yRes; + } +#endif + SetupVideoFlags( m_fullScreen, m_flipSurfaces, @@ -954,7 +1004,7 @@ MxResult IsleApp::SetupWindow() SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_HEIGHT_NUMBER, g_targetHeight); SDL_SetBooleanProperty(props, SDL_PROP_WINDOW_CREATE_FULLSCREEN_BOOLEAN, m_fullScreen); SDL_SetStringProperty(props, SDL_PROP_WINDOW_CREATE_TITLE_STRING, WINDOW_TITLE); -#if defined(MINIWIN) && !defined(__3DS__) && !defined(WINDOWS_STORE) && !defined(__vita__) +#if defined(MINIWIN) && !defined(__3DS__) && !defined(WINDOWS_STORE) && !defined(__vita__) && !defined(__DJGPP__) SDL_SetBooleanProperty(props, SDL_PROP_WINDOW_CREATE_OPENGL_BOOLEAN, true); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24); @@ -969,6 +1019,17 @@ MxResult IsleApp::SetupWindow() SDL_SetPointerProperty(SDL_GetWindowProperties(window), ISLE_PROP_WINDOW_CREATE_VIDEO_PARAM, &m_videoParam); +#ifdef __DJGPP__ + // DOS: request an 8-bit (INDEX8) fullscreen mode so the VESA + // framebuffer is paletted and we can blit INDEX8 surfaces directly. 
+ { + SDL_DisplayMode mode = {}; + mode.w = g_targetWidth; + mode.h = g_targetHeight; + mode.format = SDL_PIXELFORMAT_INDEX8; + SDL_SetWindowFullscreenMode(window, &mode); + } +#else if (m_exclusiveFullScreen && m_fullScreen) { SDL_DisplayMode closestMode; SDL_DisplayID displayID = SDL_GetDisplayForWindow(window); @@ -983,6 +1044,7 @@ MxResult IsleApp::SetupWindow() SDL_SetWindowFullscreenMode(window, &closestMode); } } +#endif #ifdef MINIWIN m_windowHandle = reinterpret_cast(window); @@ -1251,7 +1313,7 @@ bool IsleApp::LoadConfig() m_videoParam.GetRect() = MxRect32(0, 0, (m_xRes - 1), (m_yRes - 1)); } m_frameRate = (1000.0f / iniparser_getdouble(dict, "isle:Frame Delta", m_frameDelta)); - m_frameDelta = static_cast(std::round(iniparser_getdouble(dict, "isle:Frame Delta", m_frameDelta))); + m_frameDelta = static_cast(iniparser_getdouble(dict, "isle:Frame Delta", m_frameDelta)); m_videoParam.SetMSAASamples((m_msaaSamples = iniparser_getint(dict, "isle:MSAA", m_msaaSamples))); m_videoParam.SetAnisotropic((m_anisotropic = iniparser_getdouble(dict, "isle:Anisotropic", m_anisotropic))); m_activeInBackground = iniparser_getboolean(dict, "isle:Active in Background", m_activeInBackground); diff --git a/LEGO1/lego/legoomni/include/legovideomanager.h b/LEGO1/lego/legoomni/include/legovideomanager.h index d140ee65..b20ccc10 100644 --- a/LEGO1/lego/legoomni/include/legovideomanager.h +++ b/LEGO1/lego/legoomni/include/legovideomanager.h @@ -76,6 +76,7 @@ class LegoVideoManager : public MxVideoManager { void SetRender3D(MxBool p_render3d) { m_render3d = p_render3d; } void SetUnk0x554(MxBool p_unk0x554) { m_unk0x554 = p_unk0x554; } + MxBool GetDrawCursor() { return m_drawCursor; } // SYNTHETIC: LEGO1 0x1007ab20 // SYNTHETIC: BETA10 0x100d8040 @@ -88,9 +89,6 @@ class LegoVideoManager : public MxVideoManager { inline void DrawCursor(); - void DrawDigitToBuffer32(uint8_t* p_dst, int p_pitch, int p_x, int p_y, int p_digit, uint32_t p_color); - void DrawTextToSurface32(uint8_t* p_dst, 
int p_pitch, int p_x, int p_y, const char* p_text, uint32_t p_color); - Tgl::Renderer* m_renderer; // 0x64 Lego3DManager* m_3dManager; // 0x68 LegoROI* m_viewROI; // 0x6c diff --git a/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h b/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h index d85656ca..7c8c7270 100644 --- a/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h +++ b/LEGO1/lego/legoomni/include/mxbackgroundaudiomanager.h @@ -49,7 +49,7 @@ class MxBackgroundAudioManager : public MxCore { void Init(); void Update(MxS32 p_targetVolume, MxS32 p_speed, MxPresenter::TickleState p_tickleState); - void Stop(); + LEGO1_EXPORT void Stop(); void LowerVolume(); void RaiseVolume(); MxResult SetPendingPresenter(MxPresenter* p_presenter, MxS32 p_speed, MxPresenter::TickleState p_tickleState); diff --git a/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp b/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp index 96617e52..3aa0df8d 100644 --- a/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp +++ b/LEGO1/lego/legoomni/src/audio/mxbackgroundaudiomanager.cpp @@ -77,6 +77,10 @@ void MxBackgroundAudioManager::DestroyMusic() Streamer()->Close(m_script.GetInternal()); m_enabled = FALSE; } + + m_activePresenter = NULL; + m_pendingPresenter = NULL; + m_tickleState = MxPresenter::e_idle; } // FUNCTION: LEGO1 0x1007ee40 diff --git a/LEGO1/lego/legoomni/src/video/legovideomanager.cpp b/LEGO1/lego/legoomni/src/video/legovideomanager.cpp index 3aa6044f..3ce5697b 100644 --- a/LEGO1/lego/legoomni/src/video/legovideomanager.cpp +++ b/LEGO1/lego/legoomni/src/video/legovideomanager.cpp @@ -465,7 +465,45 @@ void LegoVideoManager::DrawFPS() if (m_unk0x528->Lock(NULL, &surfaceDesc, DDLOCK_WAIT, NULL) == DD_OK) { memset(surfaceDesc.lpSurface, 0, surfaceDesc.lPitch * surfaceDesc.dwHeight); - DrawTextToSurface32((uint8_t*) surfaceDesc.lpSurface, surfaceDesc.lPitch, 0, 0, buffer, 0xFF0000FF); + // 8-bit bitmap font for FPS display + uint8_t* dst = 
(uint8_t*) surfaceDesc.lpSurface; + int pitch = surfaceDesc.lPitch; + const char* p = buffer; + int px = 0; + static const uint8_t g_digitFont[5][10] = { + {0b1111, 0b0001, 0b1111, 0b1111, 0b1001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111}, + {0b1001, 0b0001, 0b0001, 0b0001, 0b1001, 0b1000, 0b1000, 0b0001, 0b1001, 0b1001}, + {0b1001, 0b0001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111, 0b0010, 0b1111, 0b1111}, + {0b1001, 0b0001, 0b1000, 0b0001, 0b0001, 0b0001, 0b1001, 0b0010, 0b1001, 0b0001}, + {0b1111, 0b0001, 0b1111, 0b1111, 0b0001, 0b1111, 0b1111, 0b0100, 0b1111, 0b1111}, + }; + while (*p) { + if (*p >= '0' && *p <= '9') { + int d = *p - '0'; + for (int row = 0; row < 5; ++row) { + uint8_t bits = g_digitFont[row][d]; + for (int col = 0; col < 5; ++col) { + if (bits & (1 << (4 - col))) { + for (int dy = 0; dy < 2; ++dy) { + for (int dx = 0; dx < 2; ++dx) { + dst[(row * 2 + dy) * pitch + (px + col * 2 + dx)] = 0xff; + } + } + } + } + } + px += 10; + } + else if (*p == '.') { + for (int dy = 0; dy < 2; ++dy) { + for (int dx = 0; dx < 2; ++dx) { + dst[(10 + dy) * pitch + (px + 2 + dx)] = 0xff; + } + } + px += 4; + } + ++p; + } m_unk0x528->Unlock(surfaceDesc.lpSurface); m_unk0x550 = 1.f; @@ -789,66 +827,6 @@ MxResult LegoVideoManager::ConfigureD3DRM() return SUCCESS; } -void LegoVideoManager::DrawDigitToBuffer32(uint8_t* p_dst, int p_pitch, int p_x, int p_y, int p_digit, uint32_t p_color) -{ - if (p_digit < 0 || p_digit > 9) { - return; - } - - uint32_t* pixels = (uint32_t*) p_dst; - int rowStride = p_pitch / 4; - - // 4x5 bitmap font - const uint8_t digitFont[5][10] = { - {0b1111, 0b0001, 0b1111, 0b1111, 0b1001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111}, - {0b1001, 0b0001, 0b0001, 0b0001, 0b1001, 0b1000, 0b1000, 0b0001, 0b1001, 0b1001}, - {0b1001, 0b0001, 0b1111, 0b1111, 0b1111, 0b1111, 0b1111, 0b0010, 0b1111, 0b1111}, - {0b1001, 0b0001, 0b1000, 0b0001, 0b0001, 0b0001, 0b1001, 0b0010, 0b1001, 0b0001}, - {0b1111, 0b0001, 0b1111, 0b1111, 0b0001, 0b1111, 0b1111, 0b0100, 0b1111, 
0b1111}, - }; - - for (int row = 0; row < 5; ++row) { - uint8_t bits = digitFont[row][p_digit]; - for (int col = 0; col < 5; ++col) { - if (bits & (1 << (4 - col))) { - for (int dy = 0; dy < 2; ++dy) { - for (int dx = 0; dx < 2; ++dx) { - pixels[(p_y + row * 2 + dy) * rowStride + (p_x + col * 2 + dx)] = p_color; - } - } - } - } - } -} - -void LegoVideoManager::DrawTextToSurface32( - uint8_t* p_dst, - int p_pitch, - int p_x, - int p_y, - const char* p_text, - uint32_t p_color -) -{ - while (*p_text) { - if (*p_text >= '0' && *p_text <= '9') { - DrawDigitToBuffer32(p_dst, p_pitch, p_x, p_y, *p_text - '0', p_color); - p_x += 10; - } - else if (*p_text == '.') { - uint32_t* pixels = (uint32_t*) p_dst; - int rowStride = p_pitch / 4; - for (int dy = 0; dy < 2; ++dy) { - for (int dx = 0; dx < 2; ++dx) { - pixels[(p_y + 10 + dy) * rowStride + (p_x + 2 + dx)] = p_color; - } - } - p_x += 4; - } - ++p_text; - } -} - void LegoVideoManager::SetCursorBitmap(const CursorBitmap* p_cursorBitmap) { if (p_cursorBitmap == NULL) { diff --git a/LEGO1/lego/legoomni/src/worlds/infocenter.cpp b/LEGO1/lego/legoomni/src/worlds/infocenter.cpp index bebf667d..48468a9e 100644 --- a/LEGO1/lego/legoomni/src/worlds/infocenter.cpp +++ b/LEGO1/lego/legoomni/src/worlds/infocenter.cpp @@ -1499,7 +1499,7 @@ void Infocenter::StartCredits() GetViewManager()->RemoveAll(NULL); InvokeAction(Extra::e_opendisk, *g_creditsScript, CreditsScript::c_LegoCredits, NULL); - SetAppCursor(e_cursorArrow); + SetAppCursor(VideoManager()->GetDrawCursor() ? 
e_cursorNone : e_cursorArrow); } // FUNCTION: LEGO1 0x10071250 diff --git a/LEGO1/omni/src/video/mxdisplaysurface.cpp b/LEGO1/omni/src/video/mxdisplaysurface.cpp index 4ebccb58..f7e265bb 100644 --- a/LEGO1/omni/src/video/mxdisplaysurface.cpp +++ b/LEGO1/omni/src/video/mxdisplaysurface.cpp @@ -305,6 +305,7 @@ void MxDisplaySurface::Destroy() // FUNCTION: BETA10 0x1013fe15 void MxDisplaySurface::SetPalette(MxPalette* p_palette) { +#ifndef MINIWIN if ((m_surfaceDesc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) == DDPF_PALETTEINDEXED8) { m_ddSurface1->SetPalette(p_palette->CreateNativePalette()); m_ddSurface2->SetPalette(p_palette->CreateNativePalette()); @@ -326,8 +327,10 @@ void MxDisplaySurface::SetPalette(MxPalette* p_palette) DeleteObject(hpal); } } +#else + m_ddSurface1->SetPalette(p_palette->CreateNativePalette()); + m_ddSurface2->SetPalette(p_palette->CreateNativePalette()); -#ifndef MINIWIN MxS32 bitCount = m_surfaceDesc.ddpfPixelFormat.dwRGBBitCount; if (bitCount == 8) { return; @@ -449,17 +452,6 @@ void MxDisplaySurface::VTable0x28( } #endif - if (m_surfaceDesc.ddpfPixelFormat.dwRGBBitCount != 32) { - DDCOLORKEY colorKey; - if (m_surfaceDesc.ddpfPixelFormat.dwRGBBitCount == 8) { - colorKey.dwColorSpaceLowValue = colorKey.dwColorSpaceHighValue = 0x10; - } - else { - colorKey.dwColorSpaceLowValue = colorKey.dwColorSpaceHighValue = RGB555_CREATE(0x1f, 0, 0x1f); - } - tempSurface->SetColorKey(DDCKEY_SRCBLT, &colorKey); - } - DDSURFACEDESC tempDesc; memset(&tempDesc, 0, sizeof(tempDesc)); tempDesc.dwSize = sizeof(tempDesc); @@ -511,10 +503,10 @@ void MxDisplaySurface::VTable0x28( if (m_videoParam.Flags().GetDoubleScaling()) { RECT destRect = {p_right, p_bottom, p_right + p_width * 2, p_bottom + p_height * 2}; - m_ddSurface2->Blt(&destRect, tempSurface, NULL, DDBLT_WAIT | DDBLT_KEYSRC, NULL); + m_ddSurface2->Blt(&destRect, tempSurface, NULL, DDBLT_WAIT, NULL); } else { - m_ddSurface2->BltFast(p_right, p_bottom, tempSurface, NULL, DDBLTFAST_WAIT | 
DDBLTFAST_SRCCOLORKEY); + m_ddSurface2->BltFast(p_right, p_bottom, tempSurface, NULL, DDBLTFAST_WAIT); } tempSurface->Release(); @@ -1083,10 +1075,6 @@ LPDIRECTDRAWSURFACE MxDisplaySurface::FUN_100bc8b0(MxS32 p_width, MxS32 p_height return NULL; } - if (surfaceDesc.ddpfPixelFormat.dwRGBBitCount == 8) { - return NULL; - } - surfaceDesc.dwWidth = p_width; surfaceDesc.dwHeight = p_height; surfaceDesc.dwFlags = DDSD_PIXELFORMAT | DDSD_WIDTH | DDSD_HEIGHT | DDSD_CAPS; diff --git a/LEGO1/viewmanager/viewmanager.cpp b/LEGO1/viewmanager/viewmanager.cpp index ac1a36a3..9b4a3482 100644 --- a/LEGO1/viewmanager/viewmanager.cpp +++ b/LEGO1/viewmanager/viewmanager.cpp @@ -238,7 +238,9 @@ inline void ViewManager::ManageVisibilityAndDetailRecursively(ViewROI* p_from, i const CompoundObject* comp = p_from->GetComp(); if (p_lodLevel == ViewROI::c_lodLevelUnset) { - if (p_from->GetWorldBoundingSphere().Radius() > 0.001F) { + // FIX: Use 0.002 threshold to avoid x87 extended precision boundary + // issues where 0.001 sentinel radius compares as > 0.001F on x87. + if (p_from->GetWorldBoundingSphere().Radius() > 0.002F) { float projectedSize = ProjectedSize(p_from->GetWorldBoundingSphere()); if (RealtimeView::GetUserMaxLOD() <= 5.0f && projectedSize < seconds_allowed * g_viewDistance) { diff --git a/miniwin/CMakeLists.txt b/miniwin/CMakeLists.txt index 112adfda..e65feb67 100644 --- a/miniwin/CMakeLists.txt +++ b/miniwin/CMakeLists.txt @@ -69,6 +69,12 @@ if(NOT (VITA OR WINDOWS_STORE)) endif() endif() +if(DOS) + list(REMOVE_ITEM GRAPHICS_BACKENDS USE_SDL_GPU USE_OPENGL1 USE_OPENGLES2) #USE_SDL_GPU +endif() + +list(APPEND GRAPHICS_BACKENDS USE_PALETTE_SW_RENDER) + if(NINTENDO_SWITCH) # Remove USE_OPENGL1 as incompatible. # Remove everything else as not needed. 
@@ -139,6 +145,12 @@ if(USE_SOFTWARE_RENDER IN_LIST GRAPHICS_BACKENDS) ) endif() +if(USE_PALETTE_SW_RENDER IN_LIST GRAPHICS_BACKENDS) + target_sources(miniwin PRIVATE + src/d3drm/backends/palettesw/renderer.cpp + ) +endif() + target_compile_definitions(miniwin PUBLIC MINIWIN) target_include_directories(miniwin diff --git a/miniwin/src/d3drm/backends/palettesw/renderer.cpp b/miniwin/src/d3drm/backends/palettesw/renderer.cpp new file mode 100644 index 00000000..a4575267 --- /dev/null +++ b/miniwin/src/d3drm/backends/palettesw/renderer.cpp @@ -0,0 +1,1479 @@ +#include "d3drmrenderer.h" +#include "d3drmrenderer_palettesw.h" +#include "ddsurface_impl.h" +#include "mathutils.h" +#include "meshutils.h" +#include "miniwin.h" + +#include +#include +#include +#include +#include + +struct PalVertexXY { + float x, y, z, w; + Uint8 brightness; // 0..LIGHT_LEVELS-1 + float u_over_w, v_over_w; + float one_over_w; +}; + +static constexpr int PERSP_STEP = 16; + +inline static D3DVECTOR PalSubtract(const D3DVECTOR& a, const D3DVECTOR& b) +{ + return {a.x - b.x, a.y - b.y, a.z - b.z}; +} + +inline static bool PalIsBackface(const D3DVECTOR& a, const D3DVECTOR& b, const D3DVECTOR& c) +{ + D3DVECTOR normal = CrossProduct(PalSubtract(b, a), PalSubtract(c, a)); + return DotProduct(normal, a) >= 0.0f; +} + +Direct3DRMPaletteSWRenderer::Direct3DRMPaletteSWRenderer(DWORD width, DWORD height) +{ + m_virtualWidth = width; + m_virtualHeight = height; + + memset(m_lightLUT, 0, sizeof(m_lightLUT)); + memset(m_blendLUT, 0, sizeof(m_blendLUT)); + ViewportTransform viewportTransform = {1.0f, 0.0f, 0.0f}; + Resize(width, height, viewportTransform); +} + +Direct3DRMPaletteSWRenderer::~Direct3DRMPaletteSWRenderer() +{ + SDL_DestroySurface(m_renderedImage); + if (m_flipPalette) { + SDL_DestroyPalette(m_flipPalette); + } +} + +static bool PalettesEqual(SDL_Palette* a, SDL_Palette* b) +{ + if (!a || !b || a->ncolors != b->ncolors) { + return false; + } + return memcmp(a->colors, b->colors, a->ncolors * 
sizeof(SDL_Color)) == 0; +} + +// --------------------------------------------------------------------------- +// Lighting LUT +// --------------------------------------------------------------------------- +// For each palette entry and brightness level, precompute the closest palette +// index. Brightness 0 = black, LIGHT_LEVELS-1 = full colour. +// This avoids per-pixel RGB maths entirely — the rasteriser just does: +// outPixel = m_lightLUT[texel * LIGHT_LEVELS + brightness] +// --------------------------------------------------------------------------- + +void Direct3DRMPaletteSWRenderer::BuildLightingLUT() +{ + // Use m_flipPalette (snapshot from Flip time) if available — that's the + // palette actually sent to the VGA DAC. Fall back to m_palette for the + // first frame before any Flip has occurred. + SDL_Palette* pal = m_flipPalette ? m_flipPalette : m_palette; + if (!pal) { + return; + } + + const SDL_Color* colors = pal->colors; + const int ncolors = pal->ncolors; + + for (int idx = 0; idx < 256; ++idx) { + int sr, sg, sb; + if (idx < ncolors) { + sr = colors[idx].r; + sg = colors[idx].g; + sb = colors[idx].b; + } + else { + sr = sg = sb = 0; + } + + for (int lev = 0; lev < LIGHT_LEVELS; ++lev) { + // Target colour at this brightness + int tr = (sr * lev) / (LIGHT_LEVELS - 1); + int tg = (sg * lev) / (LIGHT_LEVELS - 1); + int tb = (sb * lev) / (LIGHT_LEVELS - 1); + + // Find nearest palette entry (redmean perceptual distance) + int bestDist = INT_MAX; + Uint8 bestIdx = static_cast(idx); + for (int c = 0; c < ncolors; ++c) { + int dr = colors[c].r - tr; + int dg = colors[c].g - tg; + int db = colors[c].b - tb; + int rmean = (tr + colors[c].r) / 2; + int dist = ((512 + rmean) * dr * dr >> 8) + 4 * dg * dg + ((767 - rmean) * db * db >> 8); + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + + m_lightLUT[idx * LIGHT_LEVELS + lev] = bestIdx; + } + } + + m_lightLUTDirty = false; +} + +void 
Direct3DRMPaletteSWRenderer::BuildBlendLUT() +{ + SDL_Palette* pal = m_flipPalette ? m_flipPalette : m_palette; + if (!pal) { + memset(m_blendLUT, 0, sizeof(m_blendLUT)); + return; + } + + const SDL_Color* colors = pal->colors; + const int ncolors = pal->ncolors; + + for (int a = 0; a < 256; ++a) { + int ar, ag, ab; + if (a < ncolors) { + ar = colors[a].r; + ag = colors[a].g; + ab = colors[a].b; + } + else { + ar = ag = ab = 0; + } + + for (int b = 0; b < 256; ++b) { + int br, bg, bb; + if (b < ncolors) { + br = colors[b].r; + bg = colors[b].g; + bb = colors[b].b; + } + else { + br = bg = bb = 0; + } + + // 50/50 blend + int tr = (ar + br) >> 1; + int tg = (ag + bg) >> 1; + int tb = (ab + bb) >> 1; + + // Find nearest palette entry + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < ncolors; ++c) { + int dr = colors[c].r - tr; + int dg = colors[c].g - tg; + int db = colors[c].b - tb; + int rmean = (tr + colors[c].r) / 2; + int dist = ((512 + rmean) * dr * dr >> 8) + 4 * dg * dg + ((767 - rmean) * db * db >> 8); + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + + m_blendLUT[a * 256 + b] = bestIdx; + } + } +} + +void Direct3DRMPaletteSWRenderer::PushLights(const SceneLight* lights, size_t count) +{ + m_lights.assign(lights, lights + count); +} + +void Direct3DRMPaletteSWRenderer::SetFrustumPlanes(const Plane* frustumPlanes) +{ + memcpy(m_frustumPlanes, frustumPlanes, sizeof(m_frustumPlanes)); +} + +void Direct3DRMPaletteSWRenderer::SetProjection(const D3DRMMATRIX4D& projection, D3DVALUE front, D3DVALUE back) +{ + m_front = front; + m_back = back; + memcpy(m_projection, projection, sizeof(D3DRMMATRIX4D)); +} + +void Direct3DRMPaletteSWRenderer::ClearZBuffer() +{ + static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32-bit"); + const size_t size = m_zBuffer.size(); + uint32_t* dst = reinterpret_cast(m_zBuffer.data()); + for (size_t i = 0; i < size; ++i) { + dst[i] = 
0x7F800000u; + } +} + +void Direct3DRMPaletteSWRenderer::ProjectVertex(const D3DVECTOR& v, D3DRMVECTOR4D& p) const +{ + float px = m_projection[0][0] * v.x + m_projection[1][0] * v.y + m_projection[2][0] * v.z + m_projection[3][0]; + float py = m_projection[0][1] * v.x + m_projection[1][1] * v.y + m_projection[2][1] * v.z + m_projection[3][1]; + float pz = m_projection[0][2] * v.x + m_projection[1][2] * v.y + m_projection[2][2] * v.z + m_projection[3][2]; + float pw = m_projection[0][3] * v.x + m_projection[1][3] * v.y + m_projection[2][3] * v.z + m_projection[3][3]; + + p.w = pw; + + // Perspective divide + if (pw != 0.0f) { + float invW = 1.0f / pw; + px *= invW; + py *= invW; + pz *= invW; + } + + // Map from NDC [-1,1] to screen coordinates + p.x = (px * 0.5f + 0.5f) * m_width; + p.y = (1.0f - (py * 0.5f + 0.5f)) * m_height; + p.z = pz; +} + +// --------------------------------------------------------------------------- +// Lighting — returns a brightness level 0..LIGHT_LEVELS-1 +// --------------------------------------------------------------------------- + +// Fast integer-based pow approximation for specular highlights. +// Repeated squaring: computes base^exp where exp is a positive integer. +// Good enough for 8-bit paletted lighting, avoids expensive FPU std::pow. 
+inline static float FastPow(float base, float exponent) +{ + if (base <= 0.0f) { + return 0.0f; + } + int iexp = static_cast(exponent + 0.5f); + if (iexp <= 0) { + return 1.0f; + } + float result = 1.0f; + float b = base; + while (iexp > 0) { + if (iexp & 1) { + result *= b; + } + b *= b; + iexp >>= 1; + } + return result; +} + +Uint8 Direct3DRMPaletteSWRenderer::ApplyLighting( + const D3DVECTOR& position, + const D3DVECTOR& normal, + const Appearance& appearance, + Uint8 texel +) +{ + (void) texel; // brightness is independent of the palette index + + float intensity = 0.0f; + + D3DVECTOR n = Normalize(TransformNormal(normal, m_normalMatrix)); + + for (const auto& light : m_lights) { + if (light.positional == 0.0f && light.directional == 0.0f) { + // Ambient + float lum = light.color.r * 0.299f + light.color.g * 0.587f + light.color.b * 0.114f; + intensity += lum; + continue; + } + + // Precompute luminance once per light (avoids redundant multiplies) + float lum = light.color.r * 0.299f + light.color.g * 0.587f + light.color.b * 0.114f; + + D3DVECTOR lightVec; + if (light.directional == 1.0f) { + lightVec = {-light.direction.x, -light.direction.y, -light.direction.z}; + } + else { + lightVec = {light.position.x - position.x, light.position.y - position.y, light.position.z - position.z}; + } + lightVec = Normalize(lightVec); + + float dotNL = DotProduct(n, lightVec); + if (dotNL > 0.0f) { + intensity += dotNL * lum; + + // Specular — use fast integer pow instead of std::pow + if (appearance.shininess > 0.0f && light.directional == 1.0f) { + D3DVECTOR viewVec = Normalize({-position.x, -position.y, -position.z}); + D3DVECTOR H = Normalize({lightVec.x + viewVec.x, lightVec.y + viewVec.y, lightVec.z + viewVec.z}); + float dotNH = std::max(DotProduct(n, H), 0.0f); + float spec = FastPow(dotNH, appearance.shininess); + intensity += spec * lum; + } + } + } + + intensity = std::min(intensity, 1.0f); + int level = static_cast(intensity * (LIGHT_LEVELS - 1) + 0.5f); + if 
(level < 0) { + level = 0; + } + if (level >= LIGHT_LEVELS) { + level = LIGHT_LEVELS - 1; + } + return static_cast(level); +} + +static D3DRMVERTEX PalSplitEdge(D3DRMVERTEX a, const D3DRMVERTEX& b, float plane) +{ + float t = (plane - a.position.z) / (b.position.z - a.position.z); + a.position.x += t * (b.position.x - a.position.x); + a.position.y += t * (b.position.y - a.position.y); + a.position.z = plane; + + a.texCoord.u += t * (b.texCoord.u - a.texCoord.u); + a.texCoord.v += t * (b.texCoord.v - a.texCoord.v); + + a.normal.x += t * (b.normal.x - a.normal.x); + a.normal.y += t * (b.normal.y - a.normal.y); + a.normal.z += t * (b.normal.z - a.normal.z); + + a.normal = Normalize(a.normal); + + return a; +} + +static bool PalIsTriangleOutsideViewCone( + const D3DVECTOR& v0, + const D3DVECTOR& v1, + const D3DVECTOR& v2, + const Plane* frustumPlanes +) +{ + for (int i = 0; i < 4; ++i) { + const Plane& plane = frustumPlanes[i]; + + float d0 = DotProduct(plane.normal, v0) + plane.d; + float d1 = DotProduct(plane.normal, v1) + plane.d; + float d2 = DotProduct(plane.normal, v2) + plane.d; + + if (d0 < 0 && d1 < 0 && d2 < 0) { + return true; + } + } + return false; +} + +void Direct3DRMPaletteSWRenderer::DrawTriangleClipped(const D3DRMVERTEX (&v)[3], const Appearance& appearance) +{ + bool in0 = v[0].position.z >= m_front; + bool in1 = v[1].position.z >= m_front; + bool in2 = v[2].position.z >= m_front; + + int insideCount = in0 + in1 + in2; + + if (insideCount == 0 || (v[0].position.z > m_back && v[1].position.z > m_back && v[2].position.z > m_back)) { + return; + } + if (PalIsTriangleOutsideViewCone(v[0].position, v[1].position, v[2].position, m_frustumPlanes)) { + return; + } + + if (insideCount == 3) { + DrawTriangleProjected(v[0], v[1], v[2], appearance); + } + else if (insideCount == 2) { + D3DRMVERTEX split; + if (!in0) { + split = PalSplitEdge(v[2], v[0], m_front); + DrawTriangleProjected(v[1], v[2], split, appearance); + DrawTriangleProjected(v[1], split, 
PalSplitEdge(v[1], v[0], m_front), appearance); + } + else if (!in1) { + split = PalSplitEdge(v[0], v[1], m_front); + DrawTriangleProjected(v[2], v[0], split, appearance); + DrawTriangleProjected(v[2], split, PalSplitEdge(v[2], v[1], m_front), appearance); + } + else { + split = PalSplitEdge(v[1], v[2], m_front); + DrawTriangleProjected(v[0], v[1], split, appearance); + DrawTriangleProjected(v[0], split, PalSplitEdge(v[0], v[2], m_front), appearance); + } + } + else if (in0) { + DrawTriangleProjected(v[0], PalSplitEdge(v[0], v[1], m_front), PalSplitEdge(v[0], v[2], m_front), appearance); + } + else if (in1) { + DrawTriangleProjected(PalSplitEdge(v[1], v[0], m_front), v[1], PalSplitEdge(v[1], v[2], m_front), appearance); + } + else { + DrawTriangleProjected(PalSplitEdge(v[2], v[0], m_front), PalSplitEdge(v[2], v[1], m_front), v[2], appearance); + } +} + +void Direct3DRMPaletteSWRenderer::DrawTriangleProjected( + const D3DRMVERTEX& v0, + const D3DRMVERTEX& v1, + const D3DRMVERTEX& v2, + const Appearance& appearance +) +{ + if (PalIsBackface(v0.position, v1.position, v2.position)) { + return; + } + + D3DRMVECTOR4D p0, p1, p2; + ProjectVertex(v0.position, p0); + ProjectVertex(v1.position, p1); + ProjectVertex(v2.position, p2); + + Uint8 b0 = ApplyLighting(v0.position, v0.normal, appearance, 0); + Uint8 b1 = b0, b2 = b0; + if (!appearance.flat) { + b1 = ApplyLighting(v1.position, v1.normal, appearance, 0); + b2 = ApplyLighting(v2.position, v2.normal, appearance, 0); + } + + Uint8* pixels = static_cast(m_renderedImage->pixels); + int pitch = m_renderedImage->pitch; + + PalVertexXY verts[3] = { + {p0.x, p0.y, p0.z, p0.w, b0, 0, 0, 0}, + {p1.x, p1.y, p1.z, p1.w, b1, 0, 0, 0}, + {p2.x, p2.y, p2.z, p2.w, b2, 0, 0, 0}, + }; + + Uint32 textureId = appearance.textureId; + int texturePitch = 0; + Uint8* texels = nullptr; + int texWidthScale = 0; + int texHeightScale = 0; + + if (textureId != NO_TEXTURE_ID) { + SDL_Surface* texture = m_textures[textureId].cached; + if (texture) { 
+ texturePitch = texture->pitch; + texels = static_cast(texture->pixels); + texWidthScale = texture->w - 1; + texHeightScale = texture->h - 1; + } + + verts[0].u_over_w = v0.texCoord.u / p0.w; + verts[0].v_over_w = v0.texCoord.v / p0.w; + verts[0].one_over_w = 1.0f / p0.w; + + verts[1].u_over_w = v1.texCoord.u / p1.w; + verts[1].v_over_w = v1.texCoord.v / p1.w; + verts[1].one_over_w = 1.0f / p1.w; + + verts[2].u_over_w = v2.texCoord.u / p2.w; + verts[2].v_over_w = v2.texCoord.v / p2.w; + verts[2].one_over_w = 1.0f / p2.w; + } + + // Sort verts + if (verts[0].y > verts[1].y) { + std::swap(verts[0], verts[1]); + } + if (verts[1].y > verts[2].y) { + std::swap(verts[1], verts[2]); + } + if (verts[0].y > verts[1].y) { + std::swap(verts[0], verts[1]); + } + + int minY = std::max(0, static_cast(std::ceil(verts[0].y))); + int maxY = std::min(m_height - 1, static_cast(std::floor(verts[2].y))); + + // For untextured triangles, find the nearest palette entry for the + // material colour so we can use the LUT. + Uint8 materialPalIdx = 0; + if (!texels && m_palette) { + Uint8 mr = appearance.color.r; + Uint8 mg = appearance.color.g; + Uint8 mb = appearance.color.b; + int bestDist = INT_MAX; + for (int c = 0; c < m_palette->ncolors; ++c) { + int dr = m_palette->colors[c].r - mr; + int dg = m_palette->colors[c].g - mg; + int db = m_palette->colors[c].b - mb; + int dist = dr * dr + dg * dg + db * db; + if (dist < bestDist) { + bestDist = dist; + materialPalIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + } + + Uint8 alpha = appearance.color.a; + + // --- Set up incremental edge stepping --- + // Long edge: verts[0] -> verts[2] (always the "right" side before swap) + float longDy = verts[2].y - verts[0].y; + float invLongDy = (longDy != 0.0f) ? 
1.0f / longDy : 0.0f; + // Long edge values at minY + float longT0 = (minY - verts[0].y) * invLongDy; + PalVertexXY longEdge; + longEdge.x = verts[0].x + longT0 * (verts[2].x - verts[0].x); + longEdge.z = verts[0].z + longT0 * (verts[2].z - verts[0].z); + longEdge.u_over_w = verts[0].u_over_w + longT0 * (verts[2].u_over_w - verts[0].u_over_w); + longEdge.v_over_w = verts[0].v_over_w + longT0 * (verts[2].v_over_w - verts[0].v_over_w); + longEdge.one_over_w = verts[0].one_over_w + longT0 * (verts[2].one_over_w - verts[0].one_over_w); + float longBri = verts[0].brightness + longT0 * (static_cast(verts[2].brightness) - verts[0].brightness); + // Long edge step per scanline + float longStepX = (verts[2].x - verts[0].x) * invLongDy; + float longStepZ = (verts[2].z - verts[0].z) * invLongDy; + float longStepBri = (static_cast(verts[2].brightness) - verts[0].brightness) * invLongDy; + float longStepUW = (verts[2].u_over_w - verts[0].u_over_w) * invLongDy; + float longStepVW = (verts[2].v_over_w - verts[0].v_over_w) * invLongDy; + float longStepOW = (verts[2].one_over_w - verts[0].one_over_w) * invLongDy; + + // Short edge: verts[0]->verts[1] then verts[1]->verts[2] + // We set up the first segment and re-init at the midpoint. + float shortBri; + auto setupShortEdge = [&](const PalVertexXY& a, + const PalVertexXY& b, + PalVertexXY& edge, + float& sBri, + float& stepX, + float& stepZ, + float& stepBri, + float& stepUW, + float& stepVW, + float& stepOW, + int startY) { + float dy = b.y - a.y; + float invDy = (dy != 0.0f) ? 
1.0f / dy : 0.0f; + float t0 = (startY - a.y) * invDy; + edge.x = a.x + t0 * (b.x - a.x); + edge.z = a.z + t0 * (b.z - a.z); + sBri = a.brightness + t0 * (static_cast(b.brightness) - a.brightness); + edge.u_over_w = a.u_over_w + t0 * (b.u_over_w - a.u_over_w); + edge.v_over_w = a.v_over_w + t0 * (b.v_over_w - a.v_over_w); + edge.one_over_w = a.one_over_w + t0 * (b.one_over_w - a.one_over_w); + stepX = (b.x - a.x) * invDy; + stepZ = (b.z - a.z) * invDy; + stepBri = (static_cast(b.brightness) - a.brightness) * invDy; + stepUW = (b.u_over_w - a.u_over_w) * invDy; + stepVW = (b.v_over_w - a.v_over_w) * invDy; + stepOW = (b.one_over_w - a.one_over_w) * invDy; + }; + + PalVertexXY shortEdge; + float shortStepX, shortStepZ, shortStepBri, shortStepUW, shortStepVW, shortStepOW; + int midY = static_cast(std::ceil(verts[1].y)); + bool pastMid = (minY >= midY); + if (pastMid) { + setupShortEdge( + verts[1], + verts[2], + shortEdge, + shortBri, + shortStepX, + shortStepZ, + shortStepBri, + shortStepUW, + shortStepVW, + shortStepOW, + minY + ); + } + else { + setupShortEdge( + verts[0], + verts[1], + shortEdge, + shortBri, + shortStepX, + shortStepZ, + shortStepBri, + shortStepUW, + shortStepVW, + shortStepOW, + minY + ); + } + + // Precompute material LUT row pointer for untextured triangles + const Uint8* materialLightRow = texels ? 
nullptr : &m_lightLUT[materialPalIdx * LIGHT_LEVELS]; + + for (int y = minY; y <= maxY; ++y) { + // Switch to second short edge segment at midpoint + if (!pastMid && y >= midY) { + pastMid = true; + setupShortEdge( + verts[1], + verts[2], + shortEdge, + shortBri, + shortStepX, + shortStepZ, + shortStepBri, + shortStepUW, + shortStepVW, + shortStepOW, + y + ); + } + + // Determine left/right from the two edges + float lx, lz, lBri, lUW, lVW, lOW; + float rx, rz, rBri, rUW, rVW, rOW; + if (shortEdge.x <= longEdge.x) { + lx = shortEdge.x; + lz = shortEdge.z; + lBri = shortBri; + lUW = shortEdge.u_over_w; + lVW = shortEdge.v_over_w; + lOW = shortEdge.one_over_w; + rx = longEdge.x; + rz = longEdge.z; + rBri = longBri; + rUW = longEdge.u_over_w; + rVW = longEdge.v_over_w; + rOW = longEdge.one_over_w; + } + else { + lx = longEdge.x; + lz = longEdge.z; + lBri = longBri; + lUW = longEdge.u_over_w; + lVW = longEdge.v_over_w; + lOW = longEdge.one_over_w; + rx = shortEdge.x; + rz = shortEdge.z; + rBri = shortBri; + rUW = shortEdge.u_over_w; + rVW = shortEdge.v_over_w; + rOW = shortEdge.one_over_w; + } + + int startX = std::max(0, static_cast(std::ceil(lx))); + int endX = std::min(m_width - 1, static_cast(std::floor(rx))); + + float span = rx - lx; + if (span <= 0.0f || startX > endX) { + // Step edges and continue + shortEdge.x += shortStepX; + shortEdge.z += shortStepZ; + shortBri += shortStepBri; + shortEdge.u_over_w += shortStepUW; + shortEdge.v_over_w += shortStepVW; + shortEdge.one_over_w += shortStepOW; + longEdge.x += longStepX; + longEdge.z += longStepZ; + longBri += longStepBri; + longEdge.u_over_w += longStepUW; + longEdge.v_over_w += longStepVW; + longEdge.one_over_w += longStepOW; + continue; + } + + float invSpan = 1.0f / span; + + // Precompute per-pixel step values + float zStep = (rz - lz) * invSpan; + float startT = (startX - lx) * invSpan; + float z = lz + startT * (rz - lz); + + // Integer brightness with 8-bit fractional part for stepping + int briFix = 
static_cast((lBri + startT * (rBri - lBri)) * 256.0f); + int briStepFix = static_cast((rBri - lBri) * invSpan * 256.0f); + + Uint8* row = pixels + y * pitch; + float* zPtr = &m_zBuffer[y * m_width + startX]; + + if (texels) { + // --- Textured scanline with periodic perspective correction --- + float uow = lUW + startT * (rUW - lUW); + float vow = lVW + startT * (rVW - lVW); + float oow = lOW + startT * (rOW - lOW); + float uowStep = (rUW - lUW) * invSpan; + float vowStep = (rVW - lVW) * invSpan; + float oowStep = (rOW - lOW) * invSpan; + + int x = startX; + while (x <= endX) { + // Perspective correction at this point + float inv_w0 = 1.0f / oow; + float u0 = uow * inv_w0; + float v0 = vow * inv_w0; + + int remaining = endX - x + 1; + int blockLen = (remaining > PERSP_STEP) ? PERSP_STEP : remaining; + + // Compute end-of-block perspective-correct UVs + float uowEnd = uow + uowStep * blockLen; + float vowEnd = vow + vowStep * blockLen; + float oowEnd = oow + oowStep * blockLen; + + float inv_w1 = 1.0f / oowEnd; + float u1 = uowEnd * inv_w1; + float v1 = vowEnd * inv_w1; + + // Affine step within this block + float invBlock = (blockLen > 1) ? 
(1.0f / blockLen) : 0.0f; + float uAffStep = (u1 - u0) * invBlock; + float vAffStep = (v1 - v0) * invBlock; + float uAff = u0; + float vAff = v0; + + float zLocal = z; + int briLocal = briFix; + float* zP = zPtr; + + for (int i = 0; i < blockLen; ++i, ++x) { + if (zLocal < *zP) { + int bri = briLocal >> 8; + if (bri < 0) { + bri = 0; + } + else if (bri >= LIGHT_LEVELS) { + bri = LIGHT_LEVELS - 1; + } + + // Fast UV tile: wrap to [0,1) + float uTile = uAff; + float vTile = vAff; + int ui = static_cast(uTile); + int vi = static_cast(vTile); + uTile -= ui; + vTile -= vi; + if (uTile < 0.0f) { + uTile += 1.0f; + } + if (vTile < 0.0f) { + vTile += 1.0f; + } + + int texX = static_cast(uTile * texWidthScale); + int texY = static_cast(vTile * texHeightScale); + + Uint8 texel = texels[texY * texturePitch + texX]; + + Uint8 palIdx = m_lightLUT[texel * LIGHT_LEVELS + bri]; + if (m_transparencyEnabled) { + row[x] = m_blendLUT[palIdx * 256 + row[x]]; + } + else { + *zP = zLocal; + row[x] = palIdx; + } + } + zLocal += zStep; + briLocal += briStepFix; + uAff += uAffStep; + vAff += vAffStep; + ++zP; + } + + z = zLocal; + briFix = briLocal; + zPtr = zP; + uow = uowEnd; + vow = vowEnd; + oow = oowEnd; + } + } + else { + // --- Untextured scanline --- + if (alpha == 0) { + // Fully transparent material, skip entire scanline + } + else { + for (int x = startX; x <= endX; ++x, ++zPtr, z += zStep, briFix += briStepFix) { + if (z >= *zPtr) { + continue; + } + + int bri = briFix >> 8; + if (bri < 0) { + bri = 0; + } + else if (bri >= LIGHT_LEVELS) { + bri = LIGHT_LEVELS - 1; + } + + Uint8 palIdx = materialLightRow[bri]; + + if (m_transparencyEnabled) { + row[x] = m_blendLUT[palIdx * 256 + row[x]]; + } + else { + *zPtr = z; + row[x] = palIdx; + } + } + } + } + + // Step both edges to next scanline + shortEdge.x += shortStepX; + shortEdge.z += shortStepZ; + shortBri += shortStepBri; + shortEdge.u_over_w += shortStepUW; + shortEdge.v_over_w += shortStepVW; + shortEdge.one_over_w += 
shortStepOW; + longEdge.x += longStepX; + longEdge.z += longStepZ; + longBri += longStepBri; + longEdge.u_over_w += longStepUW; + longEdge.v_over_w += longStepVW; + longEdge.one_over_w += longStepOW; + } +} + +struct PalCacheDestroyContext { + Direct3DRMPaletteSWRenderer* renderer; + Uint32 id; +}; + +void Direct3DRMPaletteSWRenderer::AddTextureDestroyCallback(Uint32 id, IDirect3DRMTexture* texture) +{ + auto* ctx = new PalCacheDestroyContext{this, id}; // released by the callback below + texture->AddDestroyCallback( + [](IDirect3DRMObject* obj, void* arg) { + auto* ctx = static_cast(arg); + auto& cacheEntry = ctx->renderer->m_textures[ctx->id]; + if (cacheEntry.cached) { + // Only free surfaces we own (3D texture duplicates). + // UI textures point to the original surface — don't free those. + auto* origTexture = static_cast(cacheEntry.texture); + auto* origSurface = static_cast(origTexture->m_surface); + if (cacheEntry.cached != origSurface->m_surface) { + SDL_UnlockSurface(cacheEntry.cached); + SDL_DestroySurface(cacheEntry.cached); + } + cacheEntry.cached = nullptr; + } + cacheEntry.texture = nullptr; // always free the slot: BeginFrame may have already dropped `cached` + delete ctx; + }, + ctx + ); +} + +// Build a 256-byte remap table from a texture's own palette to the game +// palette. For each source index, find the nearest colour in the game +// palette using the redmean-weighted RGB distance. +static void BuildPaletteRemap(Uint8* remap, SDL_Palette* srcPal, SDL_Palette* dstPal) +{ + if (!srcPal || !dstPal) { + // Identity if either palette is missing.
+ for (int i = 0; i < 256; ++i) { + remap[i] = static_cast(i); + } + return; + } + + const SDL_Color* sc = srcPal->colors; + const SDL_Color* dc = dstPal->colors; + int dn = dstPal->ncolors; + + for (int i = 0; i < 256; ++i) { + if (i >= srcPal->ncolors) { + remap[i] = 0; + continue; + } + int sr = sc[i].r, sg = sc[i].g, sb = sc[i].b; + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < dn; ++c) { + int dr = dc[c].r - sr; + int dg = dc[c].g - sg; + int db = dc[c].b - sb; + // Redmean approximation for perceptual color distance. + // Weights red and blue channels based on the average red + // value of the two colors being compared. This better + // preserves hue than plain Euclidean RGB distance. + int rmean = (sr + dc[c].r) / 2; + int dist = ((512 + rmean) * dr * dr >> 8) + 4 * dg * dg + ((767 - rmean) * db * db >> 8); + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + remap[i] = bestIdx; + } +} + +// Apply a remap table to every pixel in an INDEX8 surface (in-place). +static void RemapSurfacePixels(SDL_Surface* surf, const Uint8* remap) +{ + Uint8* px = static_cast(surf->pixels); + int pitch = surf->pitch; + for (int y = 0; y < surf->h; ++y) { + Uint8* row = px + y * pitch; + for (int x = 0; x < surf->w; ++x) { + row[x] = remap[row[x]]; + } + } +} + +// Remap an already-duplicated surface's pixels from its own palette to the +// given target palette. Called from BeginFrame so the remap always uses the +// palette that will be active for this frame's Flip. 
+static void RemapSurfaceToTargetPalette(SDL_Surface* surf, SDL_Palette* targetPal) +{ + SDL_Palette* srcPal = SDL_GetSurfacePalette(surf); + if (!srcPal || !targetPal || srcPal == targetPal) { + return; + } + + Uint8 remap[256]; + BuildPaletteRemap(remap, srcPal, targetPal); + + bool wasLocked = (surf->flags & SDL_SURFACE_LOCKED) != 0; + if (!wasLocked) { + SDL_LockSurface(surf); + } + RemapSurfacePixels(surf, remap); + if (!wasLocked) { + SDL_UnlockSurface(surf); + } + + SDL_SetSurfacePalette(surf, targetPal); +} + +Uint32 Direct3DRMPaletteSWRenderer::GetTextureId(IDirect3DRMTexture* iTexture, bool isUI, float scaleX, float scaleY) +{ + auto texture = static_cast(iTexture); + auto surface = static_cast(texture->m_surface); + + // Check if already mapped + for (Uint32 i = 0; i < m_textures.size(); ++i) { + auto& texRef = m_textures[i]; + if (texRef.texture == texture) { + if (isUI) { + // UI textures: always use the original surface directly. + // The game modifies these in-place (e.g. mosaic transition), + // so a cached duplicate would be stale. + texRef.cached = surface->m_surface; + } + else if (texRef.version != texture->m_version || !texRef.cached) { + if (texRef.cached && texRef.cached != surface->m_surface) { // only free duplicates we own, never the texture's own surface + SDL_DestroySurface(texRef.cached); + } + // 3D textures: duplicate and remap to the flip palette. + texRef.cached = SDL_DuplicateSurface(surface->m_surface); + SDL_LockSurface(texRef.cached); + if (m_flipPalette) { + RemapSurfaceToTargetPalette(texRef.cached, m_flipPalette); + } + texRef.version = texture->m_version; + } + return i; + } + } + + SDL_Surface* converted; + if (isUI) { + // Use the original surface directly — no duplicate. + converted = surface->m_surface; + } + else { + // 3D textures: duplicate and remap to the flip palette.
+ converted = SDL_DuplicateSurface(surface->m_surface); + SDL_LockSurface(converted); + if (m_flipPalette) { + RemapSurfaceToTargetPalette(converted, m_flipPalette); + } + } + + // Reuse freed slot + for (Uint32 i = 0; i < m_textures.size(); ++i) { + auto& texRef = m_textures[i]; + if (!texRef.texture) { + texRef = {texture, texture->m_version, converted}; + AddTextureDestroyCallback(i, texture); + return i; + } + } + + m_textures.push_back({texture, texture->m_version, converted}); + AddTextureDestroyCallback(static_cast(m_textures.size() - 1), texture); + return static_cast(m_textures.size() - 1); +} + +static PaletteMeshCache PalUploadMesh(const MeshGroup& meshGroup) +{ + PaletteMeshCache cache{&meshGroup, meshGroup.version}; + cache.flat = meshGroup.quality == D3DRMRENDER_FLAT || meshGroup.quality == D3DRMRENDER_UNLITFLAT; + + if (cache.flat) { + FlattenSurfaces( + meshGroup.vertices.data(), + meshGroup.vertices.size(), + meshGroup.indices.data(), + meshGroup.indices.size(), + meshGroup.texture != nullptr, + cache.vertices, + cache.indices + ); + } + else { + cache.vertices.assign(meshGroup.vertices.begin(), meshGroup.vertices.end()); + cache.indices.assign(meshGroup.indices.begin(), meshGroup.indices.end()); + } + + return cache; +} + +void Direct3DRMPaletteSWRenderer::AddMeshDestroyCallback(Uint32 id, IDirect3DRMMesh* mesh) +{ + auto* ctx = new PalCacheDestroyContext{this, id}; + mesh->AddDestroyCallback( + [](IDirect3DRMObject* obj, void* arg) { + auto* ctx = static_cast(arg); + auto& cacheEntry = ctx->renderer->m_meshes[ctx->id]; + if (cacheEntry.meshGroup) { + cacheEntry.meshGroup = nullptr; + cacheEntry.vertices.clear(); + cacheEntry.indices.clear(); + } + delete ctx; + }, + ctx + ); +} + +Uint32 Direct3DRMPaletteSWRenderer::GetMeshId(IDirect3DRMMesh* mesh, const MeshGroup* meshGroup) +{ + for (Uint32 i = 0; i < m_meshes.size(); ++i) { + auto& cache = m_meshes[i]; + if (cache.meshGroup == meshGroup) { + if (cache.version != meshGroup->version) { + cache = 
std::move(PalUploadMesh(*meshGroup)); + } + return i; + } + } + + auto newCache = PalUploadMesh(*meshGroup); + + for (Uint32 i = 0; i < m_meshes.size(); ++i) { + auto& cache = m_meshes[i]; + if (!cache.meshGroup) { + cache = std::move(newCache); + AddMeshDestroyCallback(i, mesh); + return i; + } + } + + m_meshes.push_back(std::move(newCache)); + AddMeshDestroyCallback((Uint32) (m_meshes.size() - 1), mesh); + return (Uint32) (m_meshes.size() - 1); +} + +HRESULT Direct3DRMPaletteSWRenderer::BeginFrame() +{ + if (!m_renderedImage || !SDL_LockSurface(m_renderedImage)) { + return DDERR_GENERIC; + } + + // Rebuild lighting LUT if palette changed + if (m_lightLUTDirty) { + m_palette = SDL_GetSurfacePalette(m_renderedImage); + BuildLightingLUT(); + BuildBlendLUT(); + } + + // Use the palette snapshot from the previous Flip (m_flipPalette) for + // texture remapping. Only remap when the flip palette actually changes + // (i.e. on scene transitions), not every frame. + if (m_flipPalette && m_flipPaletteDirty) { + m_flipPaletteDirty = false; + + int grassGreens = 0; + for (int i = 0; i < m_flipPalette->ncolors; ++i) { + SDL_Color c = m_flipPalette->colors[i]; + if (c.g >= 60 && c.g <= 125 && c.r >= 35 && c.r <= 95 && c.b >= 20 && c.b <= 65 && c.g > c.r) { + grassGreens++; + } + } + int invalidated = 0; + + // Invalidate all cached 3D textures so they get re-remapped + // against the new palette on next use in GetTextureId. 
+ for (auto& texRef : m_textures) { + if (!texRef.texture || !texRef.cached) { + continue; + } + auto* origSurface = + static_cast(static_cast(texRef.texture)->m_surface); + if (texRef.cached == origSurface->m_surface) { + continue; + } + SDL_UnlockSurface(texRef.cached); + SDL_DestroySurface(texRef.cached); + texRef.cached = nullptr; + texRef.version = 0; + invalidated++; + } + + // Rebuild lighting/blend LUTs for the new palette + BuildLightingLUT(); + BuildBlendLUT(); + } + + ClearZBuffer(); + m_transparencyEnabled = false; + return DD_OK; +} + +void Direct3DRMPaletteSWRenderer::EnableTransparency() +{ + m_transparencyEnabled = true; +} + +void Direct3DRMPaletteSWRenderer::SubmitDraw( + DWORD meshId, + const D3DRMMATRIX4D& modelViewMatrix, + const D3DRMMATRIX4D& worldMatrix, + const D3DRMMATRIX4D& viewMatrix, + const Matrix3x3& normalMatrix, + const Appearance& appearance +) +{ + memcpy(m_normalMatrix, normalMatrix, sizeof(Matrix3x3)); + + auto& mesh = m_meshes[meshId]; + + // Pre-transform all vertex positions and normals + m_transformedVerts.clear(); + m_transformedVerts.reserve(mesh.vertices.size()); + for (const auto& src : mesh.vertices) { + D3DRMVERTEX& dst = m_transformedVerts.emplace_back(); + dst.position = TransformPoint(src.position, modelViewMatrix); + dst.normal = src.normal; + dst.texCoord = src.texCoord; + } + + // Assemble triangles using index buffer + for (size_t i = 0; i + 2 < mesh.indices.size(); i += 3) { + DrawTriangleClipped( + {m_transformedVerts[mesh.indices[i]], + m_transformedVerts[mesh.indices[i + 1]], + m_transformedVerts[mesh.indices[i + 2]]}, + appearance + ); + } +} + +HRESULT Direct3DRMPaletteSWRenderer::FinalizeFrame() +{ + SDL_UnlockSurface(m_renderedImage); + + return DD_OK; +} + +void Direct3DRMPaletteSWRenderer::Resize(int width, int height, const ViewportTransform& viewportTransform) +{ + m_viewportTransform = viewportTransform; + float aspect = static_cast(width) / height; + float virtualAspect = 
static_cast(m_virtualWidth) / m_virtualHeight; + + // Cap to virtual canvase for performance + if (aspect > virtualAspect) { + m_height = std::min(height, (int) m_virtualHeight); + m_width = static_cast(m_height * aspect); + } + else { + m_width = std::min(width, (int) m_virtualWidth); + m_height = static_cast(m_width / aspect); + } + + m_viewportTransform.scale = + std::min(static_cast(m_width) / m_virtualWidth, static_cast(m_height) / m_virtualHeight); + + m_viewportTransform.offsetX = (m_width - (m_virtualWidth * m_viewportTransform.scale)) / 2.0f; + m_viewportTransform.offsetY = (m_height - (m_virtualHeight * m_viewportTransform.scale)) / 2.0f; + + if (m_renderedImage) { + SDL_DestroySurface(m_renderedImage); + } + m_renderedImage = SDL_CreateSurface(m_width, m_height, SDL_PIXELFORMAT_INDEX8); + + // If we already have a palette, attach it to the new surface + if (m_palette) { + SDL_SetSurfacePalette(m_renderedImage, m_palette); + } + + m_zBuffer.resize(m_width * m_height); +} + +void Direct3DRMPaletteSWRenderer::Clear(float r, float g, float b) +{ + if (!m_palette) { + SDL_FillSurfaceRect(m_renderedImage, nullptr, 0); + return; + } + + // Find nearest palette entry + Uint8 tr = static_cast(r * 255); + Uint8 tg = static_cast(g * 255); + Uint8 tb = static_cast(b * 255); + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < m_palette->ncolors; ++c) { + int dr = m_palette->colors[c].r - tr; + int dg = m_palette->colors[c].g - tg; + int db = m_palette->colors[c].b - tb; + int dist = dr * dr + dg * dg + db * db; + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + + SDL_FillSurfaceRect(m_renderedImage, nullptr, bestIdx); +} + +void Direct3DRMPaletteSWRenderer::Flip() +{ + if (!m_renderedImage || !m_renderedImage->pixels) { + return; + } + + SDL_Surface* winSurface = SDL_GetWindowSurface(DDWindow); + if (!winSurface) { + return; + } + if (!winSurface->pixels) { + return; + } + + if 
(winSurface->format == SDL_PIXELFORMAT_INDEX8) { + // Window surface is paletted — copy indices directly and set + // the palette on the destination so the DAC/display picks it up. + if (m_palette) { + SDL_SetSurfacePalette(winSurface, m_palette); + } + + Uint8* src = static_cast(m_renderedImage->pixels); + Uint8* dst = static_cast(winSurface->pixels); + int srcPitch = m_renderedImage->pitch; + int dstPitch = winSurface->pitch; + + if (m_width * 2 <= winSurface->w && m_height * 2 <= winSurface->h) { + // 2x nearest-neighbor upscale (half-res rendering) + for (int row = 0; row < m_height; ++row) { + Uint8* srcRow = src + row * srcPitch; + Uint8* dstRow0 = dst + (row * 2) * dstPitch; + Uint8* dstRow1 = dstRow0 + dstPitch; + for (int col = 0; col < m_width; ++col) { + Uint8 px = srcRow[col]; + dstRow0[col * 2] = px; + dstRow0[col * 2 + 1] = px; + dstRow1[col * 2] = px; + dstRow1[col * 2 + 1] = px; + } + } + } + else { + int copyH = std::min(m_height, winSurface->h); + int copyW = std::min(m_width, winSurface->w); + if (srcPitch == dstPitch && copyW == m_width) { + memcpy(dst, src, static_cast(srcPitch) * copyH); + } + else { + for (int row = 0; row < copyH; ++row) { + memcpy(dst + row * dstPitch, src + row * srcPitch, copyW); + } + } + } + } + else { + // Window surface is not paletted — let SDL convert INDEX8 → dest format. + // Use scaled blit to handle fullscreen on high-res displays. + if (m_palette) { + SDL_SetSurfacePalette(m_renderedImage, m_palette); + SDL_BlitSurfaceScaled(m_renderedImage, nullptr, winSurface, nullptr, SDL_SCALEMODE_NEAREST); + } + } + + SDL_UpdateWindowSurface(DDWindow); + + // Snapshot the palette for the lighting LUT. + SDL_Palette* displayedPal = + (winSurface->format == SDL_PIXELFORMAT_INDEX8) ? 
SDL_GetSurfacePalette(winSurface) : m_palette; + if (displayedPal) { + if (!m_flipPalette) { + m_flipPalette = SDL_CreatePalette(256); + } + if (!PalettesEqual(displayedPal, m_flipPalette)) { + SDL_SetPaletteColors(m_flipPalette, displayedPal->colors, 0, displayedPal->ncolors); + m_flipPaletteDirty = true; + } + } +} + +void Direct3DRMPaletteSWRenderer::Draw2DImage( + Uint32 textureId, + const SDL_Rect& srcRect, + const SDL_Rect& dstRect, + FColor color +) +{ + SDL_Rect centeredRect = { + static_cast(dstRect.x * m_viewportTransform.scale + m_viewportTransform.offsetX), + static_cast(dstRect.y * m_viewportTransform.scale + m_viewportTransform.offsetY), + static_cast(dstRect.w * m_viewportTransform.scale), + static_cast(dstRect.h * m_viewportTransform.scale), + }; + + if (textureId == NO_TEXTURE_ID) { + // Fill with nearest palette colour + if (m_palette) { + Uint8 tr = static_cast(color.r * 255); + Uint8 tg = static_cast(color.g * 255); + Uint8 tb = static_cast(color.b * 255); + int bestDist = INT_MAX; + Uint8 bestIdx = 0; + for (int c = 0; c < m_palette->ncolors; ++c) { + int dr = m_palette->colors[c].r - tr; + int dg = m_palette->colors[c].g - tg; + int db = m_palette->colors[c].b - tb; + int dist = dr * dr + dg * dg + db * db; + if (dist < bestDist) { + bestDist = dist; + bestIdx = static_cast(c); + if (dist == 0) { + break; + } + } + } + SDL_FillSurfaceRect(m_renderedImage, ¢eredRect, bestIdx); + } + else { + SDL_FillSurfaceRect(m_renderedImage, ¢eredRect, 0); + } + return; + } + + // Raw INDEX8 blit — copy palette indices directly, no SDL palette + // remapping. This is the hot path for 2D (video, UI overlays). + SDL_Surface* surface = m_textures[textureId].cached; + + // Only check the surface color key when the caller explicitly requested it + // (via DDBLT_KEYSRC / DDBLTFAST_SRCCOLORKEY). Many surfaces have a stale + // color key set that should not be used for normal blits (e.g. SMK video). 
+ Uint32 colorKey = 0; + bool hasColorKey = SDL_GetSurfaceColorKey(surface, &colorKey); + + bool wasLocked = (surface->flags & SDL_SURFACE_LOCKED) != 0; + if (wasLocked) { + SDL_UnlockSurface(surface); + } + Uint8* src = static_cast(surface->pixels); + Uint8* dst = static_cast(m_renderedImage->pixels); + int srcPitch = surface->pitch; + int dstPitch = m_renderedImage->pitch; + + int dstX0 = std::max(0, centeredRect.x); + int dstY0 = std::max(0, centeredRect.y); + int dstX1 = std::min(m_width, centeredRect.x + centeredRect.w); + int dstY1 = std::min(m_height, centeredRect.y + centeredRect.h); + + Uint8 ckByte = static_cast(colorKey); + + if (!hasColorKey && centeredRect.w == srcRect.w && centeredRect.h == srcRect.h) { + // 1:1 opaque copy — fast memcpy per scanline + int copyW = dstX1 - dstX0; + int copyH = dstY1 - dstY0; + if (copyW > 0 && copyH > 0) { + int srcStartX = srcRect.x + (dstX0 - centeredRect.x); + int srcStartY = srcRect.y + (dstY0 - centeredRect.y); + for (int row = 0; row < copyH; ++row) { + memcpy(dst + (dstY0 + row) * dstPitch + dstX0, src + (srcStartY + row) * srcPitch + srcStartX, copyW); + } + } + } + else if (centeredRect.w == srcRect.w && centeredRect.h == srcRect.h) { + // 1:1 copy with color key + int copyW = dstX1 - dstX0; + int copyH = dstY1 - dstY0; + int srcStartX = srcRect.x + (dstX0 - centeredRect.x); + int srcStartY = srcRect.y + (dstY0 - centeredRect.y); + for (int row = 0; row < copyH; ++row) { + Uint8* srcRow = src + (srcStartY + row) * srcPitch + srcStartX; + Uint8* dstRow = dst + (dstY0 + row) * dstPitch + dstX0; + for (int col = 0; col < copyW; ++col) { + Uint8 px = srcRow[col]; + if (px != ckByte) { + dstRow[col] = px; + } + } + } + } + else if (!hasColorKey) { + // Scaled blit, no color key + for (int dy = dstY0; dy < dstY1; ++dy) { + int sy = srcRect.y + (dy - centeredRect.y) * srcRect.h / centeredRect.h; + Uint8* dstRow = dst + dy * dstPitch; + Uint8* srcRow = src + sy * srcPitch; + for (int dx = dstX0; dx < dstX1; ++dx) { + 
int sx = srcRect.x + (dx - centeredRect.x) * srcRect.w / centeredRect.w; + dstRow[dx] = srcRow[sx]; + } + } + } + else { + // Scaled blit with color key + for (int dy = dstY0; dy < dstY1; ++dy) { + int sy = srcRect.y + (dy - centeredRect.y) * srcRect.h / centeredRect.h; + Uint8* dstRow = dst + dy * dstPitch; + Uint8* srcRow = src + sy * srcPitch; + for (int dx = dstX0; dx < dstX1; ++dx) { + int sx = srcRect.x + (dx - centeredRect.x) * srcRect.w / centeredRect.w; + Uint8 px = srcRow[sx]; + if (px != ckByte) { + dstRow[dx] = px; + } + } + } + } + if (wasLocked) { + SDL_LockSurface(surface); + } +} + +void Direct3DRMPaletteSWRenderer::SetDither(bool dither) +{ + (void) dither; +} + +void Direct3DRMPaletteSWRenderer::SetPalette(SDL_Palette* palette) +{ + m_palette = palette; + m_lightLUTDirty = true; + if (m_renderedImage) { + SDL_SetSurfacePalette(m_renderedImage, palette); + } +} + +void Direct3DRMPaletteSWRenderer::Download(SDL_Surface* target) +{ + if (!m_renderedImage || !target) { + return; + } + + // Extract the viewport region (excluding pillarbox/letterbox borders) + // and scale it to fill the target, matching the software renderer. 
+ SDL_Rect srcRect = { + static_cast(m_viewportTransform.offsetX), + static_cast(m_viewportTransform.offsetY), + static_cast(m_virtualWidth * m_viewportTransform.scale), + static_cast(m_virtualHeight * m_viewportTransform.scale), + }; + + if (m_palette) { + SDL_SetSurfacePalette(m_renderedImage, m_palette); + } + SDL_BlitSurfaceScaled(m_renderedImage, &srcRect, target, nullptr, SDL_SCALEMODE_NEAREST); +} diff --git a/miniwin/src/d3drm/backends/software/renderer.cpp b/miniwin/src/d3drm/backends/software/renderer.cpp index b945fbc9..796cfc92 100644 --- a/miniwin/src/d3drm/backends/software/renderer.cpp +++ b/miniwin/src/d3drm/backends/software/renderer.cpp @@ -64,14 +64,14 @@ void Direct3DRMSoftwareRenderer::ClearZBuffer() const float inf = std::numeric_limits::infinity(); size_t i = 0; -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) +#if (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && defined(__SSE2__) if (SDL_HasSSE2()) { __m128 inf4 = _mm_set1_ps(inf); for (; i + 4 <= size; i += 4) { _mm_storeu_ps(&m_zBuffer[i], inf4); } } -#if defined(__i386__) || defined(_M_IX86) +#if (defined(__i386__) || defined(_M_IX86)) && defined(__MMX__) else if (SDL_HasMMX()) { const __m64 mm_inf = _mm_set_pi32(0x7F800000, 0x7F800000); for (; i + 2 <= size; i += 2) { @@ -826,6 +826,13 @@ void Direct3DRMSoftwareRenderer::SetDither(bool dither) { } +void Direct3DRMSoftwareRenderer::SetPalette(SDL_Palette* palette) +{ + if (m_renderedImage) { + SDL_SetSurfacePalette(m_renderedImage, palette); + } +} + void Direct3DRMSoftwareRenderer::Download(SDL_Surface* target) { SDL_Rect srcRect = { diff --git a/miniwin/src/d3drm/d3drmrenderer.cpp b/miniwin/src/d3drm/d3drmrenderer.cpp index b30c5b23..f7272b34 100644 --- a/miniwin/src/d3drm/d3drmrenderer.cpp +++ b/miniwin/src/d3drm/d3drmrenderer.cpp @@ -20,6 +20,9 @@ #ifdef USE_SOFTWARE_RENDER #include "d3drmrenderer_software.h" #endif +#ifdef USE_PALETTE_SW_RENDER +#include 
"d3drmrenderer_palettesw.h" +#endif #ifdef USE_GXM #include "d3drmrenderer_gxm.h" #endif @@ -74,6 +77,11 @@ Direct3DRMRenderer* CreateDirect3DRMRenderer( if (SDL_memcmp(guid, &GXM_GUID, sizeof(GUID)) == 0) { return GXMRenderer::Create(DDSDesc.dwWidth, DDSDesc.dwHeight, d3d->GetMSAASamples()); } +#endif +#ifdef USE_PALETTE_SW_RENDER + if (SDL_memcmp(guid, &PALETTE_SW_GUID, sizeof(GUID)) == 0) { + return new Direct3DRMPaletteSWRenderer(DDSDesc.dwWidth, DDSDesc.dwHeight); + } #endif return nullptr; } @@ -101,6 +109,9 @@ void Direct3DRMRenderer_EnumDevices(const IDirect3DMiniwin* d3d, LPD3DENUMDEVICE #ifdef USE_SOFTWARE_RENDER Direct3DRMSoftware_EnumDevice(cb, ctx); #endif +#ifdef USE_PALETTE_SW_RENDER + Direct3DRMPaletteSW_EnumDevice(cb, ctx); +#endif #ifdef USE_GXM GXMRenderer_EnumDevice(cb, ctx); #endif diff --git a/miniwin/src/ddraw/ddraw.cpp b/miniwin/src/ddraw/ddraw.cpp index b28a105b..c79b334f 100644 --- a/miniwin/src/ddraw/ddraw.cpp +++ b/miniwin/src/ddraw/ddraw.cpp @@ -245,7 +245,12 @@ HRESULT DirectDrawImpl::GetDisplayMode(LPDDSURFACEDESC lpDDSurfaceDesc) #ifdef MINIWIN_PIXELFORMAT format = MINIWIN_PIXELFORMAT; #else - format = mode->format; + if (m_virtualBPP == 8 || (m_frameBuffer && m_frameBuffer->IsIndex8())) { + format = SDL_PIXELFORMAT_INDEX8; + } + else { + format = mode->format; + } #endif const SDL_PixelFormatDetails* details = SDL_GetPixelFormatDetails(format); @@ -308,6 +313,7 @@ HRESULT DirectDrawImpl::SetDisplayMode(DWORD dwWidth, DWORD dwHeight, DWORD dwBP { m_virtualWidth = dwWidth; m_virtualHeight = dwHeight; + m_virtualBPP = dwBPP; return DD_OK; } diff --git a/miniwin/src/ddraw/ddsurface.cpp b/miniwin/src/ddraw/ddsurface.cpp index 58ee938a..04235a28 100644 --- a/miniwin/src/ddraw/ddsurface.cpp +++ b/miniwin/src/ddraw/ddsurface.cpp @@ -53,13 +53,20 @@ HRESULT DirectDrawSurfaceImpl::Blt( ) { if ((dwFlags & DDBLT_COLORFILL) == DDBLT_COLORFILL) { - Uint8 a = (lpDDBltFx->dwFillColor >> 24) & 0xFF; - Uint8 r = (lpDDBltFx->dwFillColor >> 16) & 0xFF; 
- Uint8 g = (lpDDBltFx->dwFillColor >> 8) & 0xFF; - Uint8 b = lpDDBltFx->dwFillColor & 0xFF; + Uint32 color; + if (m_surface->format == SDL_PIXELFORMAT_INDEX8) { + // For INDEX8 surfaces the fill color is a palette index, not RGBA. + color = lpDDBltFx->dwFillColor & 0xFF; + } + else { + Uint8 a = (lpDDBltFx->dwFillColor >> 24) & 0xFF; + Uint8 r = (lpDDBltFx->dwFillColor >> 16) & 0xFF; + Uint8 g = (lpDDBltFx->dwFillColor >> 8) & 0xFF; + Uint8 b = lpDDBltFx->dwFillColor & 0xFF; - const SDL_PixelFormatDetails* details = SDL_GetPixelFormatDetails(m_surface->format); - Uint32 color = SDL_MapRGBA(details, nullptr, r, g, b, a); + const SDL_PixelFormatDetails* details = SDL_GetPixelFormatDetails(m_surface->format); + color = SDL_MapRGBA(details, nullptr, r, g, b, a); + } if (lpDestRect) { SDL_Rect dstRect = ConvertRect(lpDestRect); SDL_FillSurfaceRect(m_surface, &dstRect, color); diff --git a/miniwin/src/ddraw/framebuffer.cpp b/miniwin/src/ddraw/framebuffer.cpp index 82022649..067596cd 100644 --- a/miniwin/src/ddraw/framebuffer.cpp +++ b/miniwin/src/ddraw/framebuffer.cpp @@ -9,7 +9,11 @@ FrameBufferImpl::FrameBufferImpl(DWORD virtualWidth, DWORD virtualHeight) : m_virtualWidth(virtualWidth), m_virtualHeight(virtualHeight) { +#ifdef __DJGPP__ + m_transferBuffer = new DirectDrawSurfaceImpl(m_virtualWidth, m_virtualHeight, SDL_PIXELFORMAT_INDEX8); +#else m_transferBuffer = new DirectDrawSurfaceImpl(m_virtualWidth, m_virtualHeight, SDL_PIXELFORMAT_RGBA32); +#endif } FrameBufferImpl::~FrameBufferImpl() @@ -49,7 +53,7 @@ HRESULT FrameBufferImpl::Blt( return DDERR_GENERIC; } - if (dynamic_cast(lpDDSrcSurface) == this) { + if (dynamic_cast(lpDDSrcSurface)) { return Flip(nullptr, DDFLIP_WAIT); } @@ -103,7 +107,11 @@ HRESULT FrameBufferImpl::BltFast( int height = lpSrcRect ? 
(lpSrcRect->bottom - lpSrcRect->top) : surface->m_surface->h; RECT destRect = {(int) dwX, (int) dwY, (int) (dwX + width), (int) (dwY + height)}; - return Blt(&destRect, lpDDSrcSurface, lpSrcRect, DDBLT_NONE, nullptr); + DDBltFlags flags = DDBLT_NONE; + if ((dwTrans & DDBLTFAST_SRCCOLORKEY) == DDBLTFAST_SRCCOLORKEY) { + flags = flags | DDBLT_KEYSRC; + } + return Blt(&destRect, lpDDSrcSurface, lpSrcRect, flags, nullptr); } HRESULT FrameBufferImpl::Flip(LPDIRECTDRAWSURFACE lpDDSurfaceTargetOverride, DDFlipFlags dwFlags) @@ -210,8 +218,13 @@ HRESULT FrameBufferImpl::SetColorKey(DDColorKeyFlags dwFlags, LPDDCOLORKEY lpDDC HRESULT FrameBufferImpl::SetPalette(LPDIRECTDRAWPALETTE lpDDPalette) { - if (m_transferBuffer->m_surface->format != SDL_PIXELFORMAT_INDEX8) { - MINIWIN_NOT_IMPLEMENTED(); + // If the transfer buffer is not INDEX8 yet, recreate it — but only when + // the renderer actually works with paletted surfaces (palette SW / DOS). + // GL-based renderers use RGBA32 transfer buffers and convert on upload. 
+ if (m_transferBuffer->m_surface->format != SDL_PIXELFORMAT_INDEX8 && DDRenderer && + DDRenderer->UsesPalettedSurfaces()) { + m_transferBuffer->Release(); + m_transferBuffer = new DirectDrawSurfaceImpl(m_virtualWidth, m_virtualHeight, SDL_PIXELFORMAT_INDEX8); } lpDDPalette->AddRef(); @@ -222,6 +235,11 @@ HRESULT FrameBufferImpl::SetPalette(LPDIRECTDRAWPALETTE lpDDPalette) m_palette = lpDDPalette; SDL_SetSurfacePalette(m_transferBuffer->m_surface, ((DirectDrawPaletteImpl*) m_palette)->m_palette); + + if (DDRenderer) { + DDRenderer->SetPalette(((DirectDrawPaletteImpl*) m_palette)->m_palette); + } + return DD_OK; } diff --git a/miniwin/src/internal/d3drmrenderer.h b/miniwin/src/internal/d3drmrenderer.h index 7c19a8b4..6ddeaeb1 100644 --- a/miniwin/src/internal/d3drmrenderer.h +++ b/miniwin/src/internal/d3drmrenderer.h @@ -55,6 +55,8 @@ class Direct3DRMRenderer : public IDirect3DDevice2 { virtual void Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) = 0; virtual void Download(SDL_Surface* target) = 0; virtual void SetDither(bool dither) = 0; + virtual void SetPalette(SDL_Palette* palette) {} + virtual bool UsesPalettedSurfaces() const { return false; } protected: int m_width, m_height; diff --git a/miniwin/src/internal/d3drmrenderer_palettesw.h b/miniwin/src/internal/d3drmrenderer_palettesw.h new file mode 100644 index 00000000..fbebe560 --- /dev/null +++ b/miniwin/src/internal/d3drmrenderer_palettesw.h @@ -0,0 +1,116 @@ +#pragma once + +#include "d3drmrenderer.h" +#include "d3drmtexture_impl.h" +#include "ddraw_impl.h" + +#include +#include +#include + +DEFINE_GUID(PALETTE_SW_GUID, 0x682656F3, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07); + +struct PaletteTextureCache { + Direct3DRMTextureImpl* texture; + Uint8 version; + SDL_Surface* cached; +}; + +struct PaletteMeshCache { + const MeshGroup* meshGroup; + int version; + bool flat; + std::vector vertices; + std::vector indices; +}; + +class 
Direct3DRMPaletteSWRenderer : public Direct3DRMRenderer { +public: + Direct3DRMPaletteSWRenderer(DWORD width, DWORD height); + ~Direct3DRMPaletteSWRenderer() override; + void PushLights(const SceneLight* vertices, size_t count) override; + Uint32 GetTextureId(IDirect3DRMTexture* texture, bool isUI, float scaleX, float scaleY) override; + Uint32 GetMeshId(IDirect3DRMMesh* mesh, const MeshGroup* meshGroup) override; + void SetProjection(const D3DRMMATRIX4D& projection, D3DVALUE front, D3DVALUE back) override; + void SetFrustumPlanes(const Plane* frustumPlanes) override; + HRESULT BeginFrame() override; + void EnableTransparency() override; + void SubmitDraw( + DWORD meshId, + const D3DRMMATRIX4D& modelViewMatrix, + const D3DRMMATRIX4D& worldMatrix, + const D3DRMMATRIX4D& viewMatrix, + const Matrix3x3& normalMatrix, + const Appearance& appearance + ) override; + HRESULT FinalizeFrame() override; + void Resize(int width, int height, const ViewportTransform& viewportTransform) override; + void Clear(float r, float g, float b) override; + void Flip() override; + void Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) override; + void Download(SDL_Surface* target) override; + void SetDither(bool dither) override; + void SetPalette(SDL_Palette* palette) override; + bool UsesPalettedSurfaces() const override { return true; } + +private: + void ClearZBuffer(); + void DrawTriangleProjected( + const D3DRMVERTEX& v0, + const D3DRMVERTEX& v1, + const D3DRMVERTEX& v2, + const Appearance& appearance + ); + void DrawTriangleClipped(const D3DRMVERTEX (&v)[3], const Appearance& appearance); + void ProjectVertex(const D3DVECTOR& v, D3DRMVECTOR4D& p) const; + Uint8 ApplyLighting(const D3DVECTOR& position, const D3DVECTOR& normal, const Appearance& appearance, Uint8 texel); + void BuildLightingLUT(); + void BuildBlendLUT(); + void AddTextureDestroyCallback(Uint32 id, IDirect3DRMTexture* texture); + void AddMeshDestroyCallback(Uint32 id, 
IDirect3DRMMesh* mesh); + + SDL_Surface* m_renderedImage = nullptr; + SDL_Palette* m_palette = nullptr; + SDL_Palette* m_flipPalette = nullptr; // Palette snapshot taken at Flip time (the correct one) + bool m_flipPaletteDirty = false; + std::vector m_lights; + std::vector m_textures; + std::vector m_meshes; + D3DVALUE m_front; + D3DVALUE m_back; + Matrix3x3 m_normalMatrix; + D3DRMMATRIX4D m_projection; + std::vector m_zBuffer; + std::vector m_transformedVerts; + Plane m_frustumPlanes[6]; + + // Lighting LUT: for each of 256 palette entries x 32 brightness levels, + // store the best-matching palette index. + // Usage: m_lightLUT[paletteIndex * 32 + brightnessLevel] + static constexpr int LIGHT_LEVELS = 32; + Uint8 m_lightLUT[256 * LIGHT_LEVELS]; + + // Blend LUT: for any two palette indices, the pre-computed 50/50 blend + // result mapped to the nearest palette colour. + // Usage: m_blendLUT[srcIndex * 256 + dstIndex] + Uint8 m_blendLUT[256 * 256]; + + bool m_lightLUTDirty = true; + bool m_transparencyEnabled = false; +}; + +inline static void Direct3DRMPaletteSW_EnumDevice(LPD3DENUMDEVICESCALLBACK cb, void* ctx) +{ + D3DDEVICEDESC halDesc = {}; + + D3DDEVICEDESC helDesc = {}; + helDesc.dcmColorModel = D3DCOLOR_RGB; + helDesc.dwFlags = D3DDD_DEVICEZBUFFERBITDEPTH; + helDesc.dwDeviceZBufferBitDepth = DDBD_16; + helDesc.dwDeviceRenderBitDepth = DDBD_8; + helDesc.dpcTriCaps.dwTextureCaps = D3DPTEXTURECAPS_PERSPECTIVE; + helDesc.dpcTriCaps.dwShadeCaps = D3DPSHADECAPS_ALPHAFLATBLEND; + helDesc.dpcTriCaps.dwTextureFilterCaps = D3DPTFILTERCAPS_LINEAR; + + EnumDevice(cb, ctx, "Miniwin Paletted Software", &halDesc, &helDesc, PALETTE_SW_GUID); +} diff --git a/miniwin/src/internal/d3drmrenderer_software.h b/miniwin/src/internal/d3drmrenderer_software.h index 0c422597..1821b492 100644 --- a/miniwin/src/internal/d3drmrenderer_software.h +++ b/miniwin/src/internal/d3drmrenderer_software.h @@ -50,6 +50,7 @@ class Direct3DRMSoftwareRenderer : public Direct3DRMRenderer { void 
Draw2DImage(Uint32 textureId, const SDL_Rect& srcRect, const SDL_Rect& dstRect, FColor color) override; void Download(SDL_Surface* target) override; void SetDither(bool dither) override; + void SetPalette(SDL_Palette* palette) override; private: void ClearZBuffer(); diff --git a/miniwin/src/internal/ddraw_impl.h b/miniwin/src/internal/ddraw_impl.h index 0da2d883..8f2ffe2e 100644 --- a/miniwin/src/internal/ddraw_impl.h +++ b/miniwin/src/internal/ddraw_impl.h @@ -61,9 +61,10 @@ struct DirectDrawImpl : public IDirectDraw2, public IDirect3D2, public IDirect3D float GetAnisotropic() const override { return m_anisotropic; } private: - FrameBufferImpl* m_frameBuffer; + FrameBufferImpl* m_frameBuffer = nullptr; int m_virtualWidth = 0; int m_virtualHeight = 0; + int m_virtualBPP = 0; DWORD m_msaaSamples = 0; float m_anisotropic = 0.0f; }; diff --git a/miniwin/src/internal/framebuffer_impl.h b/miniwin/src/internal/framebuffer_impl.h index 47805e69..76693f90 100644 --- a/miniwin/src/internal/framebuffer_impl.h +++ b/miniwin/src/internal/framebuffer_impl.h @@ -36,6 +36,8 @@ struct FrameBufferImpl : public IDirectDrawSurface3 { HRESULT SetPalette(LPDIRECTDRAWPALETTE lpDDPalette) override; HRESULT Unlock(LPVOID lpSurfaceData) override; + bool IsIndex8() const { return m_transferBuffer->m_surface->format == SDL_PIXELFORMAT_INDEX8; } + private: uint32_t m_virtualWidth; uint32_t m_virtualHeight;