Compare commits

..

29 Commits

Author SHA1 Message Date
FengChen
b27aa2ccca video_core: Preserve multisampled textures when overlap 2023-03-12 19:12:13 +08:00
liamwhite
ec4e2d1fab Merge pull request #9916 from liamwhite/fpu
kernel: clone fpu status on CreateThread
2023-03-09 09:19:15 -05:00
liamwhite
b5c0c1e163 Merge pull request #9822 from ameerj/buffcache-ssbo-addr
buffer_cache: Add logic for non-NVN storage buffer tracking
2023-03-09 09:18:39 -05:00
bunnei
4562f7af9a Merge pull request #9906 from german77/metroid2
input_common: Increase mouse sensitivity range
2023-03-08 10:43:38 -08:00
liamwhite
f3f57f90fe Merge pull request #9912 from liamwhite/err
hle: rename legacy errors to Results
2023-03-08 09:16:27 -05:00
liamwhite
b5d61f214d Merge pull request #9904 from liamwhite/ws
kernel: fix WaitSynchronization
2023-03-08 09:16:17 -05:00
liamwhite
3cf88a4d6c Merge pull request #9896 from Kelebek1/d24s8
Check all swizzle components for red, not just [0]
2023-03-08 09:16:06 -05:00
Morph
d72d753b1a Merge pull request #9921 from liamwhite/override
general: fix type inconsistencies
2023-03-07 22:42:40 -05:00
Morph
a3ffea6a64 Merge pull request #9918 from liamwhite/fwrapv
kernel: avoid signed overflow UB on MSVC
2023-03-07 22:42:32 -05:00
Morph
b014fdacdb Merge pull request #9920 from liamwhite/constexpr-bit-cast
common: make BitCast constexpr
2023-03-07 22:42:19 -05:00
Narr the Reg
757aafa582 input_common: Minor typo issues (#9922) 2023-03-08 03:15:46 +01:00
german77
9a9e5844d3 input_common: Increase mouse sensitivity range 2023-03-07 19:31:52 -06:00
Liam
64dcb40db1 common: make BitCast constexpr 2023-03-07 20:26:56 -05:00
Liam
ba4213d956 general: fix type inconsistencies 2023-03-07 20:05:19 -05:00
Liam
d45ac00d48 kernel: avoid signed overflow UB on MSVC 2023-03-07 19:46:48 -05:00
Liam
484641003c kernel: clone fpu status on CreateThread 2023-03-07 19:18:06 -05:00
liamwhite
a7792e5ff8 Merge pull request #9889 from Morph1984/time-is-ticking
core_timing: Reduce CPU usage on Windows
2023-03-07 10:54:13 -05:00
Liam
644ee0043e kernel: fix WaitSynchronization 2023-03-05 10:29:10 -05:00
Morph
376a414f5b native_clock: Round RDTSC frequency to the nearest 1000 2023-03-05 02:36:31 -05:00
Morph
026eaddbee timer_resolution: Set current process to High QoS
Ensures that this process is treated as a high performance process by the Windows scheduler.
2023-03-05 02:36:31 -05:00
Morph
3453beb1e0 general: Target Windows 10 SDK
We no longer support operating systems below Windows 10.
2023-03-05 02:36:31 -05:00
Morph
194cf0b497 hardware_properties: Update BASE_CLOCK_RATE to exactly 1020 MHz 2023-03-05 02:36:31 -05:00
Morph
bff1453282 core_timing: Use higher precision sleeps on Windows
The precision of sleep_for and wait_for is limited to 1-1.5ms on Windows.
Using SleepForOneTick() allows us to sleep for exactly one interval of the current timer resolution.
This allows us to take advantage of systems that have a timer resolution of 0.5ms to reduce CPU overhead in the event loop.
2023-03-05 02:36:31 -05:00
Morph
7e353082ac main: (Windows) Set the current timer resolution to the maximum
Increases the precision of thread sleeps on Windows.
2023-03-05 02:36:31 -05:00
Morph
7fffdf83b7 wall_clock: Make use of SteadyClock 2023-03-05 02:36:31 -05:00
Morph
1ed49f92dd common: Implement a method to change the Windows timer resolution
This utilizes undocumented NtDll functions to change the current timer resolution from the default of 1ms.
2023-03-05 01:41:28 -05:00
Morph
bd09c82521 common: Implement a high resolution steady clock
This implementation provides a consistent, high performance, and high resolution clock where/when std::chrono::steady_clock does not provide sufficient precision.
2023-03-05 01:41:19 -05:00
Kelebek1
a7fb80e612 Check all swizzle components for red, not just [0], pass float border color rather than int 2023-03-04 02:33:50 +00:00
ameerj
2ce5bb9bd6 buffer_cache: Add logic for non-NVN storage buffer tracking 2023-02-25 16:24:21 -05:00
44 changed files with 536 additions and 172 deletions

View File

@@ -477,8 +477,8 @@ if (APPLE)
find_library(COCOA_LIBRARY Cocoa)
set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
elseif (WIN32)
# WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
# Target Windows 10
add_definitions(-D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00)
set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi)
if (MINGW)
# PSAPI is the Process Status API

6
dist/yuzu.manifest vendored
View File

@@ -36,12 +36,6 @@ SPDX-License-Identifier: GPL-2.0-or-later
<application>
<!-- Windows 10 -->
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
<!-- Windows 8.1 -->
<supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
<!-- Windows 8 -->
<supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
<!-- Windows 7 -->
<supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
</application>
</compatibility>
<trustInfo

View File

@@ -91,6 +91,7 @@ add_library(common STATIC
multi_level_page_table.h
nvidia_flags.cpp
nvidia_flags.h
overflow.h
page_table.cpp
page_table.h
param_package.cpp
@@ -113,6 +114,8 @@ add_library(common STATIC
socket_types.h
spin_lock.cpp
spin_lock.h
steady_clock.cpp
steady_clock.h
stream.cpp
stream.h
string_util.cpp
@@ -142,6 +145,14 @@ add_library(common STATIC
zstd_compression.h
)
if (WIN32)
target_sources(common PRIVATE
windows/timer_resolution.cpp
windows/timer_resolution.h
)
target_link_libraries(common PRIVATE ntdll)
endif()
if(ARCHITECTURE_x86_64)
target_sources(common
PRIVATE

View File

@@ -3,19 +3,21 @@
#pragma once
#include <cstring>
#include <type_traits>
#include <version>
#ifdef __cpp_lib_bit_cast
#include <bit>
#endif
namespace Common {
template <typename To, typename From>
[[nodiscard]] std::enable_if_t<sizeof(To) == sizeof(From) && std::is_trivially_copyable_v<From> &&
std::is_trivially_copyable_v<To>,
To>
BitCast(const From& src) noexcept {
To dst;
std::memcpy(&dst, &src, sizeof(To));
return dst;
constexpr inline To BitCast(const From& from) {
#ifdef __cpp_lib_bit_cast
return std::bit_cast<To>(from);
#else
return __builtin_bit_cast(To, from);
#endif
}
} // namespace Common

View File

@@ -46,7 +46,7 @@ enum class PollingMode {
// Constant polling of buttons, analogs and motion data
Active,
// Only update on button change, digital analogs
Pasive,
Passive,
// Enable near field communication polling
NFC,
// Enable infrared camera polling

22
src/common/overflow.h Normal file
View File

@@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <type_traits>
#include "bit_cast.h"
namespace Common {
/**
 * Adds two signed integers with wraparound instead of signed-overflow undefined behavior.
 *
 * Both operands are reinterpreted as the unsigned type of the same width, added there
 * (unsigned arithmetic wraps), and the result is reinterpreted back as T.
 *
 * @param lhs Left-hand operand.
 * @param rhs Right-hand operand.
 *
 * @returns The sum of lhs and rhs, wrapped to the width of T.
 */
template <typename T>
    requires(std::is_integral_v<T> && std::is_signed_v<T>)
inline T WrappingAdd(T lhs, T rhs) {
    using U = std::make_unsigned_t<T>;

    const U lhs_u = BitCast<U>(lhs);
    const U rhs_u = BitCast<U>(rhs);

    // For types narrower than int, integral promotion makes the sum an int. BitCast requires
    // source and destination to have the same size, so convert the sum back to U first.
    return BitCast<T>(static_cast<U>(lhs_u + rhs_u));
}
} // namespace Common

View File

@@ -503,7 +503,7 @@ struct Values {
Setting<bool> tas_loop{false, "tas_loop"};
Setting<bool> mouse_panning{false, "mouse_panning"};
Setting<u8, true> mouse_panning_sensitivity{10, 1, 100, "mouse_panning_sensitivity"};
Setting<u8, true> mouse_panning_sensitivity{50, 1, 100, "mouse_panning_sensitivity"};
Setting<bool> mouse_enabled{false, "mouse_enabled"};
Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"};

View File

@@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#if defined(_WIN32)
#include <windows.h>
#else
#include <time.h>
#endif
#include "common/steady_clock.h"
namespace Common {

#if defined(_WIN32)
/// Returns the performance counter frequency reported by QueryPerformanceFrequency.
static s64 WindowsQueryPerformanceFrequency() {
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency(&frequency);
    return frequency.QuadPart;
}

/// Returns the current performance counter value reported by QueryPerformanceCounter.
static s64 WindowsQueryPerformanceCounter() {
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    return counter.QuadPart;
}
#endif

/**
 * Returns the current time of the platform's monotonic time source as a nanosecond time point.
 *
 * - Windows: the performance counter, scaled by its frequency to nanoseconds.
 * - Apple: clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW).
 * - Everything else: clock_gettime(CLOCK_MONOTONIC).
 */
SteadyClock::time_point SteadyClock::Now() noexcept {
#if defined(_WIN32)
    // The frequency is queried on the first call only and reused afterwards.
    static const auto freq = WindowsQueryPerformanceFrequency();
    const auto counter = WindowsQueryPerformanceCounter();

    // 10 MHz is a very common QPC frequency on modern PCs.
    // Optimizing for this specific frequency can double the performance of
    // this function by avoiding the expensive frequency conversion path.
    static constexpr s64 TenMHz = 10'000'000;
    if (freq == TenMHz) [[likely]] {
        static_assert(period::den % TenMHz == 0);
        static constexpr s64 Multiplier = period::den / TenMHz;
        return time_point{duration{counter * Multiplier}};
    }

    // General path: scale the whole seconds and the sub-second remainder separately, so the
    // multiplication by period::den is never applied to the full counter value.
    const auto whole = (counter / freq) * period::den;
    const auto part = (counter % freq) * period::den / freq;
    return time_point{duration{whole + part}};
#elif defined(__APPLE__)
    return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}};
#else
    // Value-initialize so that a failed clock_gettime call cannot make us read indeterminate
    // memory; the return value of clock_gettime itself is still not inspected.
    timespec ts{};
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return time_point{std::chrono::seconds{ts.tv_sec} + std::chrono::nanoseconds{ts.tv_nsec}};
#endif
}

} // namespace Common

23
src/common/steady_clock.h Normal file
View File

@@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include "common/common_types.h"
namespace Common {
/**
 * Clock type with nanosecond ticks that reports itself as steady (is_steady == true).
 *
 * It exposes the member aliases that std::chrono expects of a clock (rep, period, duration,
 * time_point), so its time points can be subtracted to obtain std::chrono::nanoseconds.
 */
struct SteadyClock {
    using duration = std::chrono::nanoseconds;
    // rep and period are derived from duration so the three aliases can never disagree,
    // whatever integer type the standard library picked for std::chrono::nanoseconds.
    using rep = duration::rep;
    using period = duration::period;
    using time_point = std::chrono::time_point<SteadyClock>;

    static constexpr bool is_steady = true;

    /// Returns the current time point of this clock. Defined out of line.
    [[nodiscard]] static time_point Now() noexcept;
};
} // namespace Common

View File

@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/steady_clock.h"
#include "common/uint128.h"
#include "common/wall_clock.h"
@@ -11,45 +12,32 @@
namespace Common {
using base_timer = std::chrono::steady_clock;
using base_time_point = std::chrono::time_point<base_timer>;
class StandardWallClock final : public WallClock {
public:
explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
start_time = base_timer::now();
}
: WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false},
start_time{SteadyClock::Now()} {}
std::chrono::nanoseconds GetTimeNS() override {
base_time_point current = base_timer::now();
auto elapsed = current - start_time;
return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
return SteadyClock::Now() - start_time;
}
std::chrono::microseconds GetTimeUS() override {
base_time_point current = base_timer::now();
auto elapsed = current - start_time;
return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS());
}
std::chrono::milliseconds GetTimeMS() override {
base_time_point current = base_timer::now();
auto elapsed = current - start_time;
return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS());
}
u64 GetClockCycles() override {
std::chrono::nanoseconds time_now = GetTimeNS();
const u128 temporary =
Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
return Common::Divide128On32(temporary, 1000000000).first;
const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency);
return Common::Divide128On32(temp, NS_RATIO).first;
}
u64 GetCPUCycles() override {
std::chrono::nanoseconds time_now = GetTimeNS();
const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
return Common::Divide128On32(temporary, 1000000000).first;
const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency);
return Common::Divide128On32(temp, NS_RATIO).first;
}
void Pause([[maybe_unused]] bool is_paused) override {
@@ -57,7 +45,7 @@ public:
}
private:
base_time_point start_time;
SteadyClock::time_point start_time;
};
#ifdef ARCHITECTURE_x86_64
@@ -93,4 +81,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
#endif
/// Creates a StandardWallClock for the given emulated CPU and clock frequencies and returns it
/// through the WallClock interface.
std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
                                                   u64 emulated_clock_frequency) {
    std::unique_ptr<WallClock> standard_clock =
        std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
    return standard_clock;
}
} // namespace Common

View File

@@ -55,4 +55,7 @@ private:
[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
u64 emulated_clock_frequency);
[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
u64 emulated_clock_frequency);
} // namespace Common

View File

@@ -0,0 +1,109 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <windows.h>
#include "common/windows/timer_resolution.h"
extern "C" {
// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html
NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution,
PULONG CurrentResolution);
// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html
NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution,
PULONG CurrentResolution);
// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html
NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval);
}
// Defines for compatibility with older Windows 10 SDKs.
#ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED
#define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1
#endif
#ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION
#define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4
#endif
namespace Common::Windows {
namespace {
using namespace std::chrono;
/// Converts a count of 100 ns units, as used by the Nt timer functions in this file, to
/// nanoseconds.
constexpr nanoseconds ToNS(ULONG hundred_ns) {
    // Widen before multiplying: ULONG is 32 bits wide, so multiplying in ULONG would wrap for
    // inputs above roughly 4.29 seconds worth of 100 ns units.
    return nanoseconds{static_cast<nanoseconds::rep>(hundred_ns) * 100};
}
/// Converts nanoseconds to a count of 100 ns units, as used by the Nt timer functions in this
/// file. Any remainder below 100 ns is discarded.
constexpr ULONG ToHundredNS(nanoseconds ns) {
    // Divide in the 64-bit representation first and narrow afterwards; narrowing first would
    // drop the upper bits of the nanosecond count before the division.
    return static_cast<ULONG>(ns.count() / 100);
}
/// The three values reported by NtQueryTimerResolution, converted to nanoseconds.
/// Filled in by GetTimerResolution() using designated initializers, so the member names and
/// their order must stay as they are.
struct TimerResolution {
// Value reported through the MinimumResolution output parameter.
std::chrono::nanoseconds minimum;
// Value reported through the MaximumResolution output parameter.
std::chrono::nanoseconds maximum;
// Value reported through the CurrentResolution output parameter.
std::chrono::nanoseconds current;
};
/// Queries the minimum, maximum and current timer resolution via NtQueryTimerResolution and
/// returns them converted to nanoseconds.
TimerResolution GetTimerResolution() {
    // Zero-initialize the outputs: the status returned by NtQueryTimerResolution is not
    // checked, so without this a failed call would make us read indeterminate values.
    ULONG MinimumTimerResolution{};
    ULONG MaximumTimerResolution{};
    ULONG CurrentTimerResolution{};
    NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution,
                           &CurrentTimerResolution);
    return {
        .minimum{ToNS(MinimumTimerResolution)},
        .maximum{ToNS(MaximumTimerResolution)},
        .current{ToNS(CurrentTimerResolution)},
    };
}
void SetHighQoS() {
// https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service
PROCESS_POWER_THROTTLING_STATE PowerThrottling{
.Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION},
.ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED |
PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION},
.StateMask{},
};
SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling,
sizeof(PROCESS_POWER_THROTTLING_STATE));
}
} // Anonymous namespace
/// Queries the timer resolutions and returns the minimum one.
nanoseconds GetMinimumTimerResolution() {
    const TimerResolution resolution = GetTimerResolution();
    return resolution.minimum;
}
/// Queries the timer resolutions and returns the maximum one.
nanoseconds GetMaximumTimerResolution() {
    const TimerResolution resolution = GetTimerResolution();
    return resolution.maximum;
}
/// Queries the timer resolutions and returns the one currently in effect.
nanoseconds GetCurrentTimerResolution() {
    const TimerResolution resolution = GetTimerResolution();
    return resolution.current;
}
/// Requests the given timer resolution through NtSetTimerResolution and returns the resolution
/// that call reports back, converted to nanoseconds.
nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) {
    const auto DesiredTimerResolution = ToHundredNS(timer_resolution);

    // Zero-initialize the output: the status returned by NtSetTimerResolution is not checked,
    // so without this a failed call would make us return an indeterminate value.
    ULONG CurrentTimerResolution{};
    // TRUE is passed for the SetResolution parameter.
    NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution);
    return ToNS(CurrentTimerResolution);
}
/// Applies the high-QoS process settings, then requests the maximum supported timer resolution
/// and returns the resolution reported by that request.
nanoseconds SetCurrentTimerResolutionToMaximum() {
    SetHighQoS();

    const nanoseconds maximum_resolution = GetMaximumTimerResolution();
    return SetCurrentTimerResolution(maximum_resolution);
}
void SleepForOneTick() {
LARGE_INTEGER DelayInterval{
.QuadPart{-1},
};
NtDelayExecution(FALSE, &DelayInterval);
}
} // namespace Common::Windows

View File

@@ -0,0 +1,38 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
namespace Common::Windows {
/// Returns the minimum (least precise) supported timer resolution in nanoseconds.
std::chrono::nanoseconds GetMinimumTimerResolution();
/// Returns the maximum (most precise) supported timer resolution in nanoseconds.
std::chrono::nanoseconds GetMaximumTimerResolution();
/// Returns the current timer resolution in nanoseconds.
std::chrono::nanoseconds GetCurrentTimerResolution();
/**
* Sets the current timer resolution.
*
* @param timer_resolution Timer resolution in nanoseconds.
*
* @returns The current timer resolution.
*/
std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution);
/**
* Sets the current timer resolution to the maximum supported timer resolution.
*
* @returns The current timer resolution.
*/
std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum();
/// Sleep for one tick of the current timer resolution.
void SleepForOneTick();
} // namespace Common::Windows

View File

@@ -6,6 +6,7 @@
#include <thread>
#include "common/atomic_ops.h"
#include "common/steady_clock.h"
#include "common/uint128.h"
#include "common/x64/native_clock.h"
@@ -39,6 +40,12 @@ static u64 FencedRDTSC() {
}
#endif
/// Rounds value to a multiple of Nearest. Remainders of at least Nearest / 2 (integer division)
/// round up to the next multiple; smaller remainders round down.
template <u64 Nearest>
static u64 RoundToNearest(u64 value) {
    const u64 remainder = value % Nearest;
    const u64 rounded_down = value - remainder;

    if (remainder >= (Nearest / 2)) {
        return rounded_down + Nearest;
    }
    return rounded_down;
}
u64 EstimateRDTSCFrequency() {
// Discard the first result measuring the rdtsc.
FencedRDTSC();
@@ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() {
FencedRDTSC();
// Get the current time.
const auto start_time = std::chrono::steady_clock::now();
const auto start_time = Common::SteadyClock::Now();
const u64 tsc_start = FencedRDTSC();
// Wait for 200 milliseconds.
std::this_thread::sleep_for(std::chrono::milliseconds{200});
const auto end_time = std::chrono::steady_clock::now();
// Wait for 250 milliseconds.
std::this_thread::sleep_for(std::chrono::milliseconds{250});
const auto end_time = Common::SteadyClock::Now();
const u64 tsc_end = FencedRDTSC();
// Calculate differences.
const u64 timer_diff = static_cast<u64>(
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
const u64 tsc_diff = tsc_end - tsc_start;
const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
return tsc_freq;
return RoundToNearest<1000>(tsc_freq);
}
namespace X64 {

View File

@@ -6,6 +6,10 @@
#include <string>
#include <tuple>
#ifdef _WIN32
#include "common/windows/timer_resolution.h"
#endif
#include "common/microprofile.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
@@ -38,7 +42,8 @@ struct CoreTiming::Event {
};
CoreTiming::CoreTiming()
: clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
: cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
CoreTiming::~CoreTiming() {
Reset();
@@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() {
}
u64 CoreTiming::GetCPUTicks() const {
if (is_multicore) {
return clock->GetCPUCycles();
if (is_multicore) [[likely]] {
return cpu_clock->GetCPUCycles();
}
return ticks;
}
u64 CoreTiming::GetClockTicks() const {
if (is_multicore) {
return clock->GetClockCycles();
if (is_multicore) [[likely]] {
return cpu_clock->GetClockCycles();
}
return CpuCyclesToClockCycles(ticks);
}
@@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() {
const auto next_time = Advance();
if (next_time) {
// There are more events left in the queue, wait until the next event.
const auto wait_time = *next_time - GetGlobalTimeNs().count();
auto wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time > 0) {
#ifdef _WIN32
// Assume a timer resolution of 1ms.
static constexpr s64 TimerResolutionNS = 1000000;
const auto timer_resolution_ns =
Common::Windows::GetCurrentTimerResolution().count();
// Sleep in discrete intervals of the timer resolution, and spin the rest.
const auto sleep_time = wait_time - (wait_time % TimerResolutionNS);
if (sleep_time > 0) {
event.WaitFor(std::chrono::nanoseconds(sleep_time));
}
while (!paused && !event.IsSet() && wait_time > 0) {
wait_time = *next_time - GetGlobalTimeNs().count();
while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) {
// Yield to reduce thread starvation.
std::this_thread::yield();
if (wait_time >= timer_resolution_ns) {
Common::Windows::SleepForOneTick();
} else {
std::this_thread::yield();
}
}
if (event.IsSet()) {
@@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() {
}
paused_set = true;
clock->Pause(true);
event_clock->Pause(true);
pause_event.Wait();
clock->Pause(false);
event_clock->Pause(false);
}
}
@@ -303,16 +307,23 @@ void CoreTiming::Reset() {
has_started = false;
}
/// Returns the current time in nanoseconds: read from cpu_clock in multicore mode, otherwise
/// converted from the tick counter with CyclesToNs.
std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
    if (!is_multicore) [[unlikely]] {
        return CyclesToNs(ticks);
    }
    return cpu_clock->GetTimeNS();
}
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
if (is_multicore) {
return clock->GetTimeNS();
if (is_multicore) [[likely]] {
return event_clock->GetTimeNS();
}
return CyclesToNs(ticks);
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
if (is_multicore) {
return clock->GetTimeUS();
if (is_multicore) [[likely]] {
return event_clock->GetTimeUS();
}
return CyclesToUs(ticks);
}

View File

@@ -122,6 +122,9 @@ public:
/// Returns the current time in emulated clock cycles
u64 GetClockTicks() const;
/// Returns current time in nanoseconds.
std::chrono::nanoseconds GetCPUTimeNs() const;
/// Returns current time in microseconds.
std::chrono::microseconds GetGlobalTimeUs() const;
@@ -139,7 +142,8 @@ private:
void Reset();
std::unique_ptr<Common::WallClock> clock;
std::unique_ptr<Common::WallClock> cpu_clock;
std::unique_ptr<Common::WallClock> event_clock;
s64 global_timer = 0;

View File

@@ -13,11 +13,9 @@ namespace Core {
namespace Hardware {
// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
// The exact value used is of course unverified.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed
constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz
constexpr u64 CNTFREQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
// Virtual to Physical core map.
constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{

View File

@@ -44,11 +44,11 @@ const KAddressSpaceInfo& GetAddressSpaceInfo(size_t width, KAddressSpaceInfo::Ty
} // namespace
uintptr_t KAddressSpaceInfo::GetAddressSpaceStart(size_t width, KAddressSpaceInfo::Type type) {
std::size_t KAddressSpaceInfo::GetAddressSpaceStart(size_t width, KAddressSpaceInfo::Type type) {
return GetAddressSpaceInfo(width, type).address;
}
size_t KAddressSpaceInfo::GetAddressSpaceSize(size_t width, KAddressSpaceInfo::Type type) {
std::size_t KAddressSpaceInfo::GetAddressSpaceSize(size_t width, KAddressSpaceInfo::Type type) {
return GetAddressSpaceInfo(width, type).size;
}

View File

@@ -18,7 +18,7 @@ struct KAddressSpaceInfo final {
Count,
};
static u64 GetAddressSpaceStart(std::size_t width, Type type);
static std::size_t GetAddressSpaceStart(std::size_t width, Type type);
static std::size_t GetAddressSpaceSize(std::size_t width, Type type);
const std::size_t bit_width{};

View File

@@ -21,9 +21,9 @@ public:
~KDeviceAddressSpace();
Result Initialize(u64 address, u64 size);
void Finalize();
void Finalize() override;
bool IsInitialized() const {
bool IsInitialized() const override {
return m_is_initialized;
}
static void PostDestroy(uintptr_t arg) {}

View File

@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/overflow.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/k_resource_limit.h"
@@ -104,7 +105,7 @@ bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) {
ASSERT(current_hints[index] <= current_values[index]);
// If we would overflow, don't allow to succeed.
if (current_values[index] + value <= current_values[index]) {
if (Common::WrappingAdd(current_values[index], value) <= current_values[index]) {
break;
}

View File

@@ -49,6 +49,7 @@ static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context,
context.cpu_registers[0] = arg;
context.cpu_registers[15] = entry_point;
context.cpu_registers[13] = stack_top;
context.fpscr = 0;
}
static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, VAddr stack_top,
@@ -58,8 +59,8 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
context.cpu_registers[18] = Kernel::KSystemControl::GenerateRandomU64() | 1;
context.pc = entry_point;
context.sp = stack_top;
// TODO(merry): Perform a hardware test to determine the below value.
context.fpcr = 0;
context.fpsr = 0;
}
} // namespace
@@ -815,6 +816,27 @@ void KThread::Continue() {
KScheduler::OnThreadStateChanged(kernel, this, old_state);
}
void KThread::CloneFpuStatus() {
// We shouldn't reach here when starting kernel threads.
ASSERT(this->GetOwnerProcess() != nullptr);
ASSERT(this->GetOwnerProcess() == GetCurrentProcessPointer(kernel));
if (this->GetOwnerProcess()->Is64BitProcess()) {
// Clone FPSR and FPCR.
ThreadContext64 cur_ctx{};
kernel.System().CurrentArmInterface().SaveContext(cur_ctx);
this->GetContext64().fpcr = cur_ctx.fpcr;
this->GetContext64().fpsr = cur_ctx.fpsr;
} else {
// Clone FPSCR.
ThreadContext32 cur_ctx{};
kernel.System().CurrentArmInterface().SaveContext(cur_ctx);
this->GetContext32().fpscr = cur_ctx.fpscr;
}
}
Result KThread::SetActivity(Svc::ThreadActivity activity) {
// Lock ourselves.
KScopedLightLock lk(activity_pause_lock);

View File

@@ -254,6 +254,8 @@ public:
thread_context_32.tpidr = static_cast<u32>(value);
}
void CloneFpuStatus();
[[nodiscard]] ThreadContext32& GetContext32() {
return thread_context_32;
}

View File

@@ -48,19 +48,15 @@ Result ResetSignal(Core::System& system, Handle handle) {
return ResultInvalidHandle;
}
/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
Result WaitSynchronization(Core::System& system, s32* index, VAddr handles_address, s32 num_handles,
s64 nano_seconds) {
LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, num_handles={}, nano_seconds={}",
handles_address, num_handles, nano_seconds);
static Result WaitSynchronization(Core::System& system, int32_t* out_index, const Handle* handles,
int32_t num_handles, int64_t timeout_ns) {
// Ensure number of handles is valid.
R_UNLESS(0 <= num_handles && num_handles <= ArgumentHandleCountMax, ResultOutOfRange);
R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
// Get the synchronization context.
auto& kernel = system.Kernel();
auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
std::vector<KSynchronizationObject*> objs(num_handles);
const auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
Handle* handles = system.Memory().GetPointer<Handle>(handles_address);
// Copy user handles.
if (num_handles > 0) {
@@ -68,21 +64,38 @@ Result WaitSynchronization(Core::System& system, s32* index, VAddr handles_addre
R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles,
num_handles),
ResultInvalidHandle);
for (const auto& obj : objs) {
kernel.RegisterInUseObject(obj);
}
}
// Ensure handles are closed when we're done.
SCOPE_EXIT({
for (s32 i = 0; i < num_handles; ++i) {
kernel.UnregisterInUseObject(objs[i]);
for (auto i = 0; i < num_handles; ++i) {
objs[i]->Close();
}
});
return KSynchronizationObject::Wait(kernel, index, objs.data(), static_cast<s32>(objs.size()),
nano_seconds);
// Wait on the objects.
Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(),
static_cast<s32>(objs.size()), timeout_ns);
R_SUCCEED_IF(res == ResultSessionClosed);
R_RETURN(res);
}
/// Wait for the given handles to synchronize, timeout after the specified nanoseconds.
/// Copies num_handles handles out of guest memory at user_handles and forwards them to the
/// pointer-based WaitSynchronization overload.
Result WaitSynchronization(Core::System& system, int32_t* out_index, VAddr user_handles,
                           int32_t num_handles, int64_t timeout_ns) {
    LOG_TRACE(Kernel_SVC, "called user_handles={:#x}, num_handles={}, timeout_ns={}", user_handles,
              num_handles, timeout_ns);

    // Reject handle counts outside [0, Svc::ArgumentHandleCountMax] before allocating anything.
    R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);

    // Take a local copy of the handle array; nothing is read when the count is zero.
    std::vector<Handle> handles(num_handles);
    if (!handles.empty()) {
        system.Memory().ReadBlock(user_handles, handles.data(), handles.size() * sizeof(Handle));
    }

    R_RETURN(WaitSynchronization(system, out_index, handles.data(), num_handles, timeout_ns));
}
/// Resumes a thread waiting on WaitSynchronization

View File

@@ -82,6 +82,9 @@ Result CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point,
// Commit the thread reservation.
thread_reservation.Commit();
// Clone the current fpu status to the new thread.
thread->CloneFpuStatus();
// Register the new thread.
KThread::Register(kernel, thread);

View File

@@ -307,8 +307,8 @@ Common::Input::DriverResult Joycons::SetPollingMode(const PadIdentifier& identif
switch (polling_mode) {
case Common::Input::PollingMode::Active:
return static_cast<Common::Input::DriverResult>(handle->SetActiveMode());
case Common::Input::PollingMode::Pasive:
return static_cast<Common::Input::DriverResult>(handle->SetPasiveMode());
case Common::Input::PollingMode::Passive:
return static_cast<Common::Input::DriverResult>(handle->SetPassiveMode());
case Common::Input::PollingMode::IR:
return static_cast<Common::Input::DriverResult>(handle->SetIrMode());
case Common::Input::PollingMode::NFC:

View File

@@ -3,6 +3,7 @@
#include <thread>
#include <fmt/format.h>
#include <math.h>
#include "common/param_package.h"
#include "common/settings.h"
@@ -11,8 +12,9 @@
namespace InputCommon {
constexpr int update_time = 10;
constexpr float default_stick_sensitivity = 0.022f;
constexpr float default_motion_sensitivity = 0.008f;
constexpr float default_stick_sensitivity = 0.0044f;
constexpr float default_motion_sensitivity = 0.0003f;
constexpr float maximum_rotation_speed = 2.0f;
constexpr int mouse_axis_x = 0;
constexpr int mouse_axis_y = 1;
constexpr int wheel_axis_x = 2;
@@ -99,11 +101,13 @@ void Mouse::UpdateMotionInput() {
const float sensitivity =
Settings::values.mouse_panning_sensitivity.GetValue() * default_motion_sensitivity;
// Slow movement by 7%
if (Settings::values.mouse_panning) {
last_motion_change *= 0.93f;
} else {
last_motion_change.z *= 0.93f;
const float rotation_velocity = std::sqrt(last_motion_change.x * last_motion_change.x +
last_motion_change.y * last_motion_change.y);
if (rotation_velocity > maximum_rotation_speed / sensitivity) {
const float multiplier = maximum_rotation_speed / rotation_velocity / sensitivity;
last_motion_change.x = last_motion_change.x * multiplier;
last_motion_change.y = last_motion_change.y * multiplier;
}
const BasicMotion motion_data{
@@ -116,6 +120,12 @@ void Mouse::UpdateMotionInput() {
.delta_timestamp = update_time * 1000,
};
if (Settings::values.mouse_panning) {
last_motion_change.x = 0;
last_motion_change.y = 0;
}
last_motion_change.z = 0;
SetMotion(motion_identifier, 0, motion_data);
}
@@ -125,7 +135,7 @@ void Mouse::Move(int x, int y, int center_x, int center_y) {
auto mouse_change =
(Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
Common::Vec3<float> motion_change{-mouse_change.y, -mouse_change.x, last_motion_change.z};
last_motion_change += {-mouse_change.y, -mouse_change.x, last_motion_change.z};
const auto move_distance = mouse_change.Length();
if (move_distance == 0) {
@@ -141,7 +151,6 @@ void Mouse::Move(int x, int y, int center_x, int center_y) {
// Average mouse movements
last_mouse_change = (last_mouse_change * 0.91f) + (mouse_change * 0.09f);
last_motion_change = (last_motion_change * 0.69f) + (motion_change * 0.31f);
const auto last_move_distance = last_mouse_change.Length();

View File

@@ -60,6 +60,6 @@ private:
std::string file_path{};
State state{State::Initialized};
std::vector<u8> nfc_data;
Common::Input::PollingMode polling_mode{Common::Input::PollingMode::Pasive};
Common::Input::PollingMode polling_mode{Common::Input::PollingMode::Passive};
};
} // namespace InputCommon

View File

@@ -410,7 +410,7 @@ DriverResult JoyconDriver::SetIrsConfig(IrsMode mode_, IrsResolution format_) {
return result;
}
DriverResult JoyconDriver::SetPasiveMode() {
DriverResult JoyconDriver::SetPassiveMode() {
std::scoped_lock lock{mutex};
motion_enabled = false;
hidbus_enabled = false;

View File

@@ -44,7 +44,7 @@ public:
DriverResult SetVibration(const VibrationValue& vibration);
DriverResult SetLedConfig(u8 led_pattern);
DriverResult SetIrsConfig(IrsMode mode_, IrsResolution format_);
DriverResult SetPasiveMode();
DriverResult SetPassiveMode();
DriverResult SetActiveMode();
DriverResult SetIrMode();
DriverResult SetNfcMode();

View File

@@ -78,7 +78,7 @@ enum class PadButton : u32 {
Capture = 0x200000,
};
enum class PasivePadButton : u32 {
enum class PassivePadButton : u32 {
Down_A = 0x0001,
Right_X = 0x0002,
Left_B = 0x0004,
@@ -95,7 +95,7 @@ enum class PasivePadButton : u32 {
ZL_ZR = 0x8000,
};
enum class PasivePadStick : u8 {
enum class PassivePadStick : u8 {
Right = 0x00,
RightDown = 0x01,
Down = 0x02,

View File

@@ -48,13 +48,13 @@ void JoyconPoller::ReadPassiveMode(std::span<u8> buffer) {
switch (device_type) {
case ControllerType::Left:
UpdatePasiveLeftPadInput(data);
UpdatePassiveLeftPadInput(data);
break;
case ControllerType::Right:
UpdatePasiveRightPadInput(data);
UpdatePassiveRightPadInput(data);
break;
case ControllerType::Pro:
UpdatePasiveProPadInput(data);
UpdatePassiveProPadInput(data);
break;
default:
break;
@@ -210,12 +210,12 @@ void JoyconPoller::UpdateActiveProPadInput(const InputReportActive& input,
}
}
void JoyconPoller::UpdatePasiveLeftPadInput(const InputReportPassive& input) {
static constexpr std::array<PasivePadButton, 11> left_buttons{
PasivePadButton::Down_A, PasivePadButton::Right_X, PasivePadButton::Left_B,
PasivePadButton::Up_Y, PasivePadButton::SL, PasivePadButton::SR,
PasivePadButton::L_R, PasivePadButton::ZL_ZR, PasivePadButton::Minus,
PasivePadButton::Capture, PasivePadButton::StickL,
void JoyconPoller::UpdatePassiveLeftPadInput(const InputReportPassive& input) {
static constexpr std::array<PassivePadButton, 11> left_buttons{
PassivePadButton::Down_A, PassivePadButton::Right_X, PassivePadButton::Left_B,
PassivePadButton::Up_Y, PassivePadButton::SL, PassivePadButton::SR,
PassivePadButton::L_R, PassivePadButton::ZL_ZR, PassivePadButton::Minus,
PassivePadButton::Capture, PassivePadButton::StickL,
};
for (auto left_button : left_buttons) {
@@ -225,17 +225,17 @@ void JoyconPoller::UpdatePasiveLeftPadInput(const InputReportPassive& input) {
}
const auto [left_axis_x, left_axis_y] =
GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state));
GetPassiveAxisValue(static_cast<PassivePadStick>(input.stick_state));
callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickX), left_axis_x);
callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickY), left_axis_y);
}
void JoyconPoller::UpdatePasiveRightPadInput(const InputReportPassive& input) {
static constexpr std::array<PasivePadButton, 11> right_buttons{
PasivePadButton::Down_A, PasivePadButton::Right_X, PasivePadButton::Left_B,
PasivePadButton::Up_Y, PasivePadButton::SL, PasivePadButton::SR,
PasivePadButton::L_R, PasivePadButton::ZL_ZR, PasivePadButton::Plus,
PasivePadButton::Home, PasivePadButton::StickR,
void JoyconPoller::UpdatePassiveRightPadInput(const InputReportPassive& input) {
static constexpr std::array<PassivePadButton, 11> right_buttons{
PassivePadButton::Down_A, PassivePadButton::Right_X, PassivePadButton::Left_B,
PassivePadButton::Up_Y, PassivePadButton::SL, PassivePadButton::SR,
PassivePadButton::L_R, PassivePadButton::ZL_ZR, PassivePadButton::Plus,
PassivePadButton::Home, PassivePadButton::StickR,
};
for (auto right_button : right_buttons) {
@@ -245,18 +245,18 @@ void JoyconPoller::UpdatePasiveRightPadInput(const InputReportPassive& input) {
}
const auto [right_axis_x, right_axis_y] =
GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state));
GetPassiveAxisValue(static_cast<PassivePadStick>(input.stick_state));
callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickX), right_axis_x);
callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickY), right_axis_y);
}
void JoyconPoller::UpdatePasiveProPadInput(const InputReportPassive& input) {
static constexpr std::array<PasivePadButton, 14> pro_buttons{
PasivePadButton::Down_A, PasivePadButton::Right_X, PasivePadButton::Left_B,
PasivePadButton::Up_Y, PasivePadButton::SL, PasivePadButton::SR,
PasivePadButton::L_R, PasivePadButton::ZL_ZR, PasivePadButton::Minus,
PasivePadButton::Plus, PasivePadButton::Capture, PasivePadButton::Home,
PasivePadButton::StickL, PasivePadButton::StickR,
void JoyconPoller::UpdatePassiveProPadInput(const InputReportPassive& input) {
static constexpr std::array<PassivePadButton, 14> pro_buttons{
PassivePadButton::Down_A, PassivePadButton::Right_X, PassivePadButton::Left_B,
PassivePadButton::Up_Y, PassivePadButton::SL, PassivePadButton::SR,
PassivePadButton::L_R, PassivePadButton::ZL_ZR, PassivePadButton::Minus,
PassivePadButton::Plus, PassivePadButton::Capture, PassivePadButton::Home,
PassivePadButton::StickL, PassivePadButton::StickR,
};
for (auto pro_button : pro_buttons) {
@@ -266,9 +266,9 @@ void JoyconPoller::UpdatePasiveProPadInput(const InputReportPassive& input) {
}
const auto [left_axis_x, left_axis_y] =
GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state && 0xf));
GetPassiveAxisValue(static_cast<PassivePadStick>(input.stick_state & 0xf));
const auto [right_axis_x, right_axis_y] =
GetPassiveAxisValue(static_cast<PasivePadStick>(input.stick_state >> 4));
GetPassiveAxisValue(static_cast<PassivePadStick>(input.stick_state >> 4));
callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickX), left_axis_x);
callbacks.on_stick_data(static_cast<int>(PadAxes::LeftStickY), left_axis_y);
callbacks.on_stick_data(static_cast<int>(PadAxes::RightStickX), right_axis_x);
@@ -283,25 +283,25 @@ f32 JoyconPoller::GetAxisValue(u16 raw_value, Joycon::JoyStickAxisCalibration ca
return value / calibration.min;
}
std::pair<f32, f32> JoyconPoller::GetPassiveAxisValue(PasivePadStick raw_value) const {
std::pair<f32, f32> JoyconPoller::GetPassiveAxisValue(PassivePadStick raw_value) const {
switch (raw_value) {
case PasivePadStick::Right:
case PassivePadStick::Right:
return {1.0f, 0.0f};
case PasivePadStick::RightDown:
case PassivePadStick::RightDown:
return {1.0f, -1.0f};
case PasivePadStick::Down:
case PassivePadStick::Down:
return {0.0f, -1.0f};
case PasivePadStick::DownLeft:
case PassivePadStick::DownLeft:
return {-1.0f, -1.0f};
case PasivePadStick::Left:
case PassivePadStick::Left:
return {-1.0f, 0.0f};
case PasivePadStick::LeftUp:
case PassivePadStick::LeftUp:
return {-1.0f, 1.0f};
case PasivePadStick::Up:
case PassivePadStick::Up:
return {0.0f, 1.0f};
case PasivePadStick::UpRight:
case PassivePadStick::UpRight:
return {1.0f, 1.0f};
case PasivePadStick::Neutral:
case PassivePadStick::Neutral:
default:
return {0.0f, 0.0f};
}

View File

@@ -46,15 +46,15 @@ private:
const MotionStatus& motion_status);
void UpdateActiveProPadInput(const InputReportActive& input, const MotionStatus& motion_status);
void UpdatePasiveLeftPadInput(const InputReportPassive& buffer);
void UpdatePasiveRightPadInput(const InputReportPassive& buffer);
void UpdatePasiveProPadInput(const InputReportPassive& buffer);
void UpdatePassiveLeftPadInput(const InputReportPassive& buffer);
void UpdatePassiveRightPadInput(const InputReportPassive& buffer);
void UpdatePassiveProPadInput(const InputReportPassive& buffer);
/// Returns a calibrated joystick axis from raw axis data
f32 GetAxisValue(u16 raw_value, JoyStickAxisCalibration calibration) const;
/// Returns a digital joystick axis from passive axis data
std::pair<f32, f32> GetPassiveAxisValue(PasivePadStick raw_value) const;
std::pair<f32, f32> GetPassiveAxisValue(PassivePadStick raw_value) const;
/// Returns a calibrated accelerometer axis from raw motion data
f32 GetAccelerometerValue(s16 raw, const MotionSensorCalibration& cal,

View File

@@ -146,6 +146,7 @@ void MappingFactory::RegisterMotion(const MappingData& data) {
if (data.engine == "mouse") {
new_input.Set("motion", 0);
new_input.Set("pad", 1);
new_input.Set("threshold", 0.001f);
input_queue.Push(new_input);
return;
}

View File

@@ -35,6 +35,7 @@ struct Bias {
u32 index;
u32 offset_begin;
u32 offset_end;
u32 alignment;
};
using boost::container::flat_set;
@@ -349,7 +350,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
.index = index.U32(),
.offset = offset.U32(),
};
if (!Common::IsAligned(storage_buffer.offset, 16)) {
const u32 alignment{bias ? bias->alignment : 8U};
if (!Common::IsAligned(storage_buffer.offset, alignment)) {
// The SSBO pointer has to be aligned
return std::nullopt;
}
@@ -371,6 +373,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
.index = 0,
.offset_begin = 0x110,
.offset_end = 0x610,
.alignment = 16,
};
// Track the low address of the instruction
const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -386,8 +389,11 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
storage_buffer = Track(low_addr, nullptr);
if (!storage_buffer) {
// If that also fails, use NVN fallbacks
LOG_WARNING(Shader, "Storage buffer failed to track, using global memory fallbacks");
return;
}
LOG_WARNING(Shader, "Storage buffer tracked without bias, index {} offset {}",
storage_buffer->index, storage_buffer->offset);
}
// Collect storage buffer and the instruction
if (IsGlobalMemoryWrite(inst)) {

View File

@@ -383,7 +383,8 @@ private:
void NotifyBufferDeletion();
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
bool is_written = false) const;
[[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
PixelFormat format);
@@ -802,7 +803,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}
template <class P>
@@ -842,7 +843,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
const auto& cbufs = launch_desc.const_buffer_config;
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
}
template <class P>
@@ -1988,11 +1989,26 @@ void BufferCache<P>::NotifyBufferDeletion() {
template <class P>
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
u32 cbuf_index,
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
const auto size = [&]() {
const bool is_nvn_cbuf = cbuf_index == 0;
// The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
if (is_nvn_cbuf) {
return gpu_memory->Read<u32>(ssbo_addr + 8);
}
// Other titles (notably Doom Eternal) may use STG/LDG on buffer addresses in custom-defined
// cbufs, which do not store the sizes adjacent to the addresses, so use the fully
// mapped buffer size for now.
const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
LOG_INFO(HW_GPU, "Binding storage buffer for cbuf index {}, MemoryLayoutSize 0x{:X}",
cbuf_index, memory_layout_size);
return memory_layout_size;
}();
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
return NULL_BINDING;
}
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);

View File

@@ -197,7 +197,7 @@ struct GPU::Impl {
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
if (Settings::values.use_fast_gpu_time.GetValue()) {
nanoseconds /= 256;
}

View File

@@ -112,13 +112,17 @@ GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
return GL_NONE;
}
GLenum TextureMode(PixelFormat format, bool is_first) {
GLenum TextureMode(PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
bool any_r =
std::ranges::any_of(swizzle, [](SwizzleSource s) { return s == SwizzleSource::R; });
switch (format) {
case PixelFormat::D24_UNORM_S8_UINT:
case PixelFormat::D32_FLOAT_S8_UINT:
return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
// R = depth, G = stencil
return any_r ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
case PixelFormat::S8_UINT_D24_UNORM:
return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
// R = stencil, G = depth
return any_r ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
ASSERT(false);
return GL_DEPTH_COMPONENT;
@@ -208,8 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
case PixelFormat::D32_FLOAT_S8_UINT:
case PixelFormat::S8_UINT_D24_UNORM:
UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
TextureMode(format, swizzle[0] == SwizzleSource::R));
glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, TextureMode(format, swizzle));
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
break;
case PixelFormat::A5B5G5R1_UNORM: {

View File

@@ -238,7 +238,7 @@ private:
return indices;
}
void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
@@ -278,7 +278,7 @@ private:
return indices;
}
void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) override {
switch (index_type) {
case VK_INDEX_TYPE_UINT8_EXT:
std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);

View File

@@ -1294,7 +1294,7 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
enabled = false;
}
scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
}

View File

@@ -189,13 +189,16 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
if (info.IsRenderTarget()) {
return ImageAspectMask(info.format);
}
const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
bool any_r =
std::ranges::any_of(info.Swizzle(), [](SwizzleSource s) { return s == SwizzleSource::R; });
switch (info.format) {
case PixelFormat::D24_UNORM_S8_UINT:
case PixelFormat::D32_FLOAT_S8_UINT:
return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
// R = depth, G = stencil
return any_r ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
case PixelFormat::S8_UINT_D24_UNORM:
return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
// R = stencil, G = depth
return any_r ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
case PixelFormat::D16_UNORM:
case PixelFormat::D32_FLOAT:
return VK_IMAGE_ASPECT_DEPTH_BIT;
@@ -1769,7 +1772,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
.borderColor =
arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
.unnormalizedCoordinates = VK_FALSE,
});
}

View File

@@ -1244,6 +1244,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
if (overlap.info.num_samples != new_image.info.num_samples) {
runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
continue;
} else {
runtime.CopyImage(new_image, overlap, std::move(copies));
}

View File

@@ -91,6 +91,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
#ifdef _WIN32
#include "common/windows/timer_resolution.h"
#endif
#ifdef ARCHITECTURE_x86_64
#include "common/x64/cpu_detect.h"
#endif
@@ -377,6 +380,12 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan
LOG_INFO(Frontend, "Host RAM: {:.2f} GiB",
Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB});
LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB});
#ifdef _WIN32
LOG_INFO(Frontend, "Host Timer Resolution: {:.4f} ms",
std::chrono::duration_cast<std::chrono::duration<f64, std::milli>>(
Common::Windows::SetCurrentTimerResolutionToMaximum())
.count());
#endif
UpdateWindowTitle();
show();

View File

@@ -42,6 +42,8 @@
#include <windows.h>
#include <shellapi.h>
#include "common/windows/timer_resolution.h"
#endif
#undef _UNICODE
@@ -314,6 +316,8 @@ int main(int argc, char** argv) {
#ifdef _WIN32
LocalFree(argv_w);
Common::Windows::SetCurrentTimerResolutionToMaximum();
#endif
MicroProfileOnThreadCreate("EmuThread");