Compare commits

...

19 Commits

Author SHA1 Message Date
Fernando Sahmkow
8a6e6465a7 Rasterizer: Refactor inlineToMemory. 2022-02-01 01:47:28 +01:00
Fernando Sahmkow
d0a5a48948 GPU: Improve syncing. 2022-01-29 23:02:04 +01:00
Fernando Sahmkow
4258d515e6 Rasterizer: Implement Inline2Memory Acceleration. 2022-01-29 22:53:27 +01:00
Fernando Sahmkow
f54280dafd Inline2Memory: Flush before writting buffer. 2022-01-29 17:42:28 +01:00
Morph
11099dda2e Merge pull request #7791 from german77/wall_clock
wall_clock: Use standard wall clock if rtsc frequency is too low
2022-01-28 20:04:24 -05:00
Morph
64a68ccbb4 Merge pull request #7800 from ameerj/spirv-int64-storage
spirv_atomic: Define U32x2 storage buffers for 64-bit storage atomics
2022-01-28 20:03:50 -05:00
ameerj
4790ba7839 spirv_atomic: Define U32x2 storage buffers for 64-bit storage atomics
Some drivers do not support 64-bit atomics, and fallback to atomically modifying U32x2 vectors. This change ensures that U32x2 storage vectors are defined in the spir-v shader when 64-bit atomics are used.

Fixes a hang on some devices, notably Intel GPUs, when booting Pokemon Legends Arceus
2022-01-28 19:00:04 -05:00
Morph
1900abde13 Merge pull request #7784 from german77/ds5
input_common: Add DS5 to HD rumble list
2022-01-28 18:36:28 -05:00
Morph
60b5670577 Merge pull request #7787 from bunnei/scheduler-deadlock-fix
hle: kernel: KScheduler: Fix deadlock with core waiting for a thread lock that has migrated.
2022-01-28 18:30:29 -05:00
Morph
b00406c8e4 Merge pull request #7788 from ameerj/stream-buffer-begin
buffer_cache: Reduce stream buffer allocations when expanding from the left
2022-01-28 18:30:01 -05:00
Morph
8dea7fa129 Merge pull request #7786 from ameerj/vmnmx-sel
video_minimum_maximum: Implement src operand selectors
2022-01-28 18:24:56 -05:00
Morph
2241d8c971 Merge pull request #7799 from ameerj/amd-xfb
emit_spirv: Add Xfb execution mode when transform feedback is used
2022-01-28 17:55:17 -05:00
ameerj
beaf7654bb emit_spirv: Add Xfb execution mode when transform feedback is used
Fixes Transform Feedback on Vulkan AMD drivers.
2022-01-28 16:32:48 -05:00
bunnei
0dec42431f Merge pull request #7770 from german77/motion-threshold
input_common: Add option to configure gyro threshold
2022-01-27 15:44:04 -08:00
german77
e4c63d432d wall_clock: use standard wall clock if rtsc frequency is too low 2022-01-27 17:07:52 -06:00
ameerj
f300a1d54b buffer_cache: Reduce stream buffer allocations when expanding from the left
The existing stream buffer optimization accounts for size increases at the end of the allocated buffer.
This adds the same optimization, increasing the size from the beginning of the buffer as well to reduce buffer allocations when expanding the same buffer from the left.
2022-01-27 15:31:43 -05:00
bunnei
3a1a3dd0db hle: kernel: KScheduler: Fix deadlock with core waiting for a thread lock that has migrated.
- Previously, it was possible for a thread migration to occur from core A to core B.
- Next, core B waits on a guest lock that must be released by a thread queued for core A.
- Meanwhile, core A is still waiting on the core B's current thread lock - resulting in a deadlock.
- Fix this by try-locking the thread lock.
- Fixes softlocks in FF8 and Pokemon Legends Arceus.
2022-01-27 12:17:14 -08:00
Narr the Reg
fd1cef5616 input_common: Add DS5 to HD rumble list 2022-01-26 21:49:32 -06:00
german77
ebf19616f4 input_common: Add option to configure gyro threshold 2022-01-23 21:54:33 -06:00
26 changed files with 201 additions and 46 deletions

View File

@@ -72,7 +72,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
if (caps.invariant_tsc) {
rtsc_frequency = EstimateRDTSCFrequency();
}
if (rtsc_frequency == 0) {
// Fallback to StandardWallClock if rtsc period is higher than a nano second
if (rtsc_frequency <= 1000000000) {
return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
emulated_clock_frequency);
} else {

View File

@@ -749,6 +749,7 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback
raw_status.gyro.y.value,
raw_status.gyro.z.value,
});
emulated.SetGyroThreshold(raw_status.gyro.x.properties.threshold);
emulated.UpdateRotation(raw_status.delta_timestamp);
emulated.UpdateOrientation(raw_status.delta_timestamp);
force_update_motion = raw_status.force_update;

View File

@@ -10,7 +10,7 @@ namespace Core::HID {
MotionInput::MotionInput() {
// Initialize PID constants with default values
SetPID(0.3f, 0.005f, 0.0f);
SetGyroThreshold(0.00005f);
SetGyroThreshold(0.007f);
}
void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) {
@@ -31,7 +31,7 @@ void MotionInput::SetGyroscope(const Common::Vec3f& gyroscope) {
gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f);
}
if (gyro.Length2() < gyro_threshold) {
if (gyro.Length() < gyro_threshold) {
gyro = {};
} else {
only_accelerometer = false;

View File

@@ -258,7 +258,7 @@ private:
private:
constexpr void ClearAffinityBit(u64& affinity, s32 core) {
affinity &= ~(u64(1) << core);
affinity &= ~(UINT64_C(1) << core);
}
constexpr s32 GetNextCore(u64& affinity) {

View File

@@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) {
}
void KScheduler::Reload(KThread* thread) {
LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr");
LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName());
if (thread) {
ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");
Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
cpu_core.LoadContext(thread->GetContext32());
cpu_core.LoadContext(thread->GetContext64());
cpu_core.SetTlsAddress(thread->GetTLSAddress());
cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
cpu_core.ClearExclusiveState();
}
Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
cpu_core.LoadContext(thread->GetContext32());
cpu_core.LoadContext(thread->GetContext64());
cpu_core.SetTlsAddress(thread->GetTLSAddress());
cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
cpu_core.ClearExclusiveState();
}
void KScheduler::SwitchContextStep2() {
// Load context of new thread
Reload(current_thread.load());
Reload(GetCurrentThread());
RescheduleCurrentCore();
}
@@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() {
KThread* previous_thread = GetCurrentThread();
KThread* next_thread = state.highest_priority_thread;
state.needs_scheduling = false;
state.needs_scheduling.store(false);
// We never want to schedule a null thread, so use the idle thread if we don't have a next.
if (next_thread == nullptr) {
next_thread = idle_thread;
}
if (next_thread->GetCurrentCore() != core_id) {
next_thread->SetCurrentCore(core_id);
}
// We never want to schedule a dummy thread, as these are only used by host threads for locking.
if (next_thread->GetThreadType() == ThreadType::Dummy) {
ASSERT_MSG(false, "Dummy threads should never be scheduled!");
@@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() {
return;
}
if (next_thread->GetCurrentCore() != core_id) {
next_thread->SetCurrentCore(core_id);
}
current_thread.store(next_thread);
// Update the CPU time tracking variables.
KProcess* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
// Save context for previous thread
@@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() {
std::shared_ptr<Common::Fiber>* old_context;
old_context = &previous_thread->GetHostContext();
// Set the new thread.
current_thread.store(next_thread);
guard.Unlock();
Common::Fiber::YieldTo(*old_context, *switch_fiber);
@@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() {
do {
auto next_thread = current_thread.load();
if (next_thread != nullptr) {
next_thread->context_guard.Lock();
if (next_thread->GetRawState() != ThreadState::Runnable) {
const auto locked = next_thread->context_guard.TryLock();
if (state.needs_scheduling.load()) {
next_thread->context_guard.Unlock();
break;
}
@@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() {
next_thread->context_guard.Unlock();
break;
}
if (!locked) {
continue;
}
}
auto thread = next_thread ? next_thread : idle_thread;
Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext());

View File

@@ -109,8 +109,9 @@ public:
bool HasHDRumble() const {
if (sdl_controller) {
return (SDL_GameControllerGetType(sdl_controller.get()) ==
SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO);
const auto type = SDL_GameControllerGetType(sdl_controller.get());
return (type == SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO) ||
(type == SDL_CONTROLLER_TYPE_PS5);
}
return false;
}

View File

@@ -504,9 +504,10 @@ private:
class InputFromMotion final : public Common::Input::InputDevice {
public:
explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_,
explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, float gyro_threshold_,
InputEngine* input_engine_)
: identifier(identifier_), motion_sensor(motion_sensor_), input_engine(input_engine_) {
: identifier(identifier_), motion_sensor(motion_sensor_), gyro_threshold(gyro_threshold_),
input_engine(input_engine_) {
UpdateCallback engine_callback{[this]() { OnChange(); }};
const InputIdentifier input_identifier{
.identifier = identifier,
@@ -525,8 +526,9 @@ public:
const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor);
Common::Input::MotionStatus status{};
const Common::Input::AnalogProperties properties = {
.deadzone = 0.001f,
.deadzone = 0.0f,
.range = 1.0f,
.threshold = gyro_threshold,
.offset = 0.0f,
};
status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties};
@@ -551,6 +553,7 @@ public:
private:
const PadIdentifier identifier;
const int motion_sensor;
const float gyro_threshold;
int callback_key;
InputEngine* input_engine;
};
@@ -873,9 +876,11 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateMotionDevice(
if (params.Has("motion")) {
const auto motion_sensor = params.Get("motion", 0);
const auto gyro_threshold = params.Get("threshold", 0.007f);
input_engine->PreSetController(identifier);
input_engine->PreSetMotion(identifier, motion_sensor);
return std::make_unique<InputFromMotion>(identifier, motion_sensor, input_engine.get());
return std::make_unique<InputFromMotion>(identifier, motion_sensor, gyro_threshold,
input_engine.get());
}
const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f);

View File

@@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
}
}
void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
if (ctx.runtime_info.xfb_varyings.empty()) {
return;
}
ctx.AddCapability(spv::Capability::TransformFeedback);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb);
}
void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
if (info.uses_sampled_1d) {
ctx.AddCapability(spv::Capability::Sampled1D);
@@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
if (info.uses_sample_id) {
ctx.AddCapability(spv::Capability::SampleRateShading);
}
if (!ctx.runtime_info.xfb_varyings.empty()) {
ctx.AddCapability(spv::Capability::TransformFeedback);
}
if (info.uses_derivatives) {
ctx.AddCapability(spv::Capability::DerivativeControl);
}
@@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
SetupSignedNanCapabilities(profile, program, ctx, main);
}
SetupCapabilities(profile, program.info, ctx);
SetupTransformFeedbackCapabilities(ctx, main);
PatchPhiNodes(program, ctx);
return ctx.Assemble();
}

View File

@@ -74,7 +74,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
}
LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
@@ -267,7 +267,7 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
}
LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
binding, offset, sizeof(u32[2]))};
const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};

View File

@@ -688,7 +688,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::StorageAtomicAnd64:
case IR::Opcode::StorageAtomicOr64:
case IR::Opcode::StorageAtomicXor64:
info.used_storage_buffer_types |= IR::Type::U64;
info.used_storage_buffer_types |= IR::Type::U64 | IR::Type::U32x2;
info.uses_int64_bit_atomics = true;
break;
case IR::Opcode::BindlessImageAtomicIAdd32:

View File

@@ -131,6 +131,8 @@ public:
void DownloadMemory(VAddr cpu_addr, u64 size);
bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
void DisableGraphicsUniformBuffer(size_t stage, u32 index);
@@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
return;
}
MICROPROFILE_SCOPE(GPU_DownloadMemory);
const bool is_accuracy_normal =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
@@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
buffer.ForEachDownloadRangeAndClear(
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
if (is_accuracy_normal) {
return;
}
const VAddr buffer_addr = buffer.CpuAddr();
const auto add_download = [&](VAddr start, VAddr end) {
const u64 new_offset = start - buffer_addr;
@@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
const IntervalType base_interval{cpu_addr, cpu_addr + size};
common_ranges.add(base_interval);
const bool is_accuracy_high =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
if (!is_async && !is_accuracy_high) {
if (!is_async) {
return;
}
uncommitted_ranges.add(base_interval);
@@ -1474,6 +1479,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
// When this memory region has been joined a bunch of times, we assume it's being used
// as a stream buffer. Increase the size to skip constantly recreating buffers.
has_stream_leap = true;
begin -= PAGE_SIZE * 256;
cpu_addr = begin;
end += PAGE_SIZE * 256;
}
}
@@ -1641,6 +1648,42 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
}
template <class P>
bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
std::span<u8> inlined_buffer) {
const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
if (!is_dirty) {
return false;
}
if (!IsRegionGpuModified(dest_address, copy_size)) {
return false;
}
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
ClearDownload(subtract_interval);
common_ranges.subtract(subtract_interval);
BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
auto& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
if constexpr (USE_MEMORY_MAPS) {
std::array copies{BufferCopy{
.src_offset = 0,
.dst_offset = buffer.Offset(dest_address),
.size = copy_size,
}};
auto upload_staging = runtime.UploadStagingBuffer(copy_size);
u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
} else {
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
}
return true;
}
template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());

View File

@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
@@ -16,6 +17,10 @@ State::State(MemoryManager& memory_manager_, Registers& regs_)
State::~State() = default;
void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
void State::ProcessExec(const bool is_linear_) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
@@ -32,7 +37,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
}
const GPUVAddr address{regs.dest.Address()};
if (is_linear) {
memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);

View File

@@ -12,6 +12,10 @@ namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines::Upload {
struct Registers {
@@ -60,6 +64,9 @@ public:
void ProcessExec(bool is_linear_);
void ProcessData(u32 data, bool is_last_call);
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
private:
u32 write_offset = 0;
u32 copy_size = 0;
@@ -68,6 +75,7 @@ private:
bool is_linear = false;
Registers& regs;
MemoryManager& memory_manager;
VideoCore::RasterizerInterface* rasterizer = nullptr;
};
} // namespace Tegra::Engines::Upload

View File

@@ -22,6 +22,7 @@ KeplerCompute::~KeplerCompute() = default;
void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
upload_state.BindRasterizer(rasterizer);
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {

View File

@@ -19,6 +19,10 @@ KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
upload_state.BindRasterizer(rasterizer_);
}
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerMemory register, increase the size of the Regs structure");

View File

@@ -22,6 +22,10 @@ namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
/**
@@ -38,6 +42,9 @@ public:
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
~KeplerMemory() override;
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;

View File

@@ -31,6 +31,7 @@ Maxwell3D::~Maxwell3D() = default;
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
upload_state.BindRasterizer(rasterizer_);
}
void Maxwell3D::InitializeRegisterDefaults() {

View File

@@ -1557,7 +1557,8 @@ private:
static constexpr u32 null_cb_data = 0xFFFFFFFF;
struct CBDataState {
std::array<std::array<u32, 0x4000>, 16> buffer;
static constexpr size_t inline_size = 0x4000;
std::array<std::array<u32, inline_size>, 16> buffer;
u32 current{null_cb_data};
u32 id{null_cb_data};
u32 start_pos{};

View File

@@ -59,6 +59,7 @@ struct GPU::Impl {
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
kepler_memory->BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer);
}
@@ -502,8 +503,13 @@ struct GPU::Impl {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
break;
case BufferMethods::UnkCacheFlush:
rasterizer->SyncGuestHost();
break;
case BufferMethods::WrcacheFlush:
rasterizer->SignalReference();
break;
case BufferMethods::FenceValue:
break;
case BufferMethods::RefCnt:
@@ -513,7 +519,7 @@ struct GPU::Impl {
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
ProcessWaitForInterruptMethod();
rasterizer->WaitForIdle();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();

View File

@@ -143,6 +143,8 @@ public:
[[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
@@ -153,8 +155,6 @@ private:
void TryLockPage(PageEntry page_entry, std::size_t size);
void TryUnlockPage(PageEntry page_entry, std::size_t size);
void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
bool is_safe) const;
void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,

View File

@@ -123,6 +123,9 @@ public:
[[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) = 0;
/// Attempt to use a faster method to display the framebuffer to screen
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {

View File

@@ -484,6 +484,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA()
return accelerate_dma;
}
void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) {
auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
gpu_memory.WriteBlock(address, memory.data(), copy_size);
return;
}
gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
}
{
std::scoped_lock lock_texture{texture_cache.mutex};
texture_cache.WriteMemory(*cpu_addr, copy_size);
}
shader_cache.InvalidateRegion(*cpu_addr, copy_size);
query_cache.InvalidateRegion(*cpu_addr, copy_size);
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {

View File

@@ -106,6 +106,8 @@ public:
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,

View File

@@ -548,6 +548,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
return accelerate_dma;
}
void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) {
auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
gpu_memory.WriteBlock(address, memory.data(), copy_size);
return;
}
gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
}
{
std::scoped_lock lock_texture{texture_cache.mutex};
texture_cache.WriteMemory(*cpu_addr, copy_size);
}
pipeline_cache.InvalidateRegion(*cpu_addr, copy_size);
query_cache.InvalidateRegion(*cpu_addr, copy_size);
}
bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {

View File

@@ -99,6 +99,8 @@ public:
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,

View File

@@ -403,10 +403,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
connect(button, &QPushButton::customContextMenuRequested,
[=, this](const QPoint& menu_location) {
QMenu context_menu;
Common::ParamPackage param = emulated_controller->GetMotionParam(motion_id);
context_menu.addAction(tr("Clear"), [&] {
emulated_controller->SetMotionParam(motion_id, {});
motion_map[motion_id]->setText(tr("[not set]"));
});
if (param.Has("motion")) {
context_menu.addAction(tr("Set gyro threshold"), [&] {
const int gyro_threshold =
static_cast<int>(param.Get("threshold", 0.007f) * 1000.0f);
const int new_threshold = QInputDialog::getInt(
this, tr("Set threshold"), tr("Choose a value between 0% and 100%"),
gyro_threshold, 0, 100);
param.Set("threshold", new_threshold / 1000.0f);
emulated_controller->SetMotionParam(motion_id, param);
});
}
context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location));
});
}