Compare commits

..

6 Commits

Author SHA1 Message Date
ReinUsesLisp
9ebc27234d bootmanager: Bypass input focus issues 2019-03-25 17:10:34 -03:00
ReinUsesLisp
bbb396d7f1 bootmanager: Bypass resizing issue 2019-03-25 17:10:34 -03:00
ReinUsesLisp
9ff72ca9f2 bootmanager: Delete container to avoid crash on game restarting
While we are at it, remove the nullptr checks before deletion, since the C++
standard guarantees that deleting a null pointer is a no-op
2019-03-25 17:10:34 -03:00
James Rowe
5f2d9f282a QT: Hide GLWidget immediately after showing.
With the loading screen merged, we don't want to actually show at this
point, but it still needs to be shown to actually create the context.
Turns out you can just show and hide it immediately and it'll work.
2019-01-21 16:21:44 -07:00
James Rowe
f2a2f818b6 SDL Frontend: Add shared context support 2019-01-21 16:00:01 -07:00
James Rowe
c6a0ab9792 QT Frontend: Migrate to QOpenGLWindow 2019-01-21 16:00:01 -07:00
86 changed files with 4755 additions and 6961 deletions

View File

@@ -1,27 +1,16 @@
<!--
Please keep in mind yuzu is EXPERIMENTAL SOFTWARE.
Please read the FAQ:
https://yuzu-emu.org/wiki/faq/
Please read the FAQ: https://yuzu-emu.org/wiki/faq/
THIS IS NOT A SUPPORT FORUM, FOR SUPPORT GO TO:
https://community.citra-emu.org/
When submitting an issue, please do the following:
If the FAQ does not answer your question, please go to:
https://community.citra-emu.org/
When submitting an issue, please check the following:
- You have read the above.
- You have provided the version (commit hash) of yuzu you are using.
- You have provided sufficient detail for the issue to be reproduced.
- You have provided system specs (if relevant).
- Please also provide:
- For any issues, a log file
- Provide the version (commit hash) of yuzu you are using.
- Provide sufficient detail for the issue to be reproduced.
- Provide:
- For crashes, a backtrace.
- For graphical issues, comparison screenshots with real hardware.
- For emulation inaccuracies, a test-case (if able).
-->

View File

@@ -68,7 +68,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
}
// Implementation of a volume slider with a dynamic range of 60 dB
const float volume_scale_factor = volume == 0 ? 0 : std::exp(6.90775f * volume) * 0.001f;
const float volume_scale_factor{std::exp(6.90775f * volume) * 0.001f};
for (auto& sample : samples) {
sample = static_cast<s16>(sample * volume_scale_factor);
}

View File

@@ -95,8 +95,6 @@ add_library(core STATIC
frontend/framebuffer_layout.cpp
frontend/framebuffer_layout.h
frontend/input.h
frontend/scope_acquire_window_context.cpp
frontend/scope_acquire_window_context.h
gdbstub/gdbstub.cpp
gdbstub/gdbstub.h
hle/ipc.h

View File

@@ -12,6 +12,23 @@
namespace Core::Frontend {
/**
* Represents a graphics context that can be used for background computation or drawing. If the
* graphics backend doesn't require the context, then the implementation of these methods can be
* stubs
*/
class GraphicsContext {
public:
    /// Contexts are owned and destroyed through base-class pointers (CreateSharedContext hands
    /// out a std::unique_ptr<GraphicsContext>), so the destructor has to be virtual for the
    /// derived part of the object to be destroyed correctly.
    virtual ~GraphicsContext() = default;

    /// Makes the graphics context current for the caller thread
    virtual void MakeCurrent() = 0;

    /// Releases the graphics context from the caller thread
    virtual void DoneCurrent() = 0;

    /// Swap buffers to display the next frame
    virtual void SwapBuffers() = 0;
};
/**
* Abstraction class used to provide an interface between emulation code and the frontend
* (e.g. SDL, QGLWidget, GLFW, etc...).
@@ -30,7 +47,7 @@ namespace Core::Frontend {
* - DO NOT TREAT THIS CLASS AS A GUI TOOLKIT ABSTRACTION LAYER. That's not what it is. Please
* re-read the upper points again and think about it if you don't see this.
*/
class EmuWindow {
class EmuWindow : public GraphicsContext {
public:
/// Data structure to store emuwindow configuration
struct WindowConfig {
@@ -40,17 +57,21 @@ public:
std::pair<unsigned, unsigned> min_client_area_size;
};
/// Swap buffers to display the next frame
virtual void SwapBuffers() = 0;
/// Polls window events
virtual void PollEvents() = 0;
/// Makes the graphics context current for the caller thread
virtual void MakeCurrent() = 0;
/// Releases (dunno if this is the "right" word) the GLFW context from the caller thread
virtual void DoneCurrent() = 0;
/**
* Returns a GraphicsContext that the frontend provides that is shared with the emu window. This
* context can be used from other threads for background graphics computation. If the frontend
* is using a graphics backend that doesn't need anything specific to run on a different thread,
* then it can use a stubbed implementation for GraphicsContext.
*
* If the return value is null, then the core should assume that the frontend cannot provide a
* Shared Context
*/
virtual std::unique_ptr<GraphicsContext> CreateSharedContext() const {
    // Default implementation: no shared context is available, so hand back an empty pointer.
    return {};
}
/**
* Signal that a touch pressed event has occurred (e.g. mouse click pressed)

View File

@@ -1,18 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/frontend/emu_window.h"
#include "core/frontend/scope_acquire_window_context.h"
namespace Core::Frontend {

/// Stores a reference to the window and calls MakeCurrent() on it.
ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_)
    : emu_window(emu_window_) {
    emu_window_.MakeCurrent();
}

/// Calls DoneCurrent() on the window that was passed to the constructor.
ScopeAcquireWindowContext::~ScopeAcquireWindowContext() {
    this->emu_window.DoneCurrent();
}

} // namespace Core::Frontend

View File

@@ -1,23 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Core::Frontend {

class EmuWindow;

/// Scope guard for an EmuWindow's context: the context is acquired when the guard is
/// constructed and released again when the guard is destroyed.
class ScopeAcquireWindowContext : NonCopyable {
public:
    /// @param window Window whose context is held for the lifetime of this object.
    explicit ScopeAcquireWindowContext(EmuWindow& window);
    ~ScopeAcquireWindowContext();

private:
    /// Window whose context is being held.
    EmuWindow& emu_window;
};

} // namespace Core::Frontend

View File

@@ -216,11 +216,6 @@ private:
/// Push ///
/// Pushes a signed 32-bit value: it is converted to u32 and stored in the next free slot.
template <>
inline void ResponseBuilder::Push(s32 value) {
    const u32 word = static_cast<u32>(value);
    cmdbuf[index] = word;
    ++index;
}
template <>
inline void ResponseBuilder::Push(u32 value) {
cmdbuf[index++] = value;
@@ -239,22 +234,6 @@ inline void ResponseBuilder::Push(ResultCode value) {
Push<u32>(0);
}
/// Pushes a signed 8-bit value by forwarding it unchanged to PushRaw.
template <>
inline void ResponseBuilder::Push(s8 value) {
PushRaw(value);
}
/// Pushes a signed 16-bit value by forwarding it unchanged to PushRaw.
template <>
inline void ResponseBuilder::Push(s16 value) {
PushRaw(value);
}
/// Pushes a signed 64-bit value as two 32-bit words: the low half first, then the high half.
template <>
inline void ResponseBuilder::Push(s64 value) {
    const u32 low_word = static_cast<u32>(value);
    const u32 high_word = static_cast<u32>(value >> 32);
    Push(low_word);
    Push(high_word);
}
template <>
inline void ResponseBuilder::Push(u8 value) {
PushRaw(value);

View File

@@ -597,7 +597,6 @@ enum class BreakType : u32 {
PostNROLoad = 4,
PreNROUnload = 5,
PostNROUnload = 6,
CppException = 7,
};
struct BreakReason {
@@ -670,9 +669,6 @@ static void Break(u32 reason, u64 info1, u64 info2) {
"Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1,
info2);
break;
case BreakType::CppException:
LOG_CRITICAL(Debug_Emulated, "Signalling debugger. Uncaught C++ exception encountered.");
break;
default:
LOG_WARNING(
Debug_Emulated,

View File

@@ -5,6 +5,7 @@
#include <algorithm>
#include <optional>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -21,6 +22,7 @@
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/perf_stats.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Service::NVFlinger {
@@ -28,6 +30,12 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger() {
// Add the different displays to the list of displays.
displays.emplace_back(0, "Default");
displays.emplace_back(1, "External");
displays.emplace_back(2, "Edid");
displays.emplace_back(3, "Internal");
// Schedule the screen composition events
composition_event =
CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -47,13 +55,13 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
}
u64 NVFlinger::OpenDisplay(std::string_view name) {
LOG_DEBUG(Service, "Opening \"{}\" display", name);
LOG_WARNING(Service, "Opening display {}", name);
// TODO(Subv): Currently we only support the Default display.
ASSERT(name == "Default");
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
ASSERT(itr != displays.end());
@@ -65,8 +73,8 @@ u64 NVFlinger::CreateLayer(u64 display_id) {
ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment");
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
u64 layer_id = next_layer_id++;
u32 buffer_queue_id = next_buffer_queue_id++;
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
display.layers.emplace_back(layer_id, buffer_queue);
buffer_queues.emplace_back(std::move(buffer_queue));
@@ -83,16 +91,16 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id
}
std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[&](const auto& queue) { return queue->GetId() == id; });
auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[&](const auto& queue) { return queue->GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
Display& NVFlinger::GetDisplay(u64 display_id) {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
ASSERT(itr != displays.end());
return *itr;
@@ -101,8 +109,8 @@ Display& NVFlinger::GetDisplay(u64 display_id) {
Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) {
auto& display = GetDisplay(display_id);
const auto itr = std::find_if(display.layers.begin(), display.layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
auto itr = std::find_if(display.layers.begin(), display.layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
ASSERT(itr != display.layers.end());
return *itr;
@@ -137,7 +145,7 @@ void NVFlinger::Compose() {
continue;
}
const auto& igbp_buffer = buffer->get().igbp_buffer;
auto& igbp_buffer = buffer->get().igbp_buffer;
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based

View File

@@ -4,7 +4,6 @@
#pragma once
#include <array>
#include <memory>
#include <string>
#include <string_view>
@@ -85,13 +84,7 @@ private:
std::shared_ptr<Nvidia::Module> nvdrv;
std::array<Display, 5> displays{{
{0, "Default"},
{1, "External"},
{2, "Edid"},
{3, "Internal"},
{4, "Null"},
}};
std::vector<Display> displays;
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
/// Id to use for the next layer that is created, this counter is shared among all displays.

View File

@@ -13,7 +13,7 @@ public:
explicit BootMode() : ServiceFramework{"pm:bm"} {
static const FunctionInfo functions[] = {
{0, &BootMode::GetBootMode, "GetBootMode"},
{1, &BootMode::SetMaintenanceBoot, "SetMaintenanceBoot"},
{1, nullptr, "SetMaintenanceBoot"},
};
RegisterHandlers(functions);
}
@@ -24,19 +24,8 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(boot_mode);
rb.Push<u32>(static_cast<u32>(SystemBootMode::Normal)); // Normal boot mode
}
/// Handler for the SetMaintenanceBoot command: switches the stored boot mode to Maintenance
/// and replies with a success result code.
void SetMaintenanceBoot(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_PM, "called");

    boot_mode = SystemBootMode::Maintenance;

    IPC::ResponseBuilder response{ctx, 2};
    response.Push(RESULT_SUCCESS);
}
SystemBootMode boot_mode = SystemBootMode::Normal;
};
class DebugMonitor final : public ServiceFramework<DebugMonitor> {

View File

@@ -9,12 +9,7 @@ class ServiceManager;
}
namespace Service::PM {
enum class SystemBootMode {
Normal,
Maintenance,
};
enum class SystemBootMode : u32 { Normal = 0, Maintenance = 1 };
/// Registers all PM services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& service_manager);

View File

@@ -704,14 +704,13 @@ private:
rb.Push(RESULT_SUCCESS);
}
// This function currently does nothing but return a success error code in
// the vi library itself, so do the same thing, but log out the passed in values.
void SetLayerVisibility(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u64 layer_id = rp.Pop<u64>();
const bool visibility = rp.Pop<bool>();
LOG_DEBUG(Service_VI, "called, layer_id=0x{:08X}, visibility={}", layer_id, visibility);
LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id,
visibility);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);

View File

@@ -74,33 +74,4 @@ void Apply() {
Service::HID::ReloadInputDevices();
}
/// Writes one configuration entry to the Config log at info level, formatted as "name: value".
template <typename ValueType>
void LogSetting(const std::string& name, const ValueType& value) {
    LOG_INFO(Config, "{}: {}", name, value);
}
void LogSettings() {
LOG_INFO(Config, "yuzu Configuration:");
LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
LogSetting("System_EnableNfc", Settings::values.enable_nfc);
LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
LogSetting("System_CurrentUser", Settings::values.current_user);
LogSetting("System_LanguageIndex", Settings::values.language_index);
LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit);
LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
LogSetting("DataStorage_UseVirtualSd", Settings::values.use_virtual_sd);
LogSetting("DataStorage_NandDir", Settings::values.nand_dir);
LogSetting("DataStorage_SdmcDir", Settings::values.sdmc_dir);
LogSetting("Debugging_UseGdbstub", Settings::values.use_gdbstub);
LogSetting("Debugging_GdbstubPort", Settings::values.gdbstub_port);
LogSetting("Debugging_ProgramArgs", Settings::values.program_args);
}
} // namespace Settings

View File

@@ -425,5 +425,4 @@ struct Values {
} extern values;
void Apply();
void LogSettings();
} // namespace Settings

View File

@@ -59,35 +59,6 @@ add_library(video_core STATIC
renderer_opengl/renderer_opengl.h
renderer_opengl/utils.cpp
renderer_opengl/utils.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
shader/decode/bfi.cpp
shader/decode/shift.cpp
shader/decode/arithmetic_integer.cpp
shader/decode/arithmetic_integer_immediate.cpp
shader/decode/arithmetic_half.cpp
shader/decode/arithmetic_half_immediate.cpp
shader/decode/ffma.cpp
shader/decode/hfma2.cpp
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp
shader/decode/predicate_set_register.cpp
shader/decode/predicate_set_predicate.cpp
shader/decode/register_set_predicate.cpp
shader/decode/float_set.cpp
shader/decode/integer_set.cpp
shader/decode/half_set.cpp
shader/decode/video.cpp
shader/decode/xmad.cpp
shader/decode/other.cpp
shader/decode.cpp
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
surface.cpp
surface.h
textures/astc.cpp

View File

@@ -37,7 +37,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.viewports[viewport].depth_range_near = 0.0f;
regs.viewports[viewport].depth_range_far = 1.0f;
}
// Doom and Bomberman seem to use the uninitialized registers and just enable blend,
// so initialize blend registers with sane values
regs.blend.equation_rgb = Regs::Blend::Equation::Add;
@@ -67,7 +66,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_back_func_func = Regs::ComparisonOp::Always;
regs.stencil_back_func_mask = 0xFFFFFFFF;
regs.stencil_back_mask = 0xFFFFFFFF;
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
// register carrying a default value. Assume it's OpenGL's default (1).
regs.point_size = 1.0f;
@@ -80,9 +78,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.color_mask[color_mask].B.Assign(1);
regs.color_mask[color_mask].A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
regs.rt_separate_frag_data = 1;
}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {

View File

@@ -208,8 +208,6 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class StoreType : u64 {
@@ -399,10 +397,6 @@ struct IpaMode {
bool operator!=(const IpaMode& a) const {
return !operator==(a);
}
/// Lexicographic ordering: interpolation_mode is compared first, sampling_mode breaks ties.
bool operator<(const IpaMode& a) const {
    if (interpolation_mode < a.interpolation_mode) {
        return true;
    }
    if (a.interpolation_mode < interpolation_mode) {
        return false;
    }
    return sampling_mode < a.sampling_mode;
}
};
enum class SystemVariable : u64 {
@@ -650,7 +644,6 @@ union Instruction {
BitField<37, 2, HalfPrecision> precision;
BitField<32, 1, u64> saturate;
BitField<31, 1, u64> negate_b;
BitField<30, 1, u64> negate_c;
BitField<35, 2, HalfType> type_c;
} rr;
@@ -786,12 +779,6 @@ union Instruction {
BitField<44, 2, u64> unknown;
} st_l;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> immediate_offset;
} ldg;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
@@ -1444,7 +1431,6 @@ public:
PredicateSetRegister,
RegisterSetPredicate,
Conversion,
Video,
Xmad,
Unknown,
};
@@ -1576,8 +1562,8 @@ private:
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),

View File

@@ -106,7 +106,7 @@ struct Header {
} ps;
};
u64 GetLocalMemorySize() const {
u64 GetLocalMemorySize() {
return (common1.shader_local_memory_low_size |
(common2.shader_local_memory_high_size << 24));
}

View File

@@ -3,8 +3,6 @@
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -126,36 +124,9 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
}
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
Unk2c = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
Unk70 = 0x1C,
Unk74 = 0x1D,
Unk78 = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
AcquireGequal = 0x4,
AcquireMask = 0x8,
BindObject = 0,
CountBufferMethods = 0x40,
};
void GPU::CallMethod(const MethodCall& method_call) {
@@ -164,78 +135,20 @@ void GPU::CallMethod(const MethodCall& method_call) {
ASSERT(method_call.subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method_call)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
return;
}
}
/// Returns true when the method number is NonPullerMethods or higher, i.e. when the call is
/// to be dispatched to an engine rather than handled as a puller method.
bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
    return static_cast<BufferMethods>(method_call.method) >= BufferMethods::NonPullerMethods;
}
if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
// TODO(Subv): Research and implement these methods.
LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
return;
}
/// Handles a puller method: the argument is stored in the puller register array and then the
/// method-specific handler (if any) is run.
void GPU::CallPullerMethod(const MethodCall& method_call) {
    // Record the argument first; the handlers below read their inputs from regs
    regs.reg_array[method_call.method] = method_call.argument;

    const auto puller_method = static_cast<BufferMethods>(method_call.method);
    switch (puller_method) {
    case BufferMethods::BindObject:
        ProcessBindMethod(method_call);
        break;

    // For these methods storing the argument is all that is done
    case BufferMethods::Nop:
    case BufferMethods::SemaphoreAddressHigh:
    case BufferMethods::SemaphoreAddressLow:
    case BufferMethods::SemaphoreSequence:
    case BufferMethods::RefCnt:
        break;

    case BufferMethods::SemaphoreTrigger:
        ProcessSemaphoreTriggerMethod();
        break;

    case BufferMethods::NotifyIntr:
        // TODO(Kmather73): Research and implement this method.
        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
        break;

    case BufferMethods::WrcacheFlush:
        // TODO(Kmather73): Research and implement this method.
        LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
        break;

    case BufferMethods::Unk28:
        // TODO(Kmather73): Research and implement this method.
        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
        break;

    case BufferMethods::Unk2c:
        // TODO(Kmather73): Research and implement this method.
        LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
        break;

    case BufferMethods::SemaphoreAcquire:
        ProcessSemaphoreAcquire();
        break;

    case BufferMethods::SemaphoreRelease:
        ProcessSemaphoreRelease();
        break;

    case BufferMethods::Yield:
        // TODO(Kmather73): Research and implement this method.
        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
        break;

    default:
        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
                  static_cast<u32>(puller_method));
        break;
    }
}
void GPU::CallEngineMethod(const MethodCall& method_call) {
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
@@ -259,76 +172,4 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
}
}
/// Binds the call's subchannel to the engine whose id is given in the method argument.
void GPU::ProcessBindMethod(const MethodCall& method_call) {
    const auto engine_id = static_cast<EngineID>(method_call.argument);
    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
              method_call.argument);
    bound_engines[method_call.subchannel] = engine_id;
}
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
struct Block {
u32 sequence;
u32 zeros = 0;
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = CoreTiming::GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block));
} else {
const auto address =
memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
// Nothing to do in this case
} else {
regs.acquire_source = true;
regs.acquire_value = regs.semaphore_sequence;
if (op == GpuSemaphoreOperation::AcquireEqual) {
regs.acquire_active = true;
regs.acquire_mode = false;
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
regs.acquire_active = true;
regs.acquire_mode = true;
} else if (op == GpuSemaphoreOperation::AcquireMask) {
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
// semaphore_sequence, gives a non-0 result
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
} else {
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
}
}
}
}
/// Writes the semaphore_release register value to the 32-bit word at the semaphore address.
void GPU::ProcessSemaphoreRelease() {
    const auto gpu_address = regs.smaphore_address.SmaphoreAddress();
    const auto cpu_address = memory_manager->GpuToCpuAddress(gpu_address);
    Memory::Write32(*cpu_address, regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
}
}
} // namespace Tegra

View File

@@ -156,46 +156,6 @@ public:
/// Returns a const reference to the GPU DMA pusher.
const Tegra::DmaPusher& DmaPusher() const;
/// Puller register state. The named fields and reg_array are members of the same union: the
/// puller code stores method arguments through reg_array and reads them back through the named
/// fields. Field positions (in 32-bit words) are checked by the ASSERT_REG_POSITION
/// static_asserts that follow the class.
struct Regs {
// Number of 32-bit entries in reg_array
static constexpr size_t NUM_REGS = 0x100;
union {
struct {
INSERT_PADDING_WORDS(0x4);
// Semaphore address, stored as two 32-bit halves (words 0x4 and 0x5)
struct {
u32 address_high;
u32 address_low;
// Combines address_high (upper 32 bits) and address_low (lower 32 bits)
GPUVAddr SmaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} smaphore_address;
u32 semaphore_sequence;
u32 semaphore_trigger;
INSERT_PADDING_WORDS(0xC);
// The pusher and the puller share the reference counter; the pusher only has read
// access
u32 reference_count;
INSERT_PADDING_WORDS(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
INSERT_PADDING_WORDS(0xE4);
// Puller state
// These fields start at word 0x100, i.e. after the last of the NUM_REGS entries of reg_array
u32 acquire_mode;
u32 acquire_source;
u32 acquire_active;
u32 acquire_timeout;
u32 acquire_value;
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
private:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
@@ -213,37 +173,6 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
void ProcessBindMethod(const MethodCall& method_call);
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
// Calls a GPU puller method.
void CallPullerMethod(const MethodCall& method_call);
// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
// Determines where the method should be executed.
bool ExecuteMethodOnEngine(const MethodCall& method_call);
};
// Compile-time layout checks for GPU::Regs: `position` is a field's index in 32-bit words, so
// its expected byte offset is position * 4. The macro is only needed for the checks below and
// is undefined again right after them.
#define ASSERT_REG_POSITION(field_name, position)                                                  \
static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(smaphore_address, 0x4);
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
ASSERT_REG_POSITION(reference_count, 0x14);
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
ASSERT_REG_POSITION(semaphore_release, 0x1B);
// Puller state fields
ASSERT_REG_POSITION(acquire_mode, 0x100);
ASSERT_REG_POSITION(acquire_source, 0x101);
ASSERT_REG_POSITION(acquire_active, 0x102);
ASSERT_REG_POSITION(acquire_timeout, 0x103);
ASSERT_REG_POSITION(acquire_value, 0x104);
#undef ASSERT_REG_POSITION
} // namespace Tegra

View File

@@ -4,13 +4,8 @@
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -23,72 +18,7 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
}
/// Re-uploads the region from guest memory into its OpenGL buffer. The requested size is
/// clamped to RasterizerOpenGL::MaxGlobalMemorySize; an oversized request is logged.
void CachedGlobalRegion::Reload(u32 size_) {
    constexpr u32 size_limit = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);

    const bool oversized = size_ > size_limit;
    if (oversized) {
        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
                     size_limit);
    }
    size = oversized ? size_limit : size_;

    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
    glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
}
/// Looks up the reserved region stored for `addr`; returns an empty GlobalRegion when there is
/// none. `size` is not used by the lookup.
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
    const auto it = reserve.find(addr);
    return it != reserve.end() ? it->second : GlobalRegion{};
}
/// Returns a freshly reloaded region for `addr`: a reserved region is reused when one exists,
/// otherwise a new one is created and added to the reserve.
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
    GlobalRegion region = TryGetReservedGlobalRegion(addr, size);
    if (!region) {
        // Nothing reserved for this address yet: create the region and remember it
        region = std::make_shared<CachedGlobalRegion>(addr, size);
        ReserveGlobalRegion(region);
    }

    region->Reload(size);
    return region;
}
/// Stores the region in the reserve map under its address, replacing any previous entry.
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
    reserve.insert_or_assign(region->GetAddr(), region);
}
/// Constructs the cache; the rasterizer is passed on to the RasterizerCache base class.
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache(rasterizer) {
}
/// Returns the cached global memory region described by `global_region` for the given stage.
///
/// The descriptor names a const buffer index and offset. At that location the guest stores a
/// 64-bit GPU address followed by a 32-bit size, which together describe the region. The region
/// is looked up in the cache by its CPU address and created and registered when it is missing.
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    const GLShader::GlobalMemoryEntry& global_region,
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
    auto& gpu{Core::System::GetInstance().GPU()};

    // Bind by reference: only one const buffer address is read from the stage state, so copying
    // the whole per-stage state on every lookup is unnecessary.
    const auto& cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
    const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
        cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
    ASSERT(cbuf_addr);

    // Descriptor layout in the const buffer: u64 GPU address, then u32 size at byte offset 8
    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
    const auto size = Memory::Read32(*cbuf_addr + 8);
    const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
    ASSERT(actual_addr);

    // Look up global region in the cache based on address
    GlobalRegion region = TryGet(*actual_addr);
    if (!region) {
        // No global region found - create a new one
        region = GetUncachedGlobalRegion(*actual_addr, size);
        Register(region);
    }
    return region;
}
} // namespace OpenGL

View File

@@ -5,13 +5,9 @@
#pragma once
#include <memory>
#include <unordered_map>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -44,9 +40,6 @@ public:
return buffer.handle;
}
/// Reloads the global region from guest memory
void Reload(u32 size_);
// TODO(Rodrigo): When global memory is written (STG), implement flushing
void Flush() override {
UNIMPLEMENTED();
@@ -62,17 +55,6 @@ private:
/// Rasterizer cache of global memory regions, with an additional address-keyed reserve of
/// regions that is consulted before a new region is created.
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Gets the global memory region described by the given entry for the given shader stage,
/// creating and registering it if it is not cached yet
GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
/// Returns the reserved region stored for addr, or an empty GlobalRegion if there is none
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
/// Returns a reloaded region for addr, reusing a reserved one or creating and reserving a new one
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
/// Stores the region in the reserve under its address
void ReserveGlobalRegion(const GlobalRegion& region);
/// Reserved regions, keyed by their address
std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL

View File

@@ -297,7 +297,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
BaseBindings base_bindings;
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,35 +324,43 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the emulation info buffer
glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
// Bind the buffer
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(),
offset, static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
shader_program_manager->UseProgrammableVertexShader(program_handle);
case Maxwell::ShaderProgram::VertexB: {
shader_program_manager->UseProgrammableVertexShader(
shader->GetProgramHandle(primitive_mode));
break;
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseProgrammableGeometryShader(program_handle);
}
case Maxwell::ShaderProgram::Geometry: {
shader_program_manager->UseProgrammableGeometryShader(
shader->GetProgramHandle(primitive_mode));
break;
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->UseProgrammableFragmentShader(program_handle);
}
case Maxwell::ShaderProgram::Fragment: {
shader_program_manager->UseProgrammableFragmentShader(
shader->GetProgramHandle(primitive_mode));
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
UNREACHABLE();
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
primitive_mode, current_texture_bindpoint);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -364,8 +375,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
base_bindings = next_bindings;
}
SyncClipEnabled(clip_distances);
@@ -500,7 +509,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
}
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// TODO(bunnei): Figure out how the below register works. According to envytools, this should be
// used to enable multiple render targets. However, it is left unset on all games that I have
// tested.
UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
// Bind the framebuffer surfaces
current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
@@ -634,6 +646,8 @@ void RasterizerOpenGL::Clear() {
return;
}
ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
regs.clear_buffers.RT.Value());
if (regs.clear_flags.scissor) {
@@ -667,6 +681,8 @@ void RasterizerOpenGL::DrawArrays() {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers(state);
SyncColorMask();
SyncFragmentColorClampState();
@@ -915,14 +931,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffers;
const auto& entries = shader->GetShaderEntries().const_buffer_entries;
constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
std::array<GLuint, max_binds> bind_buffers;
@@ -965,70 +980,75 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
GLintptr const_buffer_offset = buffer_cache.UploadMemory(
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
shader->GetProgramResourceIndex(used_buffer),
current_bindpoint + bindpoint);
// Prepare values for multibind
bind_buffers[bindpoint] = buffer_cache.GetHandle();
bind_offsets[bindpoint] = const_buffer_offset;
bind_sizes[bindpoint] = size;
}
// The first binding is reserved for emulation values
const GLuint ubo_base_binding = base_bindings.cbuf + 1;
glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
return current_bindpoint + static_cast<u32>(entries.size());
}
/// Binds the global memory regions used by a shader stage as shader storage buffers.
/// Entry N of the shader's global memory entries is bound to binding point
/// base_bindings.gmem + N.
/// @param stage Shader stage whose global memory entries are being bound.
/// @param shader Shader object that provides the global memory entries.
/// @param primitive_mode Not used by this function.
/// @param base_bindings Supplies the first shader storage binding point (gmem) to use.
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                          const Shader& shader, GLenum primitive_mode,
                                          BaseBindings base_bindings) {
    // TODO(Rodrigo): Use ARB_multi_bind here
    const auto& global_entries = shader->GetShaderEntries().global_memory_entries;

    u32 binding = base_bindings.gmem;
    for (u32 index = 0; index < static_cast<u32>(global_entries.size()); ++index, ++binding) {
        const auto& region = global_cache.GetGlobalRegion(global_entries[index], stage);
        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, region->GetBufferHandle());
    }
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
const auto& entries = shader->GetShaderEntries().texture_samplers;
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
auto& unit = state.texture_units[current_bindpoint];
const u32 current_bindpoint = current_unit + bindpoint;
// Bind the uniform to the sampler.
glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
shader->GetUniformLocation(entry), current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
if (!texture.enabled) {
unit.texture = 0;
state.texture_units[current_bindpoint].texture = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) {
unit.texture =
const GLuint handle =
entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
state.texture_units[current_bindpoint].texture = handle;
state.texture_units[current_bindpoint].target = target;
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
MaxwellToGL::SwizzleSource(texture.tic.y_source);
state.texture_units[current_bindpoint].swizzle.b =
MaxwellToGL::SwizzleSource(texture.tic.z_source);
state.texture_units[current_bindpoint].swizzle.a =
MaxwellToGL::SwizzleSource(texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
unit.texture = 0;
state.texture_units[current_bindpoint].texture = 0;
}
}
return current_unit + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {

View File

@@ -127,18 +127,25 @@ private:
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/**
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/**
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
* @param current_unit The offset at which to start counting unused texture units.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);

View File

@@ -18,6 +18,7 @@
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/surface.h"
#include "video_core/textures/astc.h"
@@ -43,14 +44,14 @@ struct FormatTuple {
bool compressed;
};
static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) {
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
if (max_mip_level == 1) {
glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0);
}
}
@@ -127,7 +128,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.unaligned_height = config.tic.Height();
params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
params.identity = SurfaceClass::Uploaded;
switch (params.target) {
case SurfaceTarget::Texture1D:
@@ -167,7 +167,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}
params.is_layered = SurfaceTargetIsLayered(params.target);
params.is_array = SurfaceTargetIsArray(params.target);
params.max_mip_level = config.tic.max_mip_level + 1;
params.rt = {};
@@ -195,7 +194,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.identity = SurfaceClass::RenderTarget;
params.depth = 1;
params.max_mip_level = 1;
params.is_layered = false;
@@ -231,7 +229,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.identity = SurfaceClass::DepthBuffer;
params.depth = 1;
params.max_mip_level = 1;
params.is_layered = false;
@@ -260,7 +257,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.identity = SurfaceClass::Copy;
params.depth = 1;
params.max_mip_level = 1;
params.rt = {};
@@ -528,43 +524,58 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
CachedSurface::CachedSurface(const SurfaceParams& params)
: params(params), gl_target(SurfaceTargetToGL(params.target)),
cached_size_in_bytes(params.size_in_bytes) {
texture.Create(gl_target);
texture.Create();
const auto& rect{params.GetRect()};
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
// Keep track of previous texture bindings
OpenGLState cur_state = OpenGLState::GetCurState();
const auto& old_tex = cur_state.texture_units[0];
SCOPE_EXIT({
cur_state.texture_units[0] = old_tex;
cur_state.Apply();
});
cur_state.texture_units[0].texture = texture.handle;
cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
cur_state.Apply();
glActiveTexture(GL_TEXTURE0);
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
gl_is_compressed = format_tuple.compressed;
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width);
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height, params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
if (!format_tuple.compressed) {
// Only pre-create the texture for non-compressed textures.
switch (params.target) {
case SurfaceTarget::Texture1D:
glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth());
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
}
}
ApplyTextureDefaults(texture.handle, params.max_mip_level);
ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
SurfaceParams::SurfaceTargetName(params.target));
// Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -737,50 +748,63 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
const auto& rect{params.GetRect(mip_map)};
// Load data from memory to the surface
const auto x0 = static_cast<GLint>(rect.left);
const auto y0 = static_cast<GLint>(rect.bottom);
auto buffer_offset =
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
std::size_t buffer_offset =
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
static_cast<std::size_t>(x0)) *
GetBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
const GLuint target_tex = texture.handle;
OpenGLState cur_state = OpenGLState::GetCurState();
const auto& old_tex = cur_state.texture_units[0];
SCOPE_EXIT({
cur_state.texture_units[0] = old_tex;
cur_state.Apply();
});
cur_state.texture_units[0].texture = target_tex;
cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
glActiveTexture(GL_TEXTURE0);
if (tuple.compressed) {
switch (params.target) {
case SurfaceTarget::Texture2D:
glCompressedTextureSubImage2D(
texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
&gl_buffer[mip_map][buffer_offset]);
glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture3D:
glCompressedTextureSubImage3D(
texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
&gl_buffer[mip_map][buffer_offset]);
glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glCompressedTextureSubImage3D(
texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.depth), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTextureSubImage3D(
texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
layer_size, &gl_buffer[mip_map][buffer_offset]);
glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
layer_size, &gl_buffer[mip_map][buffer_offset]);
buffer_offset += layer_size;
}
break;
@@ -789,43 +813,46 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glCompressedTextureSubImage2D(
texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
static_cast<GLsizei>(params.size_in_bytes_gl),
&gl_buffer[mip_map][buffer_offset]);
}
} else {
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2D:
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture3D:
glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
std::size_t start = buffer_offset;
for (std::size_t face = 0; face < params.depth; ++face) {
glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
buffer_offset += params.LayerSizeGL(mip_map);
}
break;
@@ -834,10 +861,9 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
}
}
@@ -847,18 +873,26 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
void CachedSurface::EnsureTextureView() {
if (texture_view.handle != 0)
return;
// Compressed textures are not being created with immutable storage
UNIMPLEMENTED_IF(gl_is_compressed);
const GLenum target{TargetLayer()};
const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
constexpr GLuint min_layer = 0;
constexpr GLuint min_level = 0;
glGenTextures(1, &texture_view.handle);
texture_view.Create();
glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
params.max_mip_level, 0, 1);
ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
reinterpret_cast<const GLint*>(swizzle.data()));
OpenGLState cur_state = OpenGLState::GetCurState();
const auto& old_tex = cur_state.texture_units[0];
SCOPE_EXIT({
cur_state.texture_units[0] = old_tex;
cur_state.Apply();
});
cur_state.texture_units[0].texture = texture_view.handle;
cur_state.texture_units[0].target = target;
cur_state.Apply();
ApplyTextureDefaults(target, params.max_mip_level);
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
@@ -872,25 +906,6 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
}
/// Updates the component swizzle of this surface's texture.
/// The four Maxwell swizzle sources are translated to GL enums; when they match the swizzle
/// already stored nothing is sent to GL. Otherwise the new swizzle is stored and applied to the
/// texture, and also to the texture view when one has been created.
void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                                  Tegra::Texture::SwizzleSource swizzle_y,
                                  Tegra::Texture::SwizzleSource swizzle_z,
                                  Tegra::Texture::SwizzleSource swizzle_w) {
    const std::array<GLenum, 4> requested{
        MaxwellToGL::SwizzleSource(swizzle_x),
        MaxwellToGL::SwizzleSource(swizzle_y),
        MaxwellToGL::SwizzleSource(swizzle_z),
        MaxwellToGL::SwizzleSource(swizzle_w),
    };
    if (requested == swizzle) {
        // Nothing changed, avoid redundant GL calls
        return;
    }
    swizzle = requested;

    // GL expects the swizzle as GLint values; read them from the stored member
    const GLint* const swizzle_values = reinterpret_cast<const GLint*>(swizzle.data());
    glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_values);

    const bool has_view = texture_view.handle != 0;
    if (has_view) {
        glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_values);
    }
}
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {
read_framebuffer.Create();

View File

@@ -35,14 +35,6 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
struct SurfaceParams {
/// Classification stored in SurfaceParams::identity. ClassName() turns each value into a
/// two-letter tag that becomes part of IdentityString().
enum class SurfaceClass {
// Tagged "UP" by ClassName()
Uploaded,
// Tagged "RT" by ClassName()
RenderTarget,
// Tagged "DB" by ClassName()
DepthBuffer,
// Tagged "CP" by ClassName()
Copy,
};
static std::string SurfaceTargetName(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
@@ -218,48 +210,6 @@ struct SurfaceParams {
/// Initializes parameters for caching, should be called after everything has been initialized
void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
/// Returns a short tag describing the surface target, used to build IdentityString().
/// @returns "1D", "2D", "3D", "1DArray", "2DArray", "Cube" or "CubeArray". Any other target
/// is logged as critical, trips UNREACHABLE() and yields "TUK(<numeric target>)".
std::string TargetName() const {
    switch (target) {
    case SurfaceTarget::Texture1D:
        return "1D";
    case SurfaceTarget::Texture2D:
        return "2D";
    case SurfaceTarget::Texture3D:
        return "3D";
    case SurfaceTarget::Texture1DArray:
        return "1DArray";
    case SurfaceTarget::Texture2DArray:
        return "2DArray";
    case SurfaceTarget::TextureCubemap:
        return "Cube";
    case SurfaceTarget::TextureCubeArray:
        // Cube arrays are created like any other surface and get labelled too, so they need
        // a name instead of falling into the unreachable default.
        return "CubeArray";
    default:
        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
        UNREACHABLE();
        return fmt::format("TUK({})", static_cast<u32>(target));
    }
}
/// Returns the two-letter tag for the surface class stored in `identity`.
/// @returns "UP", "RT", "DB" or "CP". Any other value is logged as critical, trips
/// UNREACHABLE() and yields "CUK(<numeric class>)".
std::string ClassName() const {
    if (identity == SurfaceClass::Uploaded) {
        return "UP";
    }
    if (identity == SurfaceClass::RenderTarget) {
        return "RT";
    }
    if (identity == SurfaceClass::DepthBuffer) {
        return "DB";
    }
    if (identity == SurfaceClass::Copy) {
        return "CP";
    }

    // Not one of the known classes
    LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
    UNREACHABLE();
    return fmt::format("CUK({})", static_cast<u32>(identity));
}
/// Builds the identity string "<class>_<target>_<T|L>".
/// The last character is 'T' when is_tiled is set and 'L' otherwise.
std::string IdentityString() const {
    std::string identity_string = ClassName();
    identity_string += '_';
    identity_string += TargetName();
    identity_string += '_';
    identity_string += (is_tiled ? 'T' : 'L');
    return identity_string;
}
bool is_tiled;
u32 block_width;
u32 block_height;
@@ -273,10 +223,8 @@ struct SurfaceParams {
u32 depth;
u32 unaligned_height;
SurfaceTarget target;
SurfaceClass identity;
u32 max_mip_level;
bool is_layered;
bool is_array;
bool srgb_conversion;
// Parameters used for caching
VAddr addr;
@@ -307,7 +255,6 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
SurfaceReserveKey res;
res.state = params;
res.state.identity = {}; // Ignore the origin of the texture
res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
res.state.rt = {}; // Ignore rt config in caching
return res;
@@ -347,7 +294,7 @@ public:
}
const OGLTexture& TextureLayer() {
if (params.is_array) {
if (params.is_layered) {
return Texture();
}
EnsureTextureView();
@@ -382,11 +329,6 @@ public:
// Upload data in gl_buffer to this surface's texture
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
@@ -398,8 +340,8 @@ private:
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};
bool gl_is_compressed{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {

View File

@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
namespace OpenGL {
void OGLTexture::Create(GLenum target) {
void OGLTexture::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glCreateTextures(target, 1, &handle);
glGenTextures(1, &handle);
}
void OGLTexture::Release() {

View File

@@ -28,7 +28,7 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
void Create(GLenum target);
void Create();
/// Deletes the internal OpenGL resource
void Release();

View File

@@ -10,15 +10,11 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
using VideoCommon::Shader::ProgramCode;
/// Gets the address for the specified shader stage program
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -28,31 +24,42 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
}
/// Gets the shader program code from memory for the specified address
static ProgramCode GetShaderCode(VAddr addr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
static GLShader::ProgramCode GetShaderCode(VAddr addr) {
GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
return program_code;
}
/// Gets the shader type from a Maxwell program type
constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return GL_VERTEX_SHADER;
case Maxwell::ShaderProgram::Geometry:
return GL_GEOMETRY_SHADER;
case Maxwell::ShaderProgram::Fragment:
return GL_FRAGMENT_SHADER;
default:
return GL_NONE;
/// Binds one named uniform block of a program to the binding point of a shader stage.
/// @param shader        GL program object that is queried and updated.
/// @param name          Name of the uniform block to look up in the program.
/// @param binding       Shader stage whose numeric value is used as the binding point.
/// @param expected_size Size in bytes the block is asserted to report.
/// If the program does not contain a block with that name, nothing is done.
static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
                                         Maxwell::ShaderStage binding, std::size_t expected_size) {
    const GLuint ub_index = glGetUniformBlockIndex(shader, name);
    if (ub_index != GL_INVALID_INDEX) {
        // Verify the block layout reported by the driver before wiring it up
        GLint ub_size = 0;
        glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
        ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
                   "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
        glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
    }
}
/// Binds the per-stage configuration uniform blocks ("vs_config", "gs_config", "fs_config") of a
/// program to the binding point of their stage. Blocks missing from the program are skipped by
/// SetShaderUniformBlockBinding.
static void SetShaderUniformBlockBindings(GLuint shader) {
    struct StageBlock {
        const char* name;
        Maxwell::ShaderStage stage;
    };
    // Same order as before: vertex, geometry, fragment
    const StageBlock stage_blocks[] = {
        {"vs_config", Maxwell::ShaderStage::Vertex},
        {"gs_config", Maxwell::ShaderStage::Geometry},
        {"fs_config", Maxwell::ShaderStage::Fragment},
    };
    for (const auto& block : stage_blocks) {
        SetShaderUniformBlockBinding(shader, block.name, block.stage,
                                     sizeof(GLShader::MaxwellUniformData));
    }
}
CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
: addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
GLShader::ProgramResult program_result;
GLenum gl_type{};
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
@@ -63,14 +70,17 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
case Maxwell::ShaderProgram::VertexB:
CalculateProperties();
program_result = GLShader::GenerateVertexShader(setup);
gl_type = GL_VERTEX_SHADER;
break;
case Maxwell::ShaderProgram::Geometry:
CalculateProperties();
program_result = GLShader::GenerateGeometryShader(setup);
gl_type = GL_GEOMETRY_SHADER;
break;
case Maxwell::ShaderProgram::Fragment:
CalculateProperties();
program_result = GLShader::GenerateFragmentShader(setup);
gl_type = GL_FRAGMENT_SHADER;
break;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
@@ -78,105 +88,59 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
return;
}
code = program_result.first;
entries = program_result.second;
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
if (program_type != Maxwell::ShaderProgram::Geometry) {
OGLShader shader;
shader.Create(program_result.first.c_str(), gl_type);
program.Create(true, shader.handle);
SetShaderUniformBlockBindings(program.handle);
LabelGLObject(GL_PROGRAM, program.handle, addr);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
auto& program = entry->second;
if (is_cache_miss) {
std::string source = AllocateBindings(base_bindings);
source += code;
OGLShader shader;
shader.Create(source.c_str(), GetShaderType(program_type));
program.Create(true, shader.handle);
LabelGLObject(GL_PROGRAM, program.handle, addr);
}
handle = program.handle;
}
// Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
// emulation values
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
return {handle, base_bindings};
}
std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
std::string code = "#version 430 core\n";
code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
for (const auto& cbuf : entries.const_buffers) {
code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
}
for (const auto& gmem : entries.global_memory_entries) {
code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
gmem.GetCbufOffset(), base_bindings.gmem++);
}
for (const auto& sampler : entries.samplers) {
code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
return code;
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
auto& programs = entry->second;
switch (primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
"ShaderLinesAdjacency");
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
"ShaderTriangles");
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
"triangles_adjacency", 6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
// Store shader's code to lazily build it on draw
geometry_programs.code = program_result.first;
}
}
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
/// Returns the GL_UNIFORM_BLOCK resource index of a const buffer in this shader's program.
/// The result is memoized in resource_cache, keyed by the entry's hash, so the driver is only
/// queried the first time a given entry is requested.
/// NOTE(review): the query uses `program.handle`; confirm this is never reached for geometry
/// shaders, whose programs are built lazily per topology.
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
    if (const auto cached = resource_cache.find(buffer.GetHash());
        cached != resource_cache.end()) {
        return cached->second;
    }
    // Cache miss: ask the driver and remember the answer
    const GLuint index =
        glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str());
    resource_cache[buffer.GetHash()] = index;
    return index;
}
/// Returns the uniform location of a sampler in this shader's program.
/// The result is memoized in uniform_cache, keyed by the entry's hash, so the driver is only
/// queried the first time a given sampler is requested.
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
    if (const auto cached = uniform_cache.find(sampler.GetHash());
        cached != uniform_cache.end()) {
        return cached->second;
    }
    // Cache miss: ask the driver and remember the answer
    const GLint location = glGetUniformLocation(program.handle, sampler.GetName().c_str());
    uniform_cache[sampler.GetHash()] = location;
    return location;
}
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name) {
if (target_program.handle != 0) {
return target_program.handle;
}
std::string source = AllocateBindings(base_bindings);
std::string source = "#version 430 core\n";
source += "layout (" + glsl_topology + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
source += code;
source += geometry_programs.code;
OGLShader shader;
shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
target_program.Create(true, shader.handle);
SetShaderUniformBlockBindings(target_program.handle);
LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
return target_program.handle;
};

View File

@@ -7,15 +7,11 @@
#include <array>
#include <map>
#include <memory>
#include <tuple>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace OpenGL {
@@ -26,16 +22,6 @@ class RasterizerOpenGL;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
bool operator<(const BaseBindings& rhs) const {
return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
}
};
class CachedShader final : public RasterizerCacheObject {
public:
CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
@@ -57,45 +43,70 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
GLuint GetProgramHandle(GLenum primitive_mode) {
if (program_type != Maxwell::ShaderProgram::Geometry) {
return program.handle;
}
switch (primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
"ShaderLinesAdjacency");
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
"ShaderTriangles");
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
}
}
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
private:
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
u32 max_vertices, const std::string& debug_name);
void CalculateProperties();
VAddr addr;
std::size_t shader_length;
Maxwell::ShaderProgram program_type;
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
// Non-geometry program.
OGLProgram program;
// Geometry programs. These are needed because GLSL needs an input topology but it's not
// declared by the hardware. Work around this issue by generating a different shader per input
// topology class.
struct GeometryPrograms {
struct {
std::string code;
OGLProgram points;
OGLProgram lines;
OGLProgram lines_adjacency;
OGLProgram triangles;
OGLProgram triangles_adjacency;
};
} geometry_programs;
std::string AllocateBindings(BaseBindings base_bindings);
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name);
void CalculateProperties();
VAddr addr{};
std::size_t shader_length{};
Maxwell::ShaderProgram program_type{};
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
std::string code;
std::map<BaseBindings, OGLProgram> programs;
std::map<BaseBindings, GeometryPrograms> geometry_programs;
std::map<u32, GLuint> cbuf_resource_cache;
std::map<u32, GLuint> gmem_resource_cache;
std::map<u32, GLuint> resource_cache;
std::map<u32, GLint> uniform_cache;
};

File diff suppressed because it is too large Load Diff

View File

@@ -5,106 +5,21 @@
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace OpenGL::GLShader::Decompiler {
namespace OpenGL::GLShader {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
/// A const buffer used by a decompiled shader, extended with the stage it belongs to, the name
/// it was given and an index.
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
    /// @param entry Const buffer description that is copied into the base class.
    /// @param stage Shader stage that uses the buffer.
    /// @param name  Name associated with the buffer.
    /// @param index Index of the buffer.
    explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry,
                              Maxwell::ShaderStage stage, const std::string& name, u32 index)
        // Initializers are listed in declaration order (name, stage, index); members are always
        // initialized in that order, so the list now matches what actually happens.
        : VideoCommon::Shader::ConstBuffer{entry}, name{name}, stage{stage}, index{index} {}

    /// Name associated with the buffer.
    const std::string& GetName() const {
        return name;
    }

    /// Shader stage that uses the buffer.
    Maxwell::ShaderStage GetStage() const {
        return stage;
    }

    /// Index of the buffer.
    u32 GetIndex() const {
        return index;
    }

private:
    std::string name;
    Maxwell::ShaderStage stage{};
    u32 index{};
};
/// A sampler used by a decompiled shader, extended with the stage it belongs to and the name it
/// was given.
class SamplerEntry : public VideoCommon::Shader::Sampler {
public:
    /// @param entry Sampler description that is copied into the base class.
    /// @param stage Shader stage that uses the sampler.
    /// @param name  Name associated with the sampler.
    explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
                          const std::string& name)
        // Initializers are listed in declaration order (name, stage); members are always
        // initialized in that order, so the list now matches what actually happens.
        : VideoCommon::Shader::Sampler{entry}, name{name}, stage{stage} {}

    /// Name associated with the sampler.
    const std::string& GetName() const {
        return name;
    }

    /// Shader stage that uses the sampler.
    Maxwell::ShaderStage GetStage() const {
        return stage;
    }

private:
    std::string name;
    Maxwell::ShaderStage stage{};
};
/// A global memory region used by a decompiled shader. The region is identified by the const
/// buffer index and offset stored here; the entry also carries the owning stage and a name.
class GlobalMemoryEntry {
public:
    /// @param cbuf_index  Const buffer index identifying the region.
    /// @param cbuf_offset Const buffer offset identifying the region.
    /// @param stage       Shader stage that uses the region.
    /// @param name        Name associated with the region; moved into the entry.
    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
                               std::string name)
        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}

    /// Shader stage that uses the region.
    Maxwell::ShaderStage GetStage() const {
        return stage;
    }

    /// Name associated with the region.
    const std::string& GetName() const {
        return name;
    }

    /// Const buffer index identifying the region.
    u32 GetCbufIndex() const {
        return cbuf_index;
    }

    /// Const buffer offset identifying the region.
    u32 GetCbufOffset() const {
        return cbuf_offset;
    }

private:
    u32 cbuf_index{};
    u32 cbuf_offset{};
    Maxwell::ShaderStage stage{};
    std::string name;
};
/// Resources and metadata reported for one decompiled shader. It is the second element of a
/// ProgramResult, next to the generated source string.
struct ShaderEntries {
// Const buffers used by the shader
std::vector<ConstBufferEntry> const_buffers;
// Samplers used by the shader
std::vector<SamplerEntry> samplers;
// Global memory regions used by the shader
std::vector<GlobalMemoryEntry> global_memory_entries;
// One flag per clip distance (presumably set when the shader writes it; confirm in decompiler)
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
// Length of the shader (unit not visible here; confirm against the producer)
std::size_t shader_length{};
};
using ProgramResult = std::pair<std::string, ShaderEntries>;
using Tegra::Engines::Maxwell3D;
std::string GetCommonDeclarations();
ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
const std::string& suffix);
std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix);
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader::Decompiler

View File

@@ -7,57 +7,63 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += Decompiler::GetCommonDeclarations();
out += R"(
layout (location = 0) out vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
layout(std140) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
if (setup.IsDualProgram()) {
out += "bool exec_vertex_b();\n";
}
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
.value_or(ProgramResult());
out += program.first;
if (setup.IsDualProgram()) {
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
.value_or(ProgramResult());
out += program_b.first;
}
out += R"(
void main() {
position = vec4(0.0, 0.0, 0.0, 0.0);
execute_vertex();
exec_vertex();
)";
if (setup.IsDualProgram()) {
out += " execute_vertex_b();";
out += " exec_vertex_b();";
}
out += R"(
// Check if the flip stage is VertexB
// Config pack's second value is flip_stage
if (config_pack[1] == 1) {
@@ -71,62 +77,73 @@ void main() {
if (config_pack[1] == 1) {
position.w = 1.0;
}
})";
}
)";
return {out, program.second};
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
// Version is intentionally skipped in shader generation, it's added by the lazy compilation.
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
out += "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
out += Decompiler::GetCommonDeclarations();
out += "bool exec_geometry();\n";
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
.value_or(ProgramResult());
out += R"(
out gl_PerVertex {
vec4 gl_Position;
};
layout (location = 0) in vec4 gs_position[];
layout (location = 0) out vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
layout (std140) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
};
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
out += R"(
void main() {
execute_geometry();
};)";
exec_geometry();
}
)";
out += program.first;
return {out, program.second};
}
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
out += Decompiler::GetCommonDeclarations();
out += "bool exec_fragment();\n";
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
.value_or(ProgramResult());
out += R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
layout (location = 2) out vec4 FragColor2;
layout (location = 3) out vec4 FragColor3;
layout (location = 4) out vec4 FragColor4;
layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout(location = 0) out vec4 FragColor0;
layout(location = 1) out vec4 FragColor1;
layout(location = 2) out vec4 FragColor2;
layout(location = 3) out vec4 FragColor3;
layout(location = 4) out vec4 FragColor4;
layout(location = 5) out vec4 FragColor5;
layout(location = 6) out vec4 FragColor6;
layout(location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
layout (std140) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
@@ -156,20 +173,12 @@ bool AlphaFunc(in float value) {
}
}
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
out += program.first;
out += R"(
void main() {
execute_fragment();
exec_fragment();
}
)";
out += program.first;
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -10,12 +10,164 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
using ProgramCode = std::vector<u64>;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
/// Tracks how a shader uses one const buffer: whether it is used at all, whether it is accessed
/// indirectly, which stage and index it belongs to and the highest offset seen so far.
class ConstBufferEntry {
    using Maxwell = Tegra::Engines::Maxwell3D::Regs;

public:
    /// Records a direct access to the buffer.
    /// @param index  Const buffer index.
    /// @param offset Offset that was accessed; the largest value seen is kept.
    /// @param stage  Shader stage performing the access.
    void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
        is_used = true;
        this->index = static_cast<unsigned>(index);
        this->stage = stage;
        max_offset = std::max(max_offset, static_cast<unsigned>(offset));
    }

    /// Records an indirect access to the buffer; the maximum offset is left untouched.
    /// @param index Const buffer index.
    /// @param stage Shader stage performing the access.
    void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
        is_used = true;
        is_indirect = true;
        this->index = static_cast<unsigned>(index);
        this->stage = stage;
    }

    /// True once either Mark* function has been called.
    bool IsUsed() const {
        return is_used;
    }

    /// True once MarkAsUsedIndirect has been called.
    bool IsIndirect() const {
        return is_indirect;
    }

    /// Const buffer index recorded by the last Mark* call.
    unsigned GetIndex() const {
        return index;
    }

    /// Highest recorded offset plus one.
    unsigned GetSize() const {
        return max_offset + 1;
    }

    /// Per-stage base name followed by the buffer index, e.g. "buffer_vs_c3".
    std::string GetName() const {
        return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
    }

    /// Key combining the stage (shifted left by 16 bits) with the buffer index.
    u32 GetHash() const {
        return (static_cast<u32>(stage) << 16) | index;
    }

private:
    static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
        "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
    };

    bool is_used{};
    bool is_indirect{};
    unsigned index{};
    unsigned max_offset{};
    // Value-initialized like the other members so that GetName()/GetHash() on an entry that was
    // never marked as used do not read an indeterminate value.
    Maxwell::ShaderStage stage{};
};
class SamplerEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
: offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array),
is_shadow(is_shadow) {}
std::size_t GetOffset() const {
return offset;
}
std::size_t GetIndex() const {
return sampler_index;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
std::string GetName() const {
return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
std::to_string(sampler_index);
}
std::string GetTypeString() const {
using Tegra::Shader::TextureType;
std::string glsl_type;
switch (type) {
case TextureType::Texture1D:
glsl_type = "sampler1D";
break;
case TextureType::Texture2D:
glsl_type = "sampler2D";
break;
case TextureType::Texture3D:
glsl_type = "sampler3D";
break;
case TextureType::TextureCube:
glsl_type = "samplerCube";
break;
default:
UNIMPLEMENTED();
}
if (is_array)
glsl_type += "Array";
if (is_shadow)
glsl_type += "Shadow";
return glsl_type;
}
Tegra::Shader::TextureType GetType() const {
return type;
}
bool IsArray() const {
return is_array;
}
bool IsShadow() const {
return is_shadow;
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {
return TextureSamplerNames[static_cast<std::size_t>(stage)];
}
private:
static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
"tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
};
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
std::size_t offset;
Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
bool is_array; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not.
};
/// Resources and metadata reported for one generated shader. It is the second element of a
/// ProgramResult, next to the generated source string.
struct ShaderEntries {
    /// Const buffers used by the shader.
    std::vector<ConstBufferEntry> const_buffer_entries;
    /// Texture samplers used by the shader.
    std::vector<SamplerEntry> texture_samplers;
    /// One flag per clip distance. Value-initialized so a default-constructed ShaderEntries
    /// (for example inside a default-constructed ProgramResult) never holds indeterminate flags.
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances{};
    /// Length of the shader. Value-initialized for the same reason as clip_distances.
    std::size_t shader_length{};
};
using ProgramResult = std::pair<std::string, ShaderEntries>;
struct ShaderSetup {
explicit ShaderSetup(ProgramCode program_code) {

View File

@@ -462,35 +462,29 @@ void OpenGLState::ApplyPolygonOffset() const {
}
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
const auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
if (textures[i] != cur_state_texture_unit.texture) {
if (!has_delta) {
first = i;
has_delta = true;
}
last = i;
if (texture_unit.texture != cur_state_texture_unit.texture) {
glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
glBindTexture(texture_unit.target, texture_unit.texture);
}
// Update the texture swizzle
if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r ||
texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
texture_unit.swizzle.b, texture_unit.swizzle.a};
glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
}
}
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data());
}
}
void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::size_t first{}, last{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
for (std::size_t i = 0; i < std::size(samplers); ++i) {
samplers[i] = texture_units[i].sampler;

View File

@@ -126,14 +126,26 @@ public:
struct TextureUnit {
GLuint texture; // GL_TEXTURE_BINDING_2D
GLuint sampler; // GL_SAMPLER_BINDING
GLenum target;
struct {
GLint r; // GL_TEXTURE_SWIZZLE_R
GLint g; // GL_TEXTURE_SWIZZLE_G
GLint b; // GL_TEXTURE_SWIZZLE_B
GLint a; // GL_TEXTURE_SWIZZLE_A
} swizzle;
void Unbind() {
texture = 0;
swizzle.r = GL_RED;
swizzle.g = GL_GREEN;
swizzle.b = GL_BLUE;
swizzle.a = GL_ALPHA;
}
void Reset() {
Unbind();
sampler = 0;
target = GL_TEXTURE_2D;
}
};
std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;

View File

@@ -14,7 +14,6 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
@@ -98,6 +97,18 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
/// Stores the window and, when the multi-core setting is enabled, makes its context current.
/// NOTE(review): the destructor re-reads the same setting; confirm it cannot change while an
/// instance is alive.
ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_)
    : emu_window{emu_window_} {
    if (!Settings::values.use_multi_core) {
        return;
    }
    emu_window.MakeCurrent();
}
/// Releases the window's context again, but only when the multi-core setting is enabled.
ScopeAcquireGLContext::~ScopeAcquireGLContext() {
    if (!Settings::values.use_multi_core) {
        return;
    }
    emu_window.DoneCurrent();
}
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
: VideoCore::RendererBase{window} {}
@@ -106,6 +117,7 @@ RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
void RendererOpenGL::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
ScopeAcquireGLContext acquire_context{render_window};
Core::System::GetInstance().GetPerfStats().EndSystemFrame();
@@ -171,6 +183,10 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
Memory::GetPointer(framebuffer_addr),
gl_framebuffer_data.data(), true);
state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
@@ -178,11 +194,14 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// they differ from the LCD resolution.
// TODO: Applications could theoretically crash yuzu here by specifying too large
// framebuffer sizes. We should make sure that this cannot happen.
glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
framebuffer.height, screen_info.texture.gl_format,
screen_info.texture.gl_type, gl_framebuffer_data.data());
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
screen_info.texture.gl_format, screen_info.texture.gl_type,
gl_framebuffer_data.data());
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
state.texture_units[0].texture = 0;
state.Apply();
}
}
@@ -192,8 +211,17 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
*/
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
// Update existing texture
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
state.texture_units[0].texture = 0;
state.Apply();
}
/**
@@ -233,13 +261,26 @@ void RendererOpenGL::InitOpenGLObjects() {
sizeof(ScreenRectVertex));
// Allocate textures for the screen
screen_info.texture.resource.Create(GL_TEXTURE_2D);
screen_info.texture.resource.Create();
const GLuint texture = screen_info.texture.resource.handle;
glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
// Allocation of storage is deferred until the first frame, when we
// know the framebuffer size.
state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
screen_info.display_texture = screen_info.texture.resource.handle;
state.texture_units[0].texture = 0;
state.Apply();
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
}
@@ -255,19 +296,20 @@ void RendererOpenGL::CreateRasterizer() {
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
GLint internal_format;
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
internal_format = GL_RGBA8;
internal_format = GL_RGBA;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
gl_framebuffer_data.resize(texture.width * texture.height * 4);
break;
default:
internal_format = GL_RGBA8;
internal_format = GL_RGBA;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -276,9 +318,15 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
UNREACHABLE();
}
texture.resource.Release();
texture.resource.Create(GL_TEXTURE_2D);
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
texture.gl_format, texture.gl_type, nullptr);
state.texture_units[0].texture = 0;
state.Apply();
}
void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -320,6 +368,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
}};
state.texture_units[0].texture = screen_info.display_texture;
state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
// Work around brightness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
@@ -457,7 +506,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
/// Initialize the renderer
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
ScopeAcquireGLContext acquire_context{render_window};
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);

View File

@@ -39,6 +39,16 @@ struct ScreenInfo {
TextureInfo texture;
};
/// Helper class to acquire/release OpenGL context within a given scope
/// The class is non-copyable (it derives from NonCopyable) and keeps a reference to the window
/// it was constructed with, so the window must outlive this object.
/// NOTE(review): only the declaration is visible here; presumably the constructor acquires the
/// context and the destructor releases it - confirm against the implementation.
class ScopeAcquireGLContext : NonCopyable {
public:
/// @param window Emulator window associated with this scope guard.
explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window);
~ScopeAcquireGLContext();
private:
/// Window passed at construction; stored by reference, not owned.
Core::Frontend::EmuWindow& emu_window;
};
class RendererOpenGL : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& window);

View File

@@ -1,206 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <set>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
/// Combines the exit methods of two branches that run in parallel.
/// An Undetermined side defers to the other side; two equal methods yield that method; any
/// other combination is reported as Conditional.
constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
    // Nothing is known about the first branch yet: the second one decides.
    if (a == ExitMethod::Undetermined) {
        return b;
    }
    // Either the second branch is unknown or both branches agree: keep the first.
    const bool keep_first = b == ExitMethod::Undetermined || a == b;
    return keep_first ? a : ExitMethod::Conditional;
}
/**
 * Tells whether the instruction located at `offset` is a 'sched' instruction.
 * Sched instructions always appear before a sequence of 3 instructions, so every fourth slot
 * counted from `main_offset` is one.
 */
constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
    constexpr u32 sched_period = 4;
    const u32 relative_offset = offset - main_offset;
    return relative_offset % sched_period == 0;
}
} // namespace
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
std::set<u32> labels;
const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
if (exit_method != ExitMethod::AlwaysEnd) {
UNREACHABLE_MSG("Program does not always end");
}
if (labels.empty()) {
basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
return;
}
labels.insert(main_offset);
for (const u32 label : labels) {
const auto next_it = labels.lower_bound(label + 1);
const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
basic_blocks.insert({label, DecodeRange(label, next_label)});
}
}
/**
 * Walks the instructions from `begin` up to `end` (or MAX_PROGRAM_LENGTH, whichever is reached
 * first) to determine how that range exits, collecting branch targets along the way.
 *
 * @param begin  Offset of the first instruction to scan.
 * @param end    Offset at which scanning stops (exclusive).
 * @param labels Output set that receives the target offset of every BRA, SSY and PBK found.
 * @return The exit method of the range. Results are memoized in exit_method_map, keyed by
 *         the (begin, end) pair.
 */
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
// Register the range as Undetermined before scanning it. A range that is already registered
// (finished, or still being scanned further up the recursion) returns its stored value, which
// also stops loops in the control flow from recursing forever.
const auto [iter, inserted] =
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
ExitMethod& exit_method = iter->second;
if (!inserted)
return exit_method;
for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
// Track the lowest and highest (exclusive) offsets visited by any scan.
coverage_begin = std::min(coverage_begin, offset);
coverage_end = std::max(coverage_end, offset + 1);
const Instruction instr = {program_code[offset]};
const auto opcode = OpCode::Decode(instr);
// Instructions that cannot be decoded have no influence on control flow here.
if (!opcode)
continue;
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
// The EXIT instruction can be predicated, which means that the shader can conditionally
// end on this instruction. We have to consider the case where the condition is not met
// and check the exit method of that other basic block.
using Tegra::Shader::Pred;
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
return exit_method = ExitMethod::AlwaysEnd;
} else {
const ExitMethod not_met = Scan(offset + 1, end, labels);
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
}
}
case OpCode::Id::BRA: {
// A branch splits the flow: scan both the fall-through path and the branch target, then
// merge their exit methods.
const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
const ExitMethod no_jmp = Scan(offset + 1, end, labels);
const ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
case OpCode::Id::SSY:
case OpCode::Id::PBK: {
// The SSY and PBK use a similar encoding as the BRA instruction.
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
"Constant buffer branching is not supported");
const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
// Continue scanning for an exit method.
break;
}
}
}
// The range was walked to its end without hitting an EXIT or BRA.
return exit_method = ExitMethod::AlwaysReturn;
}
/// Decodes every instruction from `begin` up to (not including) `end` into one basic block.
/// When `begin` is greater than `end`, decoding runs up to MAX_PROGRAM_LENGTH instead.
BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
    // Neither bound changes while decoding, so the stop offset can be computed once.
    const auto limit = begin > end ? MAX_PROGRAM_LENGTH : end;

    BasicBlock decoded;
    u32 pc = begin;
    while (pc < limit) {
        // DecodeInstr returns the offset of the next instruction to decode.
        pc = DecodeInstr(decoded, pc);
    }
    return decoded;
}
/**
 * Decodes the single instruction at `pc` and appends the resulting IR nodes to `bb`.
 *
 * @param bb Basic block that receives the generated nodes.
 * @param pc Offset of the instruction to decode.
 * @return Offset of the next instruction to decode: one past the value returned by the
 *         per-type decoder, or `pc + 1` when the instruction is skipped.
 */
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
// Ignore sched instructions when generating code.
if (IsSchedInstruction(pc, main_offset)) {
return pc + 1;
}
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
// Decoding failure
if (!opcode) {
UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
return pc + 1;
}
// Emit a comment node holding the offset, mnemonic and raw encoding of the instruction.
bb.push_back(
Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
using Tegra::Shader::Pred;
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
"NeverExecute predicate not implemented");
// Dispatch table from opcode type to the member function that decodes it. Types without an
// entry are handled by DecodeOther below.
static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
decoders = {
{OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
{OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
{OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
{OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
{OpCode::Type::Shift, &ShaderIR::DecodeShift},
{OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
{OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
{OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
{OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
{OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
{OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
{OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
{OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
{OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
{OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
{OpCode::Type::Video, &ShaderIR::DecodeVideo},
{OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
};
// The decoder writes into a temporary block so that the whole result can be wrapped in a
// conditional node afterwards if the instruction is predicated.
std::vector<Node> tmp_block;
if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
pc = (this->*decoder->second)(tmp_block, bb, pc);
} else {
pc = DecodeOther(tmp_block, bb, pc);
}
// Some instructions (like SSY) don't have a predicate field, they are always unconditionally
// executed.
const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
bb.push_back(
Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
} else {
// Not predicated: move the generated nodes straight into the basic block.
for (auto& node : tmp_block) {
bb.push_back(std::move(node));
}
}
return pc + 1;
}
} // namespace VideoCommon::Shader

View File

@@ -1,155 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::SubOp;
/**
 * Decodes a floating point arithmetic instruction (MOV, FMUL, FADD, MUFU, FMNMX or RRO) and
 * appends the generated IR to `bb`.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);

    // The second operand is an immediate, a register or a constant buffer entry, depending on
    // the encoding flags of the instruction.
    Node op_b = [&]() -> Node {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV_C:
    case OpCode::Id::MOV_R: {
        // MOV does not have neither 'abs' nor 'neg' bits.
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    case OpCode::Id::FMUL_C:
    case OpCode::Id::FMUL_R:
    case OpCode::Id::FMUL_IMM: {
        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
                             instr.fmul.tab5cb8_2.Value());
        // The message names the field that is actually checked (tab5c68_0).
        UNIMPLEMENTED_IF_MSG(
            instr.fmul.tab5c68_0 != 1, "FMUL tab5c68_0({}) is not implemented",
            instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default

        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);

        // TODO(Rodrigo): Should precise be used when there's a postfactor?
        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);

        if (instr.fmul.postfactor != 0) {
            auto postfactor = static_cast<s32>(instr.fmul.postfactor);

            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
            // logic.
            if (postfactor >= 4) {
                postfactor = 7 - postfactor;
            } else {
                postfactor = 0 - postfactor;
            }

            // A positive exponent scales the product up by a power of two, anything else
            // scales it down.
            if (postfactor > 0) {
                value = Operation(OperationCode::FMul, NO_PRECISE, value,
                                  Immediate(static_cast<f32>(1 << postfactor)));
            } else {
                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
                                  Immediate(static_cast<f32>(1 << -postfactor)));
            }
        }

        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FADD_C:
    case OpCode::Id::FADD_R:
    case OpCode::Id::FADD_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::MUFU: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);

        // MUFU selects one of several unary functions through its sub-operation field.
        Node value = [&]() {
            switch (instr.sub_op) {
            case SubOp::Cos:
                return Operation(OperationCode::FCos, PRECISE, op_a);
            case SubOp::Sin:
                return Operation(OperationCode::FSin, PRECISE, op_a);
            case SubOp::Ex2:
                return Operation(OperationCode::FExp2, PRECISE, op_a);
            case SubOp::Lg2:
                return Operation(OperationCode::FLog2, PRECISE, op_a);
            case SubOp::Rcp:
                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
            case SubOp::Rsq:
                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
            case SubOp::Sqrt:
                return Operation(OperationCode::FSqrt, PRECISE, op_a);
            default:
                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
                                  static_cast<unsigned>(instr.sub_op.Value()));
                return Immediate(0);
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FMNMX_C:
    case OpCode::Id::FMNMX_R:
    case OpCode::Id::FMNMX_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        // Both min and max are built; the predicate picks min when true and max otherwise.
        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);

        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::RRO_C:
    case OpCode::Id::RRO_R:
    case OpCode::Id::RRO_IMM: {
        // Currently RRO is only implemented as a register move.
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        SetRegister(bb, instr.gpr0, op_b);
        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,70 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/**
 * Decodes a half float arithmetic instruction (HADD2_C/R, HMUL2_C/R) and appends the generated
 * IR to `bb`.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
// The flush-to-zero bit is only checked for the HADD2 variants.
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
}
UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
// The negate bit of operand A is ignored for HMUL2_R and that of operand B for HMUL2_C.
const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
const bool negate_b =
opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
// instr.alu_half.type_a
// Operand B comes from a constant buffer (_C variants) or from a register (_R variants).
Node op_b = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20);
default:
UNREACHABLE();
return Immediate(0);
}
}();
op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() {
// NOTE(review): type_a is read through alu_half_imm while type_b is read through alu_half.
// Presumably both views place type_a at the same bits - confirm against the Instruction
// definition.
MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R:
return Operation(OperationCode::HAdd, meta, op_a, op_b);
case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R:
return Operation(OperationCode::HMul, meta, op_a, op_b);
default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0);
}
}();
// Combine the result with the current destination value according to the merge mode.
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,51 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/**
 * Decodes a half float arithmetic instruction with an immediate operand (HADD2_IMM or
 * HMUL2_IMM) and appends the generated IR to `bb`.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    // HADD2_IMM is checked for flush-to-zero, every other opcode for a precision mode.
    if (id == OpCode::Id::HADD2_IMM) {
        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
    } else {
        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
    }
    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
                         "Half float immediate saturation not implemented");

    const Node lhs = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half_imm.abs_a,
                                          instr.alu_half_imm.negate_a);
    const Node rhs = UnpackHalfImmediate(instr, true);

    const auto build_result = [&]() {
        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
        if (id == OpCode::Id::HADD2_IMM) {
            return Operation(OperationCode::HAdd, meta, lhs, rhs);
        }
        if (id == OpCode::Id::HMUL2_IMM) {
            return Operation(OperationCode::HMul, meta, lhs, rhs);
        }
        UNREACHABLE();
        return Immediate(0);
    };

    // Combine the result with the current destination value according to the merge mode.
    const Node computed = build_result();
    const Node merged = HalfMerge(GetRegister(instr.gpr0), computed, instr.alu_half_imm.merge);
    SetRegister(bb, instr.gpr0, merged);

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,52 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/**
 * Decodes an arithmetic instruction that carries a 32-bit immediate (MOV32_IMM, FMUL32_IMM or
 * FADD32I) and appends the generated IR to `bb`.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    if (id == OpCode::Id::MOV32_IMM) {
        // Plain move of the immediate into the destination register.
        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
    } else if (id == OpCode::Id::FMUL32_IMM) {
        Node product =
            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
        product = GetSaturatedFloat(product, instr.fmul32.saturate);

        SetInternalFlagsFromFloat(bb, product, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, product);
    } else if (id == OpCode::Id::FADD32I) {
        // Each operand has its own abs/neg modifiers.
        const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
                                               instr.fadd32i.negate_a);
        const Node rhs = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
                                               instr.fadd32i.negate_b);
        const Node sum = Operation(OperationCode::FAdd, PRECISE, lhs, rhs);

        SetInternalFlagsFromFloat(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
    } else {
        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                          opcode->get().GetName());
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,287 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::IAdd3Height;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::Register;
/**
 * Decodes an integer arithmetic instruction (IADD, IADD3, ISCADD, POPC, SEL, LOP, LOP3, IMNMX
 * or LEA) and appends the generated IR to `bb`.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
// The second operand is an immediate, a register or a constant buffer entry, depending on the
// encoding flags of the instruction.
Node op_b = [&]() {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
switch (opcode->get().GetId()) {
case OpCode::Id::IADD_C:
case OpCode::Id::IADD_R:
case OpCode::Id::IADD_IMM: {
UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::IADD3_C:
case OpCode::Id::IADD3_R:
case OpCode::Id::IADD3_IMM: {
Node op_c = GetRegister(instr.gpr39);
// Selects the whole value or one of its 16-bit halves.
const auto ApplyHeight = [&](IAdd3Height height, Node value) {
switch (height) {
case IAdd3Height::None:
return value;
case IAdd3Height::LowerHalfWord:
return BitfieldExtract(value, 0, 16);
case IAdd3Height::UpperHalfWord:
return BitfieldExtract(value, 16, 16);
default:
UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
return Immediate(0);
}
};
// Half-word selection is only applied for the register variant.
if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
op_a = ApplyHeight(instr.iadd3.height_a, op_a);
op_b = ApplyHeight(instr.iadd3.height_b, op_b);
op_c = ApplyHeight(instr.iadd3.height_c, op_c);
}
op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
// value = (op_a + op_b) + op_c; for the register variant the partial sum of op_a and op_b may
// be shifted by 16 bits before op_c is added.
const Node value = [&]() {
const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
}
const Node shifted = [&]() {
switch (instr.iadd3.mode) {
case Tegra::Shader::IAdd3Mode::RightShift:
// TODO(tech4me): According to
// https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
// The addition between op_a and op_b should be done in uint33, more
// investigation required
return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
Immediate(16));
case Tegra::Shader::IAdd3Mode::LeftShift:
return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
Immediate(16));
default:
return add_ab;
}
}();
return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
}();
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::ISCADD_C:
case OpCode::Id::ISCADD_R:
case OpCode::Id::ISCADD_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in ISCADD is not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
// value = (op_a << shift_amount) + op_b
const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::POPC_C:
case OpCode::Id::POPC_R:
case OpCode::Id::POPC_IMM: {
// Bit count of operand B, optionally inverted first.
if (instr.popc.invert) {
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
}
const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
// Select between op_a and op_b based on a predicate.
const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
if (instr.alu.lop.invert_a)
op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
if (instr.alu.lop.invert_b)
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
instr.generates_cc);
break;
}
case OpCode::Id::LOP3_C:
case OpCode::Id::LOP3_R:
case OpCode::Id::LOP3_IMM: {
const Node op_c = GetRegister(instr.gpr39);
// The lookup table is read from a different field for the register variant.
const Node lut = [&]() {
if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
return Immediate(instr.alu.lop3.GetImmLut28());
} else {
return Immediate(instr.alu.lop3.GetImmLut48());
}
}();
WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
break;
}
case OpCode::Id::IMNMX_C:
case OpCode::Id::IMNMX_R:
case OpCode::Id::IMNMX_IMM: {
UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
// Both min and max are built; the predicate picks min when true and max otherwise.
const bool is_signed = instr.imnmx.is_signed;
const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::LEA_R2:
case OpCode::Id::LEA_R1:
case OpCode::Id::LEA_IMM:
case OpCode::Id::LEA_RZ:
case OpCode::Id::LEA_HI: {
// These bindings shadow the outer op_a/op_b: every LEA variant picks its own three operands.
const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::LEA_R2: {
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
}
case OpCode::Id::LEA_R1: {
const bool neg = instr.lea.r1.neg != 0;
return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
GetRegister(instr.gpr20),
Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
}
case OpCode::Id::LEA_IMM: {
const bool neg = instr.lea.imm.neg != 0;
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
}
case OpCode::Id::LEA_RZ: {
const bool neg = instr.lea.rz.neg != 0;
return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
}
// LEA_HI has no dedicated handling: it is reported as unimplemented and decoded with the
// fallback operands below.
case OpCode::Id::LEA_HI:
default:
UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
}
}();
UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
"Unhandled LEA Predicate");
// value = op_a + op_b * (1 << op_c)
const Node shifted_c =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
SetRegister(bb, instr.gpr0, value);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
}
return pc;
}
/**
 * Emits the IR for a LOP3 (three input logic operation driven by a lookup table).
 *
 * For each of the 32 result bits, the matching bit of op_a, op_b and op_c is packed into a
 * 3-bit index (op_a in bit 2, op_b in bit 1, op_c in bit 0) and the result bit is the bit of
 * imm_lut found at that index.
 *
 * @param bb      Block that receives the generated nodes.
 * @param dest    Destination register.
 * @param op_a    First operand (most significant bit of the table index).
 * @param op_b    Second operand.
 * @param op_c    Third operand (least significant bit of the table index).
 * @param imm_lut Lookup table holding the result for each of the 8 input combinations.
 * @param sets_cc Whether the internal flags are updated from the result.
 */
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    constexpr u32 lop_iterations = 32;
    const Node one = Immediate(1);
    const Node two = Immediate(2);

    Node value{};
    for (u32 i = 0; i < lop_iterations; ++i) {
        const Node shift_amount = Immediate(i);

        // Bit i of op_c, kept at position 0.
        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);

        // Bit i of op_b, moved to position 1.
        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);

        // Bit i of op_a, moved to position 2.
        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);

        // The three fields occupy disjoint bit positions, so they have to be combined with OR.
        // Combining them with AND always produces zero and would select table entry 0 for
        // every bit.
        const Node pack_01 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_0, pack_1);
        const Node pack_012 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_01, pack_2);

        // Look the result bit up in the table and move it back to position i.
        const Node shifted_bit =
            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);

        const Node right =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);

        if (i > 0) {
            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
        } else {
            value = right;
        }
    }

    SetInternalFlagsFromInteger(bb, value, sets_cc);
    SetRegister(bb, dest, value);
}
} // namespace VideoCommon::Shader

View File

@@ -1,96 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::LogicOperation;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::PredicateResultMode;
using Tegra::Shader::Register;
/**
 * Decodes an integer arithmetic instruction that carries a 32-bit immediate (IADD32I or
 * LOP32I) and appends the generated IR to `bb`.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    Node lhs = GetRegister(instr.gpr8);
    Node rhs = Immediate(static_cast<s32>(instr.alu.imm20_32));

    if (id == OpCode::Id::IADD32I) {
        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");

        lhs = GetOperandAbsNegInteger(lhs, false, instr.iadd32i.negate_a, true);
        const Node sum = Operation(OperationCode::IAdd, PRECISE, lhs, rhs);

        SetInternalFlagsFromInteger(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
    } else if (id == OpCode::Id::LOP32I) {
        // Either operand may be bitwise inverted before the logic operation is applied.
        if (instr.alu.lop32i.invert_a) {
            lhs = Operation(OperationCode::IBitwiseNot, NO_PRECISE, lhs);
        }
        if (instr.alu.lop32i.invert_b) {
            rhs = Operation(OperationCode::IBitwiseNot, NO_PRECISE, rhs);
        }

        // LOP32I never writes a predicate.
        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, lhs, rhs,
                            PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
    } else {
        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
                          opcode->get().GetName());
    }

    return pc;
}
/**
 * Emits the IR for a two input logic operation, writes the result to `dest` and optionally
 * writes a predicate derived from that result.
 *
 * @param bb             Block that receives the generated nodes.
 * @param dest           Destination register.
 * @param logic_op       Operation to apply (And, Or, Xor or PassB).
 * @param op_a           First operand.
 * @param op_b           Second operand.
 * @param predicate_mode How the predicate is derived from the result; None writes no predicate.
 * @param predicate      Predicate to write when predicate_mode requires it.
 * @param sets_cc        Whether the internal flags are updated from the result.
 */
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
                                   Node op_a, Node op_b, PredicateResultMode predicate_mode,
                                   Pred predicate, bool sets_cc) {
    const auto apply_logic = [&]() -> Node {
        if (logic_op == LogicOperation::And) {
            return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
        }
        if (logic_op == LogicOperation::Or) {
            return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
        }
        if (logic_op == LogicOperation::Xor) {
            return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
        }
        if (logic_op == LogicOperation::PassB) {
            return op_b;
        }
        UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
        return Immediate(0);
    };
    const Node outcome = apply_logic();

    SetInternalFlagsFromInteger(bb, outcome, sets_cc);
    SetRegister(bb, dest, outcome);

    // Write the predicate value depending on the predicate mode.
    if (predicate_mode == PredicateResultMode::None) {
        // Nothing to write.
        return;
    }
    if (predicate_mode == PredicateResultMode::NotZero) {
        // The predicate becomes true when the result is different from zero.
        const Node is_not_zero = Operation(OperationCode::LogicalINotEqual, outcome, Immediate(0));
        SetPredicate(bb, static_cast<u64>(predicate), is_not_zero);
        return;
    }
    UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
                      static_cast<u32>(predicate_mode));
}
} // namespace VideoCommon::Shader

View File

@@ -1,49 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/**
 * Decodes a bitfield extract instruction and appends the generated IR to `bb`.
 * Only BFE_IMM is handled; any other opcode is reported as unimplemented.
 *
 * @param bb   Block that receives the generated nodes.
 * @param code Enclosing basic block; not used by this decoder.
 * @param pc   Offset of the instruction inside program_code.
 * @return `pc`, unchanged.
 */
u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.bfe.negate_b);

    const Node source =
        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, instr.bfe.negate_a, false);

    if (opcode->get().GetId() != OpCode::Id::BFE_IMM) {
        UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
        return pc;
    }

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in BFE is not implemented");

    // The field is extracted with a left shift followed by a logical right shift.
    const Node left_amount = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
    const Node right_amount =
        Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));

    const Node shifted_left =
        Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, source, left_amount);
    const Node extracted =
        Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, shifted_left, right_amount);

    SetInternalFlagsFromInteger(bb, extracted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, extracted);

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,41 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
// Decodes the BFI instruction at program_code[pc] and appends its IR to bb.
//
// The packed shift operand carries the insertion offset in bits [0, 8) and the bit count in
// bits [8, 16); gpr8 is inserted into the base operand with UBitfieldInsert.
// Returns pc unchanged.
u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node base{};
    Node packed_shift{};
    if (opcode->get().GetId() == OpCode::Id::BFI_IMM_R) {
        base = GetRegister(instr.gpr39);
        packed_shift = Immediate(instr.alu.GetSignedImm20_20());
    } else {
        // No other encoding is expected to be dispatched here.
        UNREACHABLE();
        base = Immediate(0);
        packed_shift = Immediate(0);
    }

    const Node inserted = GetRegister(instr.gpr8);
    const Node bit_offset = BitfieldExtract(packed_shift, 0, 8);
    const Node bit_count = BitfieldExtract(packed_shift, 8, 8);

    const Node result = Operation(OperationCode::UBitfieldInsert, PRECISE, base, inserted,
                                  bit_offset, bit_count);

    SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,149 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
// Decodes the conversion instruction at program_code[pc] (I2I, I2F, F2F or F2I) and appends the
// resulting IR to bb. Opcodes that are not handled are reported through UNIMPLEMENTED_MSG.
// Returns pc unchanged.
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
// Integer to integer: resize the source, apply abs/neg, then recast if signedness changes.
case OpCode::Id::I2I_R: {
UNIMPLEMENTED_IF(instr.conversion.selector);
const bool input_signed = instr.conversion.is_input_signed;
const bool output_signed = instr.conversion.is_output_signed;
Node value = GetRegister(instr.gpr20);
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
input_signed);
// A cast is only emitted when the input and output signedness differ.
if (input_signed != output_signed) {
value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
}
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
// Integer to float: the source comes from a register or a constant buffer.
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C: {
UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
Node value = [&]() {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
const bool input_signed = instr.conversion.is_input_signed;
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
// The absolute value is applied on the integer side, the negation on the float side.
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
// Float to float: apply abs/neg, the requested rounding and optional saturation.
case OpCode::Id::F2F_R:
case OpCode::Id::F2F_C: {
UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2F is not implemented");
Node value = [&]() {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {
switch (instr.conversion.f2f.rounding) {
case Tegra::Shader::F2fRoundingOp::None:
return value;
case Tegra::Shader::F2fRoundingOp::Round:
return Operation(OperationCode::FRoundEven, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Floor:
return Operation(OperationCode::FFloor, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Ceil:
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
}
// Reached only when the rounding field holds a value not listed in the switch above.
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
return Immediate(0);
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
// Float to integer: apply abs/neg and rounding on the float, then cast and resize.
case OpCode::Id::F2I_R:
case OpCode::Id::F2I_C: {
UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in F2I is not implemented");
Node value = [&]() {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
}
}();
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
value = [&]() {
switch (instr.conversion.f2i.rounding) {
case Tegra::Shader::F2iRoundingOp::None:
return value;
case Tegra::Shader::F2iRoundingOp::Floor:
return Operation(OperationCode::FFloor, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Ceil:
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
default:
UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
static_cast<u32>(instr.conversion.f2i.rounding.Value()));
return Immediate(0);
}
}();
const bool is_signed = instr.conversion.is_output_signed;
value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);
// No internal flags are written here; condition code generation is flagged as
// unimplemented at the top of this case.
SetRegister(bb, instr.gpr0, value);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,59 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
// Decodes the FFMA instruction at program_code[pc] and appends its IR to bb.
//
// Operand A always comes from gpr8; operands B and C are selected by the opcode variant
// (constant buffer, register or immediate). Returns pc unchanged.
u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
    // Seems to be 1 by default based on SMO
    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
                         instr.ffma.tab5980_0.Value());
    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
                         instr.ffma.tab5980_1.Value());

    const Node multiplicand = GetRegister(instr.gpr8);

    // Select the multiplier and the addend according to the instruction encoding.
    Node multiplier{};
    Node addend{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::FFMA_CR:
        multiplier = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        addend = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RR:
        multiplier = GetRegister(instr.gpr20);
        addend = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RC:
        multiplier = GetRegister(instr.gpr39);
        addend = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        break;
    case OpCode::Id::FFMA_IMM:
        multiplier = GetImmediate19(instr);
        addend = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
        multiplier = Immediate(0);
        addend = Immediate(0);
        break;
    }

    multiplier = GetOperandAbsNegFloat(multiplier, false, instr.ffma.negate_b);
    addend = GetOperandAbsNegFloat(addend, false, instr.ffma.negate_c);

    const Node fused = Operation(OperationCode::FFma, PRECISE, multiplicand, multiplier, addend);
    const Node result = GetSaturatedFloat(fused, instr.alu.saturate_d);

    SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,58 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
// Decodes the FSET instruction at program_code[pc] and appends its IR to bb.
//
// The float comparison of A and B is combined with a second predicate; the destination register
// receives 1.0f or -1 (selected by the bf bit) when the combined condition holds and zero
// otherwise. Returns pc unchanged.
u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
                                           instr.fset.neg_a != 0);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node rhs{};
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }
    rhs = GetOperandAbsNegFloat(rhs, instr.fset.abs_b != 0, instr.fset.neg_b != 0);

    const Node extra_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
    const Node comparison = GetPredicateComparisonFloat(instr.fset.cond, lhs, rhs);
    const Node condition = Operation(combiner, comparison, extra_pred);

    // The bf bit chooses between a float result (1.0f / 0.0f) and an integer mask (-1 / 0).
    Node on_true{};
    Node on_false{};
    if (instr.fset.bf) {
        on_true = Immediate(1.0f);
        on_false = Immediate(0.0f);
    } else {
        on_true = Immediate(-1);
        on_false = Immediate(0);
    }
    const Node result = Operation(OperationCode::Select, PRECISE, condition, on_true, on_false);

    // Flags are derived with the helper matching the type of the produced value.
    if (instr.fset.bf) {
        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, result);
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,56 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
// Decodes the FSETP instruction at program_code[pc] and appends its IR to bb.
//
// pred3 receives (A cond B) OP pred39. When pred0 is not the unused index, it receives
// !(A cond B) OP pred39. Returns pc unchanged.
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                           instr.fsetp.neg_a != 0);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node rhs{};
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }
    // NOTE(review): the negate flag for operand B is hard-coded to false while operand A honours
    // neg_a. Confirm whether FSETP encodes a negate bit for B that should be forwarded here.
    rhs = GetOperandAbsNegFloat(rhs, instr.fsetp.abs_b, false);

    // We can't use the constant predicate as destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node comparison = GetPredicateComparisonFloat(instr.fsetp.cond, lhs, rhs);
    const Node extra_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);

    // Primary predicate: Predicate OP SecondPredicate.
    const Node primary = Operation(combiner, comparison, extra_pred);
    SetPredicate(bb, instr.fsetp.pred3, primary);

    if (instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex)) {
        // No secondary destination was requested.
        return pc;
    }

    // Secondary predicate: !Predicate OP SecondPredicate.
    const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
    const Node secondary = Operation(combiner, inverted, extra_pred);
    SetPredicate(bb, instr.fsetp.pred0, secondary);
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,67 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
// Decodes the HSET2 instruction at program_code[pc] and appends its IR to bb.
//
// Each half of the packed operands is compared independently and combined with pred39. A half
// that passes yields 0x3c00 (bf set) or 0xffff (bf clear) in its 16-bit lane, otherwise zero; the
// two lanes are OR-ed into gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);

    Node lhs = GetRegister(instr.gpr8);

    Node rhs{};
    if (opcode->get().GetId() == OpCode::Id::HSET2_R) {
        rhs = GetRegister(instr.gpr20);
    } else {
        // No other encoding is expected to be dispatched here.
        UNREACHABLE();
        rhs = Immediate(0);
    }

    lhs = GetOperandAbsNegHalf(lhs, instr.hset2.abs_a, instr.hset2.negate_a);
    rhs = GetOperandAbsNegHalf(rhs, instr.hset2.abs_b, instr.hset2.negate_b);

    const Node extra_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);

    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
    const Node pair_result = GetPredicateComparisonHalf(instr.hset2.cond, meta, lhs, rhs);

    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);

    // Builds the value of one 16-bit lane (0 = low half, 1 = high half).
    const auto select_lane = [&](u32 lane) -> Node {
        const u32 raw_true = instr.hset2.bf ? 0x3c00 : 0xffff;
        const Node on_true = Immediate(raw_true << (lane * 16));
        const Node on_false = Immediate(0);

        const Node lane_comparison =
            Operation(OperationCode::LogicalPick2, pair_result, Immediate(lane));
        const Node condition = Operation(combiner, lane_comparison, extra_pred);
        return Operation(OperationCode::Select, NO_PRECISE, condition, on_true, on_false);
    };

    // HSET2 operates on each half float in the pack.
    const Node low_lane = select_lane(0);
    const Node high_lane = select_lane(1);

    const Node packed = Operation(OperationCode::UBitwiseOr, NO_PRECISE, low_lane, high_lane);
    SetRegister(bb, instr.gpr0, packed);
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,62 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
// Decodes the HSETP2 instruction at program_code[pc] and appends its IR to bb.
//
// The packed half comparison of A and B is reduced to a single boolean (all halves when h_and is
// set, any half otherwise) and combined with pred39. pred3 receives that result; when pred0 is
// not the unused index it receives the same combination using the negated comparison.
// Returns pc unchanged.
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);

    Node op_a = GetRegister(instr.gpr8);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    const Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSETP2_R:
            // Operand B must use its own absolute-value modifier. Operand A's flag (abs_a) was
            // previously applied here, which dropped |B| and applied a spurious abs whenever
            // only A requested it.
            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_b,
                                        instr.hsetp2.negate_b);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // We can't use the constant predicate as destination.
    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    // h_and requires both halves to pass; otherwise a single passing half is enough.
    const OperationCode pair_combiner =
        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;

    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
    const Node first_pred = Operation(pair_combiner, comparison);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    const Node value = Operation(combiner, first_pred, second_pred);
    SetPredicate(bb, instr.hsetp2.pred3, value);

    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,76 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::HalfPrecision;
using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
// Decodes the HFMA2 instruction at program_code[pc] and appends its IR to bb.
//
// Operand A always comes from gpr8; operands B and C, their half types, their negate flags and
// the saturate flag depend on the opcode variant. The fused result is merged into the current
// value of gpr0 through HalfMerge. Returns pc unchanged.
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
// The RR encoding stores its precision field in the rr sub-layout.
if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
} else {
UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
}
constexpr auto identity = HalfType::H0_H1;
const HalfType type_a = instr.hfma2.type_a;
const Node op_a = GetRegister(instr.gpr8);
// Written by the lambda below as a side effect; they stay false for variants that do not
// assign them.
bool neg_b{}, neg_c{};
auto [saturate, type_b, op_b, type_c,
op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;
return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_IMM_R:
// Only the C operand can be negated in this form; neg_b keeps its default of false.
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
default:
// NOTE(review): unknown opcodes silently produce zero operands here, with no
// UNIMPLEMENTED/UNREACHABLE report - confirm this is intended.
return {false, identity, Immediate(0), identity, Immediate(0)};
}
}();
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
// Combine the new value with the existing destination contents according to the merge mode.
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,50 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
// Decodes the ISET instruction at program_code[pc] and appends its IR to bb.
//
// The integer comparison of A and B is combined with a second predicate; the destination register
// receives 1.0f or -1 (selected by the bf bit) when the combined condition holds and zero
// otherwise. Returns pc unchanged.
u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node lhs = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node rhs{};
    if (instr.is_b_imm) {
        rhs = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    const Node extra_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, lhs, rhs);
    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
    const Node condition = Operation(combiner, comparison, extra_pred);

    // The bf bit chooses between a float result (1.0f / 0.0f) and an integer mask (-1 / 0).
    Node on_true{};
    Node on_false{};
    if (instr.iset.bf) {
        on_true = Immediate(1.0f);
        on_false = Immediate(0.0f);
    } else {
        on_true = Immediate(-1);
        on_false = Immediate(0);
    }

    const Node result = Operation(OperationCode::Select, PRECISE, condition, on_true, on_false);
    SetRegister(bb, instr.gpr0, result);
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,53 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
// Decodes the ISETP instruction at program_code[pc] and appends its IR to bb.
//
// pred3 receives (A cond B) OP pred39. When pred0 is not the unused index, it receives
// !(A cond B) OP pred39. Returns pc unchanged.
u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node lhs = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node rhs{};
    if (instr.is_b_imm) {
        rhs = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    // We can't use the constant predicate as destination.
    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node extra_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, lhs, rhs);
    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);

    // Primary predicate: Predicate OP SecondPredicate.
    const Node primary = Operation(combiner, comparison, extra_pred);
    SetPredicate(bb, instr.isetp.pred3, primary);

    if (instr.isetp.pred0 == static_cast<u64>(Pred::UnusedIndex)) {
        // No secondary destination was requested.
        return pc;
    }

    // Secondary predicate: !Predicate OP SecondPredicate.
    const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
    SetPredicate(bb, instr.isetp.pred0, Operation(combiner, inverted, extra_pred));
    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,737 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::TextureMiscMode;
using Tegra::Shader::TextureProcessMode;
using Tegra::Shader::TextureType;
// Returns the number of coordinate components used to address the given texture type.
// Unhandled texture types are reported through UNIMPLEMENTED_MSG and yield 0.
static std::size_t GetCoordCount(TextureType texture_type) {
    std::size_t coord_count = 0;
    switch (texture_type) {
    case TextureType::Texture1D:
        coord_count = 1;
        break;
    case TextureType::Texture2D:
        coord_count = 2;
        break;
    case TextureType::Texture3D:
    case TextureType::TextureCube:
        coord_count = 3;
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
        break;
    }
    return coord_count;
}
u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::LD_A: {
// Note: Shouldn't this be interp mode flat? As in no interpolation made.
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto LoadNextElement = [&](u32 reg_offset) {
const Node buffer = GetRegister(instr.gpr39);
const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
next_element, input_mode, buffer);
SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
LoadNextElement(reg_offset);
}
break;
}
case OpCode::Id::LD_C: {
UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
Node index = GetRegister(instr.gpr8);
const Node op_a =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);
switch (instr.ld_c.type.Value()) {
case Tegra::Shader::UniformType::Single:
SetRegister(bb, instr.gpr0, op_a);
break;
case Tegra::Shader::UniformType::Double: {
const Node op_b =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
SetTemporal(bb, 0, op_a);
SetTemporal(bb, 1, op_b);
SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
}
break;
}
case OpCode::Id::LD_L: {
UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
static_cast<unsigned>(instr.ld_l.unknown.Value()));
const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
Immediate(static_cast<s32>(instr.smem_imm)));
const Node lmem = GetLocalMemory(index);
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bytes32:
SetRegister(bb, instr.gpr0, lmem);
break;
default:
UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
}
break;
}
case OpCode::Id::LDG: {
const u32 count = [&]() {
switch (instr.ldg.type) {
case Tegra::Shader::UniformType::Single:
return 1;
case Tegra::Shader::UniformType::Double:
return 2;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
return 4;
default:
UNIMPLEMENTED_MSG("Unimplemented LDG size!");
return 1;
}
}();
const Node addr_register = GetRegister(instr.gpr8);
const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
const auto cbuf = std::get_if<CbufNode>(base_address);
ASSERT(cbuf != nullptr);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
bb.push_back(Comment(
fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
used_global_memory_bases.insert(descriptor);
const Node immediate_offset =
Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
const Node base_real_address =
Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
SetTemporal(bb, i, gmem);
}
for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
case OpCode::Id::ST_A: {
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
"Indirect attribute loads are not supported");
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto StoreNextElement = [&](u32 reg_offset) {
const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
next_element, GetRegister(instr.gpr39));
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
bb.push_back(Operation(OperationCode::Assign, dest, src));
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
StoreNextElement(reg_offset);
}
break;
}
case OpCode::Id::ST_L: {
UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
static_cast<u32>(instr.st_l.unknown.Value()));
const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
Immediate(static_cast<s32>(instr.smem_imm)));
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bytes32:
SetLocalMemory(bb, index, GetRegister(instr.gpr0));
break;
default:
UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
}
case OpCode::Id::TEX: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
break;
}
case OpCode::Id::TEXS: {
const TextureType texture_type{instr.texs.GetTextureType()};
const bool is_array{instr.texs.IsArrayTexture()};
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
}
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
if (instr.texs.fp32_flag) {
WriteTexsInstructionFloat(bb, instr, components);
} else {
WriteTexsInstructionHalfFloat(bb, instr, components);
}
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
}
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
std::vector<Node> coords;
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
coords.push_back(op_b);
} else {
coords.push_back(op_a);
coords.push_back(op_b);
}
const auto num_coords = static_cast<u32>(coords.size());
coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, num_coords};
values[element] =
Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
}
WriteTexsInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
}
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
MetaTexture meta{sampler, element};
const Node value = Operation(OperationCode::F4TextureQueryDimensions,
std::move(meta), GetRegister(instr.gpr8));
SetTemporal(bb, element, value);
}
for (u32 i = 0; i < 4; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
static_cast<u32>(instr.txq.query_type.Value()));
}
break;
}
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
// TODO: Add coordinates for different samplers once other texture types are implemented.
switch (texture_type) {
case TextureType::Texture1D:
coords.push_back(GetRegister(instr.gpr8));
break;
case TextureType::Texture2D:
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
break;
default:
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
// Fallback to interpreting as a 2D texture for now
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
const Node value =
Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
}
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
return pc;
}
/// Returns the sampler entry associated with the instruction's sampler index, registering a new
/// entry the first time that index is requested.
/// @param sampler   Sampler field of the instruction; its index is used as the lookup offset.
/// @param type      Texture type the caller expects for this sampler.
/// @param is_array  Whether the caller expects an array sampler.
/// @param is_shadow Whether the caller expects a shadow sampler.
/// @return Reference to the matching entry stored in used_samplers.
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
                                    bool is_array, bool is_shadow) {
    const auto offset = static_cast<std::size_t>(sampler.index.Value());

    // An entry registered earlier for this offset is reused; it is asserted to have been
    // registered with the same properties.
    for (const Sampler& known : used_samplers) {
        if (known.GetOffset() != offset) {
            continue;
        }
        ASSERT(known.GetType() == type && known.IsArray() == is_array &&
               known.IsShadow() == is_shadow);
        return known;
    }

    // First use of this offset: the new entry's index is the number of entries registered so far.
    const std::size_t slot = used_samplers.size();
    const Sampler fresh{offset, slot, type, is_array, is_shadow};
    return *used_samplers.emplace(fresh).first;
}
/// Writes the enabled components of a texture result to consecutive registers starting at gpr0.
/// @param bb         Basic block that receives the generated assignments.
/// @param instr      Instruction whose tex component mask and gpr0 field are read.
/// @param components The four fetched components; disabled ones are skipped.
void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
                                        const Node4& components) {
    // Stage the enabled components in temporals, packed without gaps.
    u32 written = 0;
    for (u32 channel = 0; channel < 4; ++channel) {
        if (instr.tex.IsComponentEnabled(channel)) {
            SetTemporal(bb, written, components[channel]);
            ++written;
        }
    }

    // Only once every temporal is set are they copied to the destination registers.
    for (u32 slot = 0; slot < written; ++slot) {
        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporal(slot));
    }
}
/// Writes the enabled components of a TEXS-style result to its two destination register pairs.
/// The first two enabled components go to gpr0 and gpr0+1, the remaining ones to gpr28 and
/// gpr28+1.
/// @param bb         Basic block that receives the generated assignments.
/// @param instr      Instruction whose texs component mask, gpr0 and gpr28 fields are read.
/// @param components The four fetched components; disabled ones are skipped.
void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
                                         const Node4& components) {
    // Stage the enabled components in temporals, packed without gaps.
    u32 enabled = 0;
    for (u32 channel = 0; channel < 4; ++channel) {
        if (instr.texs.IsComponentEnabled(channel)) {
            SetTemporal(bb, enabled, components[channel]);
            ++enabled;
        }
    }

    // The first two swizzle components land in gpr0 and gpr0+1.
    for (u32 slot = 0; slot < enabled && slot < 2; ++slot) {
        SetRegister(bb, instr.gpr0.Value() + slot % 2, GetTemporal(slot));
    }

    // Any further components land in gpr28 and gpr28+1.
    for (u32 slot = 2; slot < enabled; ++slot) {
        ASSERT(instr.texs.HasTwoDestinations());
        SetRegister(bb, instr.gpr28.Value() + slot % 2, GetTemporal(slot));
    }
}
/// Writes the enabled components of a TEXS.F16 result. Destination values are packed in pairs
/// (HPack2): the first pair goes to gpr0 and, when more than two components are enabled, the
/// second pair goes to gpr28.
/// @param bb         Basic block that receives the generated assignments.
/// @param instr      Instruction whose texs component mask, gpr0 and gpr28 fields are read.
/// @param components The four fetched components; disabled ones are skipped.
void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
                                             const Node4& components) {
    // Gather the enabled components at the front of the array.
    Node4 packed_inputs;
    u32 enabled = 0;
    for (u32 channel = 0; channel < 4; ++channel) {
        if (instr.texs.IsComponentEnabled(channel)) {
            packed_inputs[enabled] = components[channel];
            ++enabled;
        }
    }
    if (enabled == 0) {
        return;
    }

    // Pad the unused tail with zero immediates so both pairs are always well defined.
    for (std::size_t slot = enabled; slot < packed_inputs.size(); ++slot) {
        packed_inputs[slot] = Immediate(0);
    }

    const Node low_pair = Operation(OperationCode::HPack2, packed_inputs[0], packed_inputs[1]);
    if (enabled > 2) {
        // Two destinations: stage both packed values in temporals before touching registers.
        SetTemporal(bb, 0, low_pair);
        SetTemporal(bb, 1,
                    Operation(OperationCode::HPack2, packed_inputs[2], packed_inputs[3]));
        SetRegister(bb, instr.gpr0, GetTemporal(0));
        SetRegister(bb, instr.gpr28, GetTemporal(1));
    } else {
        SetRegister(bb, instr.gpr0, low_pair);
    }
}
/// Builds the four per-component texture read operations shared by the TEX and TEXS paths.
/// @param instr         Instruction whose sampler and gpr20 fields are read.
/// @param texture_type  Texture type used to look up the sampler.
/// @param process_mode  Texture process mode; decides whether a lod/bias operand is appended.
/// @param depth_compare Whether the sampler is a shadow sampler.
/// @param is_array      Whether the sampler is an array sampler.
/// @param array_offset  Position of the array index inside coords (only used when is_array).
/// @param bias_offset   Offset added to gpr20 to locate the lod/bias register.
/// @param coords        Coordinate operands; a lod/bias operand may be appended to it here.
/// @return One operation node per result element.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array && depth_compare),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    bool lod_needed = false;
    switch (process_mode) {
    case TextureProcessMode::LZ:
    case TextureProcessMode::LL:
    case TextureProcessMode::LLA:
        lod_needed = true;
        break;
    default:
        break;
    }

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool array_shadow = is_array && depth_compare;
    const bool gl_lod_supported =
        !(array_shadow && (texture_type == Tegra::Shader::TextureType::Texture2D ||
                           texture_type == Tegra::Shader::TextureType::TextureCube));

    OperationCode read_method = OperationCode::F4Texture;
    if (lod_needed && gl_lod_supported) {
        read_method = OperationCode::F4TextureLod;
    }

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    std::optional<u32> array_layer;
    if (is_array) {
        array_layer = static_cast<u32>(array_offset);
    }

    // The count recorded in the meta data excludes the lod/bias operand appended below.
    const auto plain_coord_count = static_cast<u32>(coords.size());

    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        // LZ uses a constant zero. Otherwise lod or bias are always stored in the register
        // indexed by the gpr20 field with an offset depending on the usage of the other
        // registers.
        coords.push_back(process_mode == TextureProcessMode::LZ
                             ? Immediate(0.0f)
                             : GetRegister(instr.gpr20.Value() + bias_offset));
    }

    Node4 result;
    for (u32 element = 0; element < result.size(); ++element) {
        auto operands = coords;
        MetaTexture meta{sampler, element, plain_coord_count, array_layer};
        result[element] = Operation(read_method, std::move(meta), std::move(operands));
    }
    return result;
}
/// Collects the operands of a TEX instruction and forwards them to GetTextureCode.
/// @param instr         Instruction whose gpr8 and gpr20 fields locate the operands.
/// @param texture_type  Texture type being sampled.
/// @param process_mode  Texture process mode; any mode other than None and LZ reserves a
///                      lod/bias register.
/// @param depth_compare Whether a depth reference operand is read.
/// @param is_array      Whether an array index operand is read.
/// @return One operation node per result element.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);

    const auto [texture_coords, padded_size] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = is_array ? array_register + 1 : array_register;

    std::vector<Node> coords;
    for (std::size_t index = 0; index < texture_coords; ++index) {
        coords.push_back(GetRegister(coord_register + index));
    }

    // 1D.DC in opengl the 2nd component is ignored.
    const bool is_1d_shadow =
        depth_compare && !is_array && texture_type == TextureType::Texture1D;
    if (is_1d_shadow) {
        coords.push_back(Immediate(0.0f));
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }

    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }

    // Fill ignored coordinates
    for (std::size_t filled = coords.size(); filled < padded_size; ++filled) {
        coords.push_back(Immediate(0));
    }

    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          0, std::move(coords));
}
/// Collects the operands of a TEXS instruction and forwards them to GetTextureCode.
/// @param instr         Instruction whose gpr8 and gpr20 fields locate the operands.
/// @param texture_type  Texture type being sampled.
/// @param process_mode  Texture process mode; any mode other than None and LZ reserves a
///                      lod/bias register.
/// @param depth_compare Whether a depth reference operand is read.
/// @param is_array      Whether an array index operand is read.
/// @return One operation node per result element.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);

    const auto [texture_coords, padded_size] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = is_array ? array_register + 1 : array_register;

    // The last coordinate follows the first one only for non-array textures with at most two
    // coordinates that also carry a lod/bias or depth operand; otherwise it is read from gpr20.
    const bool last_follows_first =
        !is_array && (lod_bias_enabled || depth_compare) && !(texture_coords > 2);
    const u64 last_coord_register =
        last_follows_first ? coord_register + 1 : static_cast<u64>(instr.gpr20.Value());

    std::vector<Node> coords;
    for (std::size_t index = 0; index < texture_coords; ++index) {
        const bool is_final = texture_coords > 1 && index + 1 == texture_coords;
        coords.push_back(GetRegister(is_final ? last_coord_register : coord_register + index));
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }

    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }

    // Fill ignored coordinates
    for (std::size_t filled = coords.size(); filled < padded_size; ++filled) {
        coords.push_back(Immediate(0));
    }

    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          (texture_coords > 2 ? 1 : 0), std::move(coords));
}
/// Builds the four per-component texture gather operations of a TLD4 instruction.
/// @param instr         Instruction whose gpr8 field locates the operands and whose sampler
///                      field selects the sampler.
/// @param texture_type  Texture type being gathered from.
/// @param depth_compare Whether the sampler is looked up as a shadow sampler.
/// @param is_array      Whether an array index operand is read.
/// @return One F4TextureGather operation node per result element.
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    std::optional<u32> array_offset;
    if (is_array) {
        array_offset = static_cast<u32>(coords.size());
        coords.push_back(GetRegister(array_register));
    }

    // NOTE(review): depth_compare only affects the sampler lookup here; no depth reference
    // operand is appended to coords - confirm this is intended.
    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    // The operand count is the same for every element, so compute it once.
    const auto coords_count = static_cast<u32>(coords.size());

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset};
        values[element] =
            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
    }
    return values;
}
/// Builds the four per-component texel fetch operations of a TLDS instruction.
/// @param instr        Instruction whose gpr8/gpr20 fields locate the operands and whose sampler
///                     field selects the sampler.
/// @param texture_type Texture type being fetched from.
/// @param is_array     Whether an array index operand is read.
/// @return One F4TexelFetch operation node per result element.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();

    // Coordinates start at gpr20 for array textures and at gpr8 otherwise
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    std::optional<u32> array_offset;
    if (is_array) {
        array_offset = static_cast<u32>(coords.size());
        coords.push_back(GetRegister(array_register));
    }

    // The count recorded in the meta data excludes the lod operand appended below.
    const auto coords_count = static_cast<u32>(coords.size());

    if (lod_enabled) {
        // When lod is used it is always in gpr20
        coords.push_back(GetRegister(instr.gpr20));
    } else {
        coords.push_back(Immediate(0));
    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset};
        values[element] =
            Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
    }
    return values;
}
/// Computes how many coordinates a texture operation reads and how many operands it is given.
/// @param texture_type     Texture type; determines the base coordinate count.
/// @param depth_compare    Adds one operand for the depth reference.
/// @param is_array         Adds one operand for the array index.
/// @param lod_bias_enabled Adds one input register for lod/bias.
/// @param max_coords       Maximum supported operand count.
/// @param max_inputs       Maximum supported input register count.
/// @return {coordinate count of the texture type, total operand count}.
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    std::size_t operand_count = coord_count;
    if (is_array) {
        ++operand_count;
    }
    if (depth_compare) {
        ++operand_count;
    }

    const std::size_t register_count = lod_bias_enabled ? operand_count + 1 : operand_count;
    const bool fits = operand_count <= max_coords && register_count <= max_inputs;
    if (!fits) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        operand_count = std::min(operand_count, max_coords);
    }

    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        ++operand_count;
    }

    return {coord_count, operand_count};
}
} // namespace VideoCommon::Shader

View File

@@ -1,178 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
/// Decodes the instruction at pc for the opcodes handled here (EXIT, KIL, MOV_SYS, BRA, SSY, PBK,
/// SYNC, BRK, IPA, OUT_R, ISBERD and DEPBAR) and appends the resulting nodes to bb.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not referenced in this function.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, moved to MAX_PROGRAM_LENGTH - 1 after an unpredicated EXIT.
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::EXIT: {
        const Tegra::Shader::ConditionCode exit_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(exit_cc != Tegra::Shader::ConditionCode::T,
                             "EXIT condition code used: {}", static_cast<u32>(exit_cc));

        const auto flow_condition = instr.flow.cond.Value();
        if (flow_condition == Tegra::Shader::FlowCondition::Always) {
            bb.push_back(Operation(OperationCode::Exit));
            const bool unpredicated =
                instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex);
            if (unpredicated) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
        } else if (flow_condition == Tegra::Shader::FlowCondition::Fcsm_Tr) {
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
        } else {
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
                              static_cast<u32>(instr.flow.cond.Value()));
        }
        break;
    }
    case OpCode::Id::KIL: {
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const Tegra::Shader::ConditionCode kil_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(kil_cc != Tegra::Shader::ConditionCode::T,
                             "KIL condition code used: {}", static_cast<u32>(kil_cc));

        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
        switch (instr.sys20) {
        case Tegra::Shader::SystemVariable::InvocationInfo: {
            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
            SetRegister(bb, instr.gpr0, Immediate(0u));
            break;
        }
        case Tegra::Shader::SystemVariable::Ydirection: {
            // Config pack's third value is Y_NEGATE's state.
            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
        }
        break;
    }
    case OpCode::Id::BRA: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "BRA with constant buffers are not implemented");

        const u32 destination = pc + instr.bra.GetBranchTarget();
        const Node jump = Operation(OperationCode::Branch, Immediate(destination));

        const Tegra::Shader::ConditionCode branch_cc = instr.flow_condition_code;
        if (branch_cc == Tegra::Shader::ConditionCode::T) {
            bb.push_back(jump);
        } else {
            // Any other condition code guards the branch.
            bb.push_back(Conditional(GetConditionCode(branch_cc), {jump}));
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
        // structure to the BRA opcode.
        const u32 reconverge_at = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(reconverge_at)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
        // it's very unlikely a driver will emit such invalid shader.
        const u32 break_to = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(break_to)));
        break;
    }
    case OpCode::Id::SYNC: {
        const Tegra::Shader::ConditionCode sync_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(sync_cc != Tegra::Shader::ConditionCode::T,
                             "SYNC condition code used: {}", static_cast<u32>(sync_cc));

        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::BRK: {
        const Tegra::Shader::ConditionCode brk_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(brk_cc != Tegra::Shader::ConditionCode::T,
                             "BRK condition code used: {}", static_cast<u32>(brk_cc));

        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::IPA: {
        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

        const Node input = GetInputAttribute(attribute.index, attribute.element, input_mode);
        const Node result = GetSaturatedFloat(input, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        // Currently a plain register move from gpr8 to gpr0.
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::DEPBAR: {
        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,67 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
/// Decodes PSETP and CSETP, which write predicates computed from other predicates or from a
/// condition code, and appends the resulting nodes to bb.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not referenced in this function.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unchanged.
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::PSETP: {
        const Node lhs = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
        const Node rhs = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);

        // We can't use the constant predicate as destination.
        ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const Node chained = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
        const Node combined = Operation(combiner, lhs, rhs);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, combined, chained));

        const bool has_secondary = instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
        if (has_secondary) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
            // enabled
            const Node inverted = Operation(OperationCode::LogicalNegate, combined);
            SetPredicate(bb, instr.psetp.pred0, Operation(combiner, inverted, chained));
        }
        break;
    }
    case OpCode::Id::CSETP: {
        const Node chained = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
        const Node cc_value = GetConditionCode(instr.csetp.cc);
        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);

        // Both destinations are optional; the secondary one receives the negated condition code.
        if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, cc_value, chained));
        }
        if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            const Node inverted_cc = Operation(OperationCode::LogicalNegate, cc_value);
            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, inverted_cc, chained));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,46 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/// Decodes a PSET-style instruction: combines three predicates into one boolean and writes it
/// to gpr0, either as a float (1.0f / 0.0f) or as an integer mask (0xffffffff / 0).
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not referenced in this function.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unchanged.
u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in PSET is not implemented");

    // (pred12 COND pred29) OP pred39
    const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
    const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
    const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);

    const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
    const Node predicate = Operation(combiner, first_pred, second_pred);

    // The bf bit selects between the float and the integer-mask result encoding.
    const bool is_float_result = instr.pset.bf != 0;
    const Node true_value = is_float_result ? Immediate(1.0f) : Immediate(0xffffffff);
    const Node false_value = is_float_result ? Immediate(0.0f) : Immediate(0);
    const Node value =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);

    if (is_float_result) {
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,51 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/// Decodes R2P_IMM: for each of the seven programmable predicates, conditionally assigns it from
/// one bit of gpr8, guarded by the matching bit of the immediate mask.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not referenced in this function.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unchanged.
u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);

    // Only the immediate-mask encoding is expected to reach this decoder.
    if (opcode->get().GetId() != OpCode::Id::R2P_IMM) {
        UNREACHABLE();
    }
    const Node apply_mask = Immediate(static_cast<u32>(instr.r2p.immediate_mask));

    const Node source = GetRegister(instr.gpr8);
    // The selected byte of the source register provides the predicate values.
    const auto byte_offset = static_cast<u32>(instr.r2p.byte) * 8;

    constexpr u32 programmable_preds = 7;
    for (u64 pred = 0; pred < programmable_preds; ++pred) {
        const auto bit = static_cast<u32>(pred);

        // The assignment only happens when the mask bit for this predicate is set.
        const Node mask_bit = BitfieldExtract(apply_mask, bit, 1);
        const Node condition =
            Operation(OperationCode::LogicalUNotEqual, mask_bit, Immediate(0));

        const Node source_bit = BitfieldExtract(source, byte_offset + bit, 1);
        const Node value = Operation(OperationCode::LogicalUNotEqual, source_bit, Immediate(0));

        const Node assignment =
            Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
        bb.push_back(Conditional(condition, {assignment}));
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,55 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/// Decodes the SHR and SHL instruction families and writes the shifted value to gpr0.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not referenced in this function.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unchanged.
u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node shifted = GetRegister(instr.gpr8);

    // The shift amount comes from an immediate, a register or a constant buffer entry.
    const Node amount =
        instr.is_b_imm
            ? Immediate(instr.alu.GetSignedImm20_20())
            : (instr.is_b_gpr ? GetRegister(instr.gpr20)
                              : GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset));

    switch (opcode->get().GetId()) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM: {
        const Node result = SignedOperation(OperationCode::IArithmeticShiftRight,
                                            instr.shift.is_signed, PRECISE, shifted, amount);
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM: {
        const Node result =
            Operation(OperationCode::ILogicalShiftLeft, PRECISE, shifted, amount);
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
    }

    return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,111 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::VideoType;
using Tegra::Shader::VmadShr;
/// Decodes the video instructions VMAD and VSETP and appends the resulting nodes to bb.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not referenced in this function.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unchanged.
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);

    // The second operand is either a register slice or a 16-bit immediate.
    const Node op_b = [&]() {
        if (!instr.video.use_register_b) {
            if (!instr.video.signed_b) {
                return Immediate(instr.alu.GetImm20_16());
            }
            // Signed immediates go through s16 first so the value is sign-extended.
            const auto narrowed = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(narrowed));
        }
        return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                               instr.video.signed_b, instr.video.type_b,
                               instr.video.byte_height_b);
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        // a * b + c, optionally followed by a right shift of 7 or 15 bits.
        const Node product =
            SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        Node result =
            SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, product, op_c);

        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            result = SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, result,
                                     shift);
        }

        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool is_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node comparison =
            GetPredicateComparisonInteger(instr.vsetp.cond, is_signed, op_a, op_b);
        const Node chained = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, comparison, chained));

        const bool has_secondary = instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
        if (has_secondary) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, inverted, chained));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
/// Selects the portion of a register consumed by a video instruction operand.
/// When is_chunk is false an 8-bit field starting at bit byte_height * 8 is extracted, otherwise
/// type picks the low or high 16 bits. is_signed is currently not read.
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
                               Tegra::Shader::VideoType type, u64 byte_height) {
    if (!is_chunk) {
        const auto bit_offset = static_cast<u32>(byte_height * 8);
        return BitfieldExtract(op, bit_offset, 8);
    }

    // Value handed back by every path that is not a 16-bit selection
    const Node fallback = Immediate(0);

    switch (type) {
    case VideoType::Size16_Low:
        return BitfieldExtract(op, 0, 16);
    case VideoType::Size16_High:
        return BitfieldExtract(op, 16, 16);
    case VideoType::Size32:
        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
        // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
        UNIMPLEMENTED();
        return fallback;
    case VideoType::Invalid:
        UNREACHABLE_MSG("Invalid instruction encoding");
        return fallback;
    default:
        UNREACHABLE();
        return fallback;
    }
}
} // namespace VideoCommon::Shader

View File

@@ -1,97 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
/// Decodes the XMAD instruction stored at program_code[pc] and appends the generated operations
/// to bb. Signed operands and condition code generation hit UNIMPLEMENTED assertions.
/// @param bb Basic block that receives the generated nodes.
/// @param code Not read by this function.
/// @param pc Index of the instruction inside program_code.
/// @returns pc, unmodified.
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.xmad.sign_a);
UNIMPLEMENTED_IF(instr.xmad.sign_b);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in XMAD is not implemented");
Node op_a = GetRegister(instr.gpr8);
// TODO(bunnei): Needs to be fixed once op_a or op_b is signed
UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
const bool is_signed_a = instr.xmad.sign_a == 1;
const bool is_signed_b = instr.xmad.sign_b == 1;
const bool is_signed_c = is_signed_a;
// Pick the merge flag and the sources of operands B and C depending on the encoding
auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RR:
return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RC:
return {false, GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
case OpCode::Id::XMAD_IMM:
return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
}
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
return {false, Immediate(0), Immediate(0)};
}();
// Both factors are 16-bit halves of their source operands
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
const Node original_b = op_b;
op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);
// TODO(Rodrigo): Use an appropriate sign for this operation
Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
if (instr.xmad.product_shift_left) {
product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
}
const Node original_c = op_c;
// The XMAD mode decides which part of operand C is added to the product
op_c = [&]() {
switch (instr.xmad.mode) {
case Tegra::Shader::XmadMode::None:
return original_c;
case Tegra::Shader::XmadMode::CLo:
return BitfieldExtract(original_c, 0, 16);
case Tegra::Shader::XmadMode::CHi:
return BitfieldExtract(original_c, 16, 16);
case Tegra::Shader::XmadMode::CBcc: {
// C plus the full, unextracted operand B shifted left by 16 bits
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
NO_PRECISE, original_b, Immediate(16));
return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
shifted_b);
}
default:
UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
return Immediate(0);
}
}();
// TODO(Rodrigo): Use an appropriate sign for this operation
Node sum = Operation(OperationCode::IAdd, product, op_c);
if (is_merge) {
// Merge: low 16 bits of the sum combined with operand B shifted into the upper half
const Node a = BitfieldExtract(sum, 0, 16);
const Node b =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
}
SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
SetRegister(bb, instr.gpr0, sum);
return pc;
}
} // namespace VideoCommon::Shader

View File

@@ -1,444 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cmath>
#include <unordered_map>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::IpaMode;
using Tegra::Shader::Pred;
using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
/// Moves a node payload into storage owned by this IR and returns a pointer to it.
/// The payload lives in its own heap allocation held by stored_nodes, so the returned pointer
/// is not affected when that container grows.
/// @param node_data Payload to store; it is moved from.
/// @returns Pointer to the stored payload.
Node ShaderIR::StoreNode(NodeData&& node_data) {
    // A named rvalue reference is an lvalue: without std::move the whole variant (operand
    // vectors included) would be copied instead of moved.
    auto store = std::make_unique<NodeData>(std::move(node_data));
    const Node node = store.get();
    stored_nodes.push_back(std::move(store));
    return node;
}
/// Creates a node that wraps code behind a condition; the code vector is moved into the node.
Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
    ConditionalNode conditional(condition, std::move(code));
    return StoreNode(std::move(conditional));
}
/// Creates a node carrying the given comment text.
Node ShaderIR::Comment(const std::string& text) {
    CommentNode comment(text);
    return StoreNode(std::move(comment));
}
/// Creates a node holding a 32-bit immediate value.
Node ShaderIR::Immediate(u32 value) {
    ImmediateNode immediate(value);
    return StoreNode(std::move(immediate));
}
/// Creates a node for a general purpose register.
/// Every register other than Register::ZeroIndex is recorded in used_registers.
Node ShaderIR::GetRegister(Register reg) {
    const bool track_usage = reg != Register::ZeroIndex;
    if (track_usage) {
        used_registers.insert(static_cast<u32>(reg));
    }
    return StoreNode(GprNode(reg));
}
/// Creates an immediate node from the value decoded by instr.alu.GetImm20_19().
Node ShaderIR::GetImmediate19(Instruction instr) {
    const auto decoded = instr.alu.GetImm20_19();
    return Immediate(decoded);
}
/// Creates an immediate node from the value decoded by instr.alu.GetImm20_32().
Node ShaderIR::GetImmediate32(Instruction instr) {
    const auto decoded = instr.alu.GetImm20_32();
    return Immediate(decoded);
}
/// Creates a node that reads a constant buffer at a fixed offset.
/// The buffer is registered in used_cbufs (if it was not already) and the offset is reported to
/// it through MarkAsUsed.
Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
    const u32 index = static_cast<u32>(index_);
    const u32 offset = static_cast<u32>(offset_);

    // try_emplace keeps an existing entry untouched and hands back its iterator
    auto& cbuf = used_cbufs.try_emplace(index).first->second;
    cbuf.MarkAsUsed(offset);

    const Node offset_node = Immediate(offset);
    return StoreNode(CbufNode(index, offset_node));
}
/// Creates a node that reads a constant buffer at `node + offset_`.
/// The buffer is registered in used_cbufs (if it was not already) and flagged as indirectly
/// accessed.
Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const u32 index = static_cast<u32>(index_);
    const u32 offset = static_cast<u32>(offset_);

    auto& cbuf = used_cbufs.try_emplace(index).first->second;
    cbuf.MarkAsUsedIndirect();

    const Node base_offset = Immediate(offset);
    const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, base_offset);
    return StoreNode(CbufNode(index, final_offset));
}
/// Creates a node for a predicate register, optionally negated.
/// Pred::UnusedIndex and Pred::NeverExecute are not recorded in used_predicates.
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const auto pred = static_cast<Pred>(pred_);

    const bool is_untracked = pred == Pred::UnusedIndex || pred == Pred::NeverExecute;
    if (!is_untracked) {
        used_predicates.insert(pred);
    }

    return StoreNode(PredicateNode(pred, negated));
}
/// Creates a predicate node for a boolean constant: true maps to Pred::UnusedIndex and false
/// maps to Pred::NeverExecute.
Node ShaderIR::GetPredicate(bool immediate) {
    const Pred constant_pred = immediate ? Pred::UnusedIndex : Pred::NeverExecute;
    return GetPredicate(static_cast<u64>(constant_pred));
}
/// Creates a node that reads an element of an input attribute.
/// The attribute and the input mode it is read with are recorded in used_input_attributes.
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
    // emplace does not replace an existing entry; either way .first points at the entry
    const auto insertion =
        used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
    insertion.first->second.insert(input_mode);

    const auto element_index = static_cast<u32>(element);
    return StoreNode(AbufNode(index, element_index, input_mode, buffer));
}
/// Creates a node that writes an element of an output attribute.
/// The attribute is recorded in used_output_attributes; writes to the clip distance attributes
/// additionally flag the matching slot of used_clip_distances.
/// @param index Output attribute being written.
/// @param element Element inside the attribute.
/// @param buffer Forwarded to the attribute node.
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
    if (index == Attribute::Index::ClipDistances0123 ||
        index == Attribute::Index::ClipDistances4567) {
        // ClipDistances0123 covers distances 0..3 and ClipDistances4567 covers 4..7, so the
        // second attribute starts at slot 4. Using 1 as base made its elements alias slots 1..4.
        // NOTE(review): assumes used_clip_distances has one slot per clip distance (8) - confirm.
        const u64 first_distance = index == Attribute::Index::ClipDistances4567 ? 4 : 0;
        const auto clip_index = static_cast<u32>(first_distance + element);
        used_clip_distances.at(clip_index) = true;
    }
    used_output_attributes.insert(index);
    return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
}
/// Creates a node for an internal flag, wrapped in a LogicalNegate operation when negated is
/// set.
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
    const Node flag_node = StoreNode(InternalFlagNode(flag));
    return negated ? Operation(OperationCode::LogicalNegate, flag_node) : flag_node;
}
/// Creates a node that refers to local memory at the given address node.
Node ShaderIR::GetLocalMemory(Node address) {
    LmemNode local_memory(address);
    return StoreNode(std::move(local_memory));
}
/// Creates a node for the temporal with the given id.
/// Temporals are the registers that follow Register::ZeroIndex.
Node ShaderIR::GetTemporal(u32 id) {
    const auto temporal_register = Register::ZeroIndex + 1 + id;
    return GetRegister(temporal_register);
}
/// Applies the optional absolute and negate modifiers to a float operand.
/// The absolute value is taken first, so both flags together yield -|value|.
Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::FAbsolute, NO_PRECISE, result);
    }
    if (negate) {
        result = Operation(OperationCode::FNegate, NO_PRECISE, result);
    }
    return result;
}
/// Clamps a float value to [+0.0, 1.0] when saturate is set; otherwise returns it unchanged.
Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
    if (saturate) {
        const Node lower_bound = Immediate(std::copysignf(0, 1));
        const Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::FClamp, NO_PRECISE, value, lower_bound, upper_bound);
    }
    return value;
}
/// Narrows an integer value to the requested register size.
/// Byte and Short shift the value left and then right again by 24 / 16 bits; the operations are
/// chosen through SignedOperation according to is_signed. Word returns the value unchanged.
/// @param value Value to convert.
/// @param size Target size.
/// @param is_signed Whether the signed variants of the shift operations are used.
/// @returns The converted value; unknown sizes assert and return the value unchanged.
Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
    switch (size) {
    case Register::Size::Byte:
        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
                                Immediate(24));
        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
                                Immediate(24));
        return value;
    case Register::Size::Short:
        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
                                Immediate(16));
        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
                                Immediate(16));
        // Return explicitly instead of silently falling through into the Word case
        return value;
    case Register::Size::Word:
        // Default - do nothing
        return value;
    default:
        UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
        return value;
    }
}
/// Applies the optional absolute and negate modifiers to an integer operand.
/// Unsigned operands are returned untouched, since neither modifier is meaningful for them.
Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
    if (is_signed) {
        if (absolute) {
            value = Operation(OperationCode::IAbsolute, NO_PRECISE, value);
        }
        if (negate) {
            value = Operation(OperationCode::INegate, NO_PRECISE, value);
        }
    }
    return value;
}
/// Creates a node with the packed half float immediates of an instruction.
/// When has_negation is set the value is wrapped in an HNegate operation driven by the
/// instruction's first_negate / second_negate bits.
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    const Node packed = Immediate(instr.half_imm.PackImmediates());
    if (has_negation) {
        const Node negate_first = GetPredicate(instr.half_imm.first_negate != 0);
        const Node negate_second = GetPredicate(instr.half_imm.second_negate != 0);
        return Operation(OperationCode::HNegate, HALF_NO_PRECISE, packed, negate_first,
                         negate_second);
    }
    return packed;
}
/// Combines a half float result with its destination according to the merge mode.
/// H0_H1 passes src through; the remaining modes map to the HMergeF32 / HMergeH0 / HMergeH1
/// operations.
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    using MergeMode = Tegra::Shader::HalfMerge;

    switch (merge) {
    case MergeMode::H0_H1:
        return src;
    case MergeMode::F32:
        return Operation(OperationCode::HMergeF32, src);
    case MergeMode::Mrg_H0:
        return Operation(OperationCode::HMergeH0, dest, src);
    case MergeMode::Mrg_H1:
        return Operation(OperationCode::HMergeH1, dest, src);
    }

    UNREACHABLE();
    return src;
}
/// Applies the optional absolute and negate modifiers to a packed half float operand.
/// Negation passes a true predicate for both flag operands of HNegate.
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, result);
    }
    if (negate) {
        const Node negate_first = GetPredicate(true);
        const Node negate_second = GetPredicate(true);
        result = Operation(OperationCode::HNegate, HALF_NO_PRECISE, result, negate_first,
                           negate_second);
    }
    return result;
}
/// Builds the boolean node for a float comparison.
/// The *WithNan conditions use the same comparison as their plain counterpart and additionally
/// evaluate to true when either operand is NaN.
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    static const std::unordered_map<PredCondition, OperationCode> comparison_table = {
        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
        {PredCondition::Equal, OperationCode::LogicalFEqual},
        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};

    const auto entry = comparison_table.find(condition);
    UNIMPLEMENTED_IF_MSG(entry == comparison_table.end(),
                         "Unknown predicate comparison operation");

    Node predicate = Operation(entry->second, NO_PRECISE, op_a, op_b);

    bool accepts_nan = false;
    switch (condition) {
    case PredCondition::LessThanWithNan:
    case PredCondition::NotEqualWithNan:
    case PredCondition::LessEqualWithNan:
    case PredCondition::GreaterThanWithNan:
    case PredCondition::GreaterEqualWithNan:
        accepts_nan = true;
        break;
    default:
        break;
    }

    if (accepts_nan) {
        const Node a_is_nan = Operation(OperationCode::LogicalFIsNan, op_a);
        predicate = Operation(OperationCode::LogicalOr, predicate, a_is_nan);
        const Node b_is_nan = Operation(OperationCode::LogicalFIsNan, op_b);
        predicate = Operation(OperationCode::LogicalOr, predicate, b_is_nan);
    }
    return predicate;
}
/// Builds the boolean node for an integer comparison, signed or unsigned depending on is_signed.
/// The *WithNan conditions are looked up like their plain counterparts but then hit an
/// "unimplemented" assertion.
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                             Node op_b) {
    static const std::unordered_map<PredCondition, OperationCode> comparison_table = {
        {PredCondition::LessThan, OperationCode::LogicalILessThan},
        {PredCondition::Equal, OperationCode::LogicalIEqual},
        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};

    const auto entry = comparison_table.find(condition);
    UNIMPLEMENTED_IF_MSG(entry == comparison_table.end(),
                         "Unknown predicate comparison operation");

    const Node predicate = SignedOperation(entry->second, is_signed, NO_PRECISE, op_a, op_b);

    bool is_nan_variant = false;
    switch (condition) {
    case PredCondition::LessThanWithNan:
    case PredCondition::NotEqualWithNan:
    case PredCondition::LessEqualWithNan:
    case PredCondition::GreaterThanWithNan:
    case PredCondition::GreaterEqualWithNan:
        is_nan_variant = true;
        break;
    default:
        break;
    }
    UNIMPLEMENTED_IF_MSG(is_nan_variant, "NaN comparisons for integers are not implemented");

    return predicate;
}
/// Builds the node for a packed half float comparison using the given half arithmetic metadata.
/// The *WithNan conditions hit an "unimplemented" assertion before the comparison is looked up.
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {
    bool is_nan_variant = false;
    switch (condition) {
    case PredCondition::LessThanWithNan:
    case PredCondition::NotEqualWithNan:
    case PredCondition::LessEqualWithNan:
    case PredCondition::GreaterThanWithNan:
    case PredCondition::GreaterEqualWithNan:
        is_nan_variant = true;
        break;
    default:
        break;
    }
    UNIMPLEMENTED_IF_MSG(is_nan_variant, "Unimplemented NaN comparison for half floats");

    static const std::unordered_map<PredCondition, OperationCode> comparison_table = {
        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
        {PredCondition::Equal, OperationCode::Logical2HEqual},
        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};

    const auto entry = comparison_table.find(condition);
    UNIMPLEMENTED_IF_MSG(entry == comparison_table.end(),
                         "Unknown predicate comparison operation");

    return Operation(entry->second, meta, op_a, op_b);
}
/// Translates a predicate combining operation (And, Or, Xor) into its logical operation code.
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
    static const std::unordered_map<PredOperation, OperationCode> combiner_table = {
        {PredOperation::And, OperationCode::LogicalAnd},
        {PredOperation::Or, OperationCode::LogicalOr},
        {PredOperation::Xor, OperationCode::LogicalXor},
    };

    const auto entry = combiner_table.find(operation);
    UNIMPLEMENTED_IF_MSG(entry == combiner_table.end(), "Unknown predicate operation");
    return entry->second;
}
/// Builds the boolean node for a condition code.
/// Only NEU is handled (the negated Zero flag); any other code asserts and yields the
/// NeverExecute predicate.
Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
    if (cc == Tegra::Shader::ConditionCode::NEU) {
        return GetInternalFlag(InternalFlag::Zero, true);
    }

    UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
    return GetPredicate(static_cast<u64>(Pred::NeverExecute));
}
/// Appends an assignment of src to the register dest to the basic block.
void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) {
    const Node target = GetRegister(dest);
    bb.push_back(Operation(OperationCode::Assign, target, src));
}
/// Appends an assignment of src to the predicate dest to the basic block.
void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) {
    const Node target = GetPredicate(dest);
    bb.push_back(Operation(OperationCode::LogicalAssign, target, src));
}
/// Appends an assignment of value to the given internal flag to the basic block.
void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) {
    const Node target = GetInternalFlag(flag);
    bb.push_back(Operation(OperationCode::LogicalAssign, target, value));
}
/// Appends a store of value to local memory at address to the basic block.
void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) {
    const Node target = GetLocalMemory(address);
    bb.push_back(Operation(OperationCode::Assign, target, value));
}
/// Appends an assignment of value to the temporal with the given id to the basic block.
/// Temporals are the registers that follow Register::ZeroIndex.
void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
    const auto temporal_register = Register::ZeroIndex + 1 + id;
    SetRegister(bb, temporal_register, value);
}
/// Updates the internal flags from a float result when sets_cc is set.
/// Only the Zero flag is written (value == 0.0f); a warning is logged about the rest.
void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        const Node float_zero = Immediate(0.0f);
        const Node is_zero = Operation(OperationCode::LogicalFEqual, value, float_zero);
        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
/// Updates the internal flags from an integer result when sets_cc is set.
/// Only the Zero flag is written (value == 0); a warning is logged about the rest.
void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        const Node integer_zero = Immediate(0);
        const Node is_zero = Operation(OperationCode::LogicalIEqual, value, integer_zero);
        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
/// Creates an unsigned bitfield extraction of `bits` bits starting at bit `offset` of value.
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
    const Node offset_node = Immediate(offset);
    const Node bits_node = Immediate(bits);
    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, offset_node, bits_node);
}
/// Maps a signed operation code to its unsigned counterpart.
/// @param operation_code Signed (I-prefixed) operation code, or FCastInteger.
/// @param is_signed When true the code is returned unchanged.
/// @returns The matching unsigned code. INegate, IAbsolute and codes without a counterpart
/// assert and return a value-initialized OperationCode.
/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
                                                        bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitfieldExtract:
        // This pair exists in OperationCode but was missing from the mapping
        return OperationCode::UBitfieldExtract;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::INegate:
        // Return right away so this case does not fall through into the next message
        UNREACHABLE_MSG("Can't negate an unsigned integer");
        return {};
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
        return {};
    default:
        break;
    }
    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
    return {};
}
} // namespace VideoCommon::Shader

View File

@@ -1,823 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstring>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <variant>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
namespace VideoCommon::Shader {
// Forward declarations of every alternative NodeData can hold
class OperationNode;
class ConditionalNode;
class GprNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
class AbufNode; ///< Attribute buffer
class CbufNode; ///< Constant buffer
class LmemNode; ///< Local memory
class GmemNode; ///< Global memory
class CommentNode;
/// Shader program as a sequence of 64-bit words
using ProgramCode = std::vector<u64>;
/// Tagged union over all node kinds of the IR
using NodeData =
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
/// Handle to an immutable node
using Node = const NodeData*;
/// Group of four node handles
using Node4 = std::array<Node, 4>;
/// Ordered list of node handles
using BasicBlock = std::vector<Node>;
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
/// Every operation an OperationNode can represent.
/// The comment next to each entry lists the expected metadata and operands followed by the
/// result type. F, I, U and H prefixes stand for float, signed integer, unsigned integer and
/// packed half float operands; Amount is the number of operation codes.
enum class OperationCode {
Assign, /// (float& dest, float src) -> void
Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
FAdd, /// (MetaArithmetic, float a, float b) -> float
FMul, /// (MetaArithmetic, float a, float b) -> float
FDiv, /// (MetaArithmetic, float a, float b) -> float
FFma, /// (MetaArithmetic, float a, float b, float c) -> float
FNegate, /// (MetaArithmetic, float a) -> float
FAbsolute, /// (MetaArithmetic, float a) -> float
FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
FMin, /// (MetaArithmetic, float a, float b) -> float
FMax, /// (MetaArithmetic, float a, float b) -> float
FCos, /// (MetaArithmetic, float a) -> float
FSin, /// (MetaArithmetic, float a) -> float
FExp2, /// (MetaArithmetic, float a) -> float
FLog2, /// (MetaArithmetic, float a) -> float
FInverseSqrt, /// (MetaArithmetic, float a) -> float
FSqrt, /// (MetaArithmetic, float a) -> float
FRoundEven, /// (MetaArithmetic, float a) -> float
FFloor, /// (MetaArithmetic, float a) -> float
FCeil, /// (MetaArithmetic, float a) -> float
FTrunc, /// (MetaArithmetic, float a) -> float
FCastInteger, /// (MetaArithmetic, int a) -> float
FCastUInteger, /// (MetaArithmetic, uint a) -> float
IAdd, /// (MetaArithmetic, int a, int b) -> int
IMul, /// (MetaArithmetic, int a, int b) -> int
IDiv, /// (MetaArithmetic, int a, int b) -> int
INegate, /// (MetaArithmetic, int a) -> int
IAbsolute, /// (MetaArithmetic, int a) -> int
IMin, /// (MetaArithmetic, int a, int b) -> int
IMax, /// (MetaArithmetic, int a, int b) -> int
ICastFloat, /// (MetaArithmetic, float a) -> int
ICastUnsigned, /// (MetaArithmetic, uint a) -> int
ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
IBitwiseNot, /// (MetaArithmetic, int a) -> int
IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int
IBitCount, /// (MetaArithmetic, int) -> int
UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
UMul, /// (MetaArithmetic, uint a, uint b) -> uint
UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
UMin, /// (MetaArithmetic, uint a, uint b) -> uint
UMax, /// (MetaArithmetic, uint a, uint b) -> uint
UCastFloat, /// (MetaArithmetic, float a) -> uint
UCastSigned, /// (MetaArithmetic, int a) -> uint
ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
UBitCount, /// (MetaArithmetic, uint) -> uint
HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
HAbsolute, /// (f16vec2 a) -> f16vec2
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
HMergeF32, /// (f16vec2 src) -> float
HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
HPack2, /// (float a, float b) -> f16vec2
LogicalAssign, /// (bool& dst, bool src) -> void
LogicalAnd, /// (bool a, bool b) -> bool
LogicalOr, /// (bool a, bool b) -> bool
LogicalXor, /// (bool a, bool b) -> bool
LogicalNegate, /// (bool a) -> bool
LogicalPick2, /// (bool2 pair, uint index) -> bool
LogicalAll2, /// (bool2 a) -> bool
LogicalAny2, /// (bool2 a) -> bool
LogicalFLessThan, /// (float a, float b) -> bool
LogicalFEqual, /// (float a, float b) -> bool
LogicalFLessEqual, /// (float a, float b) -> bool
LogicalFGreaterThan, /// (float a, float b) -> bool
LogicalFNotEqual, /// (float a, float b) -> bool
LogicalFGreaterEqual, /// (float a, float b) -> bool
LogicalFIsNan, /// (float a) -> bool
LogicalILessThan, /// (int a, int b) -> bool
LogicalIEqual, /// (int a, int b) -> bool
LogicalILessEqual, /// (int a, int b) -> bool
LogicalIGreaterThan, /// (int a, int b) -> bool
LogicalINotEqual, /// (int a, int b) -> bool
LogicalIGreaterEqual, /// (int a, int b) -> bool
LogicalULessThan, /// (uint a, uint b) -> bool
LogicalUEqual, /// (uint a, uint b) -> bool
LogicalULessEqual, /// (uint a, uint b) -> bool
LogicalUGreaterThan, /// (uint a, uint b) -> bool
LogicalUNotEqual, /// (uint a, uint b) -> bool
LogicalUGreaterEqual, /// (uint a, uint b) -> bool
Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4
F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4
F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4
F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
F4TexelFetch, /// (MetaTexture, int[N], int) -> float4
Branch, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
PopFlowStack, /// () -> void
Exit, /// () -> void
Discard, /// () -> void
EmitVertex, /// () -> void
EndPrimitive, /// () -> void
YNegate, /// () -> float
Amount,
};
/// Internal flags tracked by the IR; Amount is the number of flags.
enum class InternalFlag {
Zero = 0,
Sign = 1,
Carry = 2,
Overflow = 3,
Amount = 4,
};
/// Describes how the code path between a given entry point and a return point behaves.
enum class ExitMethod {
Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
AlwaysReturn, ///< All code paths reach the return point.
Conditional, ///< Code path reaches the return point or an END instruction conditionally.
AlwaysEnd, ///< All code paths reach an END instruction.
};
class Sampler {
public:
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow)
: offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
std::size_t GetOffset() const {
return offset;
}
std::size_t GetIndex() const {
return index;
}
Tegra::Shader::TextureType GetType() const {
return type;
}
bool IsArray() const {
return is_array;
}
bool IsShadow() const {
return is_shadow;
}
bool operator<(const Sampler& rhs) const {
return std::tie(offset, index, type, is_array, is_shadow) <
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
}
private:
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
std::size_t offset{};
std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
};
/// Tracks how a constant buffer is accessed by a shader.
class ConstBuffer {
public:
    /// Records a direct access, keeping the largest offset (truncated to 32 bits) seen so far
    void MarkAsUsed(u64 offset) {
        const auto candidate = static_cast<u32>(offset);
        if (max_offset < candidate) {
            max_offset = candidate;
        }
    }

    /// Records that the buffer is accessed with an indirect offset
    void MarkAsUsedIndirect() {
        is_indirect = true;
    }

    /// Whether MarkAsUsedIndirect was called
    bool IsIndirect() const {
        return is_indirect;
    }

    /// Largest recorded offset plus one
    u32 GetSize() const {
        return max_offset + 1;
    }

private:
    u32 max_offset{};
    bool is_indirect{};
};
/// Identifies a global memory region by a constant buffer index and an offset into it.
struct GlobalMemoryBase {
    u32 cbuf_index{};
    u32 cbuf_offset{};

    /// Orders by cbuf_index first and by cbuf_offset second
    bool operator<(const GlobalMemoryBase& rhs) const {
        if (cbuf_index != rhs.cbuf_index) {
            return cbuf_index < rhs.cbuf_index;
        }
        return cbuf_offset < rhs.cbuf_offset;
    }
};
/// Metadata attached to arithmetic operations
struct MetaArithmetic {
bool precise{}; ///< Precision flag of the operation; defaults to false
};
/// Metadata attached to half float arithmetic operations
struct MetaHalfArithmetic {
bool precise{}; ///< Precision flag of the operation; defaults to false
/// Up to three half types, all H0_H1 by default
/// (presumably one per operand - TODO confirm against the users of this struct)
std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
Tegra::Shader::HalfType::H0_H1,
Tegra::Shader::HalfType::H0_H1};
};
/// Metadata attached to texture operations
struct MetaTexture {
// Held by reference: the Sampler must outlive this metadata
const Sampler& sampler;
u32 element{};
u32 coords_count{};
// Only set when the operation carries an array index
std::optional<u32> array_index;
};
// Ready-made metadata values
constexpr MetaArithmetic PRECISE = {true};
constexpr MetaArithmetic NO_PRECISE = {false};
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};
/// Any of the metadata kinds an operation can carry
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
public:
    /// Operation without metadata and without operands
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}

    /// Operation with metadata and without operands
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code, Meta&& meta)
        : code{code}, meta{std::move(meta)} {}

    /// Operation with default metadata and the given operands
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code, const T*... operands)
        : OperationNode(code, {}, operands...) {}

    /// Operation with metadata and the given operands
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
        : code{code}, meta{std::move(meta)} {
        auto operands_list = {operands_...};
        for (auto& operand : operands_list) {
            operands.push_back(operand);
        }
    }

    /// Operation with metadata and an already built operand list
    explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
        // meta is a named rvalue reference: move it like the other constructors do
        : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {}

    /// Operation with default metadata and an already built operand list
    explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
        : code{code}, meta{}, operands{std::move(operands)} {}

    OperationCode GetCode() const {
        return code;
    }

    const Meta& GetMeta() const {
        return meta;
    }

    std::size_t GetOperandsCount() const {
        return operands.size();
    }

    /// Bounds-checked operand access (uses std::vector::at)
    Node operator[](std::size_t operand_index) const {
        return operands.at(operand_index);
    }

private:
    const OperationCode code;
    const Meta meta;
    std::vector<Node> operands;
};
/// Wraps a list of nodes that is executed only when a boolean-returning node is satisfied
class ConditionalNode final {
public:
    /// Stores the condition node and takes ownership of the guarded code.
    explicit ConditionalNode(Node condition, std::vector<Node>&& code)
        : condition{condition}, code{std::move(code)} {}

    /// Returns the node that acts as the condition.
    Node GetCondition() const {
        return condition;
    }

    /// Returns the nodes guarded by the condition.
    const std::vector<Node>& GetCode() const {
        return code;
    }

private:
    const Node condition;   ///< Condition to be satisfied
    std::vector<Node> code; ///< Code to execute
};
/// Node referring to a general purpose register
class GprNode final {
public:
    explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}

    /// Returns the wrapped register converted to u32.
    u32 GetIndex() const {
        const auto register_index = static_cast<u32>(index);
        return register_index;
    }

private:
    const Tegra::Shader::Register index; ///< Wrapped register
};
/// Node holding a 32-bit immediate value
class ImmediateNode final {
public:
    explicit constexpr ImmediateNode(u32 value) : value{value} {}

    /// Returns the stored 32-bit value.
    u32 GetValue() const {
        return value;
    }

private:
    const u32 value; ///< Immediate payload
};
/// Node referring to one of Maxwell's internal flags
class InternalFlagNode final {
public:
    explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}

    /// Returns the flag this node refers to.
    InternalFlag GetFlag() const {
        return flag;
    }

private:
    const InternalFlag flag; ///< Referenced internal flag
};
/// Node referring to a predicate register; negation is stored in the node itself, so no
/// additional node is needed to negate it
class PredicateNode final {
public:
    explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
        : index{index}, negated{negated} {}

    /// Returns the predicate register this node refers to.
    Tegra::Shader::Pred GetIndex() const {
        return index;
    }

    /// Returns true when the predicate is negated.
    bool IsNegated() const {
        return negated;
    }

private:
    const Tegra::Shader::Pred index; ///< Referenced predicate register
    const bool negated;              ///< Whether the predicate is negated
};
/// Attribute buffer memory node (attributes or varyings in GLSL terms)
class AbufNode final {
public:
    /// Builds an attribute node with an explicit input mode.
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
        : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}

    /// Builds an attribute node with a value-initialized input mode.
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                Node buffer = {})
        : input_mode{}, buffer{buffer}, index{index}, element{element} {}

    /// Returns a copy of the stored input mode.
    Tegra::Shader::IpaMode GetInputMode() const {
        return input_mode;
    }

    /// Returns the attribute index.
    Tegra::Shader::Attribute::Index GetIndex() const {
        return index;
    }

    /// Returns the element within the attribute.
    u32 GetElement() const {
        return element;
    }

    /// Returns the buffer node; it is a default-constructed Node when none was supplied.
    Node GetBuffer() const {
        return buffer;
    }

private:
    const Tegra::Shader::IpaMode input_mode;     ///< Input mode of the attribute
    const Node buffer;                           ///< Optional buffer node
    const Tegra::Shader::Attribute::Index index; ///< Attribute index
    const u32 element;                           ///< Element within the attribute
};
/// Constant buffer node (usually mapped to uniform buffers in GLSL)
class CbufNode final {
public:
    explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {}

    /// Returns the constant buffer index.
    u32 GetIndex() const {
        return index;
    }

    /// Returns the node that evaluates to the offset inside the buffer.
    Node GetOffset() const {
        return offset;
    }

private:
    const u32 index;   ///< Constant buffer index
    const Node offset; ///< Node holding the offset
};
/// Node referring to local memory
class LmemNode final {
public:
    explicit constexpr LmemNode(Node address) : address{address} {}

    /// Returns the node that evaluates to the address.
    Node GetAddress() const {
        return address;
    }

private:
    const Node address; ///< Node holding the address
};
/// Node referring to global memory
class GmemNode final {
public:
    /// Stores both address nodes and a copy of the descriptor.
    explicit constexpr GmemNode(Node real_address, Node base_address,
                                const GlobalMemoryBase& descriptor)
        : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}

    /// Returns the node holding the real address.
    Node GetRealAddress() const {
        return real_address;
    }

    /// Returns the node holding the base address.
    Node GetBaseAddress() const {
        return base_address;
    }

    /// Returns the descriptor copied at construction.
    const GlobalMemoryBase& GetDescriptor() const {
        return descriptor;
    }

private:
    const Node real_address;           ///< Node holding the real address
    const Node base_address;           ///< Node holding the base address
    const GlobalMemoryBase descriptor; ///< Copy of the global memory descriptor
};
/// Commentary node; it only carries text and can be dropped
class CommentNode final {
public:
    /// Takes ownership of the comment text.
    explicit CommentNode(std::string text) : text{std::move(text)} {}

    /// Returns the stored comment text.
    const std::string& GetText() const {
        return text;
    }

private:
    std::string text; ///< Comment contents
};
/// Intermediate representation of a shader program.
/// The constructor stores a reference to the program code and the main offset, then immediately
/// runs Decode(). The accessors below expose what was collected (basic blocks and the sets of
/// used registers, predicates, attributes, constant buffers, samplers, ...).
/// NOTE: program_code is held by reference, so the caller must keep it alive while this object
/// is in use.
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
: program_code{program_code}, main_offset{main_offset} {
Decode();
}
/// Returns the decoded basic blocks (keyed by u32; presumably the block start offset - confirm)
const std::map<u32, BasicBlock>& GetBasicBlocks() const {
return basic_blocks;
}
/// Returns the set of used registers
const std::set<u32>& GetRegisters() const {
return used_registers;
}
/// Returns the set of used predicates
const std::set<Tegra::Shader::Pred>& GetPredicates() const {
return used_predicates;
}
/// Returns the used input attributes, each mapped to the set of input modes it was used with
const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
GetInputAttributes() const {
return used_input_attributes;
}
/// Returns the set of used output attributes
const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
return used_output_attributes;
}
/// Returns the used constant buffers (keyed by u32; presumably the buffer index - confirm)
const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
return used_cbufs;
}
/// Returns the set of used samplers
const std::set<Sampler>& GetSamplers() const {
return used_samplers;
}
/// Returns one flag per clip distance, telling whether it is used
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
const {
return used_clip_distances;
}
/// Returns the set of used global memory bases
const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
return used_global_memory_bases;
}
/// Returns coverage_end multiplied by sizeof(u64), i.e. a length in bytes assuming coverage_end
/// counts 64-bit words (TODO confirm against Decode)
std::size_t GetLength() const {
return static_cast<std::size_t>(coverage_end * sizeof(u64));
}
/// Returns the shader header
const Tegra::Shader::Header& GetHeader() const {
return header;
}
private:
/// Entry point of decoding, invoked from the constructor
void Decode();
/// Scans the [begin, end) range collecting labels (exact range semantics defined out of view)
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
/// Decodes a range of the program into a basic block
BasicBlock DecodeRange(u32 begin, u32 end);
/**
* Decodes a single instruction from Tegra to IR.
* @param bb Basic block where the nodes will be written to.
* @param pc Program counter. Offset to decode.
* @return Next address to decode.
*/
u32 DecodeInstr(BasicBlock& bb, u32 pc);
// Per-category instruction decoders. Each receives the block to write to, a BasicBlock named
// code and the program counter, and returns a u32 (presumably the next address to decode, like
// DecodeInstr - confirm in the implementation).
u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc);
u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc);
/// Internalizes node's data and returns a managed pointer to a clone of that node
Node StoreNode(NodeData&& node_data);
/// Creates a conditional node
Node Conditional(Node condition, std::vector<Node>&& code);
/// Creates a commentary
Node Comment(const std::string& text);
/// Creates an u32 immediate
Node Immediate(u32 value);
/// Creates a s32 immediate
Node Immediate(s32 value) {
return Immediate(static_cast<u32>(value));
}
/// Creates a f32 immediate
Node Immediate(f32 value) {
// Reinterpret the float's bit pattern as an integer through memcpy
u32 integral;
std::memcpy(&integral, &value, sizeof(u32));
return Immediate(integral);
}
/// Generates a node for a passed register.
Node GetRegister(Tegra::Shader::Register reg);
/// Generates a node representing a 19-bit immediate value
Node GetImmediate19(Tegra::Shader::Instruction instr);
/// Generates a node representing a 32-bit immediate value
Node GetImmediate32(Tegra::Shader::Instruction instr);
/// Generates a node representing a constant buffer
Node GetConstBuffer(u64 index, u64 offset);
/// Generates a node representing a constant buffer with a variadic offset
Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
/// Generates a node for a passed predicate. It can be optionally negated
Node GetPredicate(u64 pred, bool negated = false);
/// Generates a predicate node for an immediate true or false value
Node GetPredicate(bool immediate);
/// Generates a node representing an input attribute. Keeps track of used attributes.
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
/// Generates a node representing an output attribute. Keeps track of used attributes.
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
/// Generates a node representing an internal flag
Node GetInternalFlag(InternalFlag flag, bool negated = false);
/// Generates a node representing a local memory address
Node GetLocalMemory(Node address);
/// Generates a temporal, internally it uses a post-RZ register
Node GetTemporal(u32 id);
/// Sets a register. src value must be a number-evaluated node.
void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src);
/// Sets a predicate. src value must be a bool-evaluated node
void SetPredicate(BasicBlock& bb, u64 dest, Node src);
/// Sets an internal flag. src value must be a bool-evaluated node
void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value);
/// Sets a local memory address. address and value must be a number-evaluated node
void SetLocalMemory(BasicBlock& bb, Node address, Node value);
/// Sets a temporal. Internally it uses a post-RZ register
void SetTemporal(BasicBlock& bb, u32 id, Node value);
/// Sets internal flags from a float
void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
/// Sets internal flags from an integer
void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);
/// Conditionally absolute/negated float. Absolute is applied first
Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
/// Conditionally saturates a float
Node GetSaturatedFloat(Node value, bool saturate = true);
/// Converts an integer to different sizes.
Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
/// Conditionally absolute/negated integer. Absolute is applied first
Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
/// Unpacks a half immediate from an instruction
Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
/// Merges a half pair into another value
Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
/// Conditionally absolute/negated half float pair. Absolute is applied first
Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
/// Returns a predicate comparing two floats
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
/// Returns a predicate comparing two integers
Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
Node op_a, Node op_b);
/// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
const MetaHalfArithmetic& meta, Node op_a, Node op_b);
/// Returns a predicate combiner operation
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
/// Returns a condition code evaluated from internal flags
Node GetConditionCode(Tegra::Shader::ConditionCode cc);
/// Accesses a texture sampler
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
// Texture helpers. The Write* functions take the four resulting components and write them into
// the basic block; the Get*Code functions return four component nodes for the named
// instruction family (details live in the implementation, which is out of view here).
void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool depth_compare, bool is_array);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array, std::size_t array_offset, std::size_t bias_offset,
std::vector<Node>&& coords);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
Tegra::Shader::PredicateResultMode predicate_mode,
Tegra::Shader::Pred predicate, bool sets_cc);
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut, bool sets_cc);
/// Tracks a node backwards through code starting at cursor, looking for a constant buffer
Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
/// Tracks a register backwards through code starting at cursor; returns a node and a cursor
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
// Operation builders: each constructs an OperationNode from its arguments and stores it with
// StoreNode, returning the resulting Node.
template <typename... T>
Node Operation(OperationCode code, const T*... operands) {
return StoreNode(OperationNode(code, operands...));
}
template <typename... T>
Node Operation(OperationCode code, Meta&& meta, const T*... operands) {
return StoreNode(OperationNode(code, std::move(meta), operands...));
}
template <typename... T>
Node Operation(OperationCode code, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(operands)));
}
template <typename... T>
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
}
// Same as Operation, but the code is first mapped through SignedToUnsignedCode using is_signed.
template <typename... T>
Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) {
return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...));
}
template <typename... T>
Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) {
return StoreNode(
OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...));
}
/// Maps an operation code according to is_signed (mapping defined out of view)
static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
// Program being decoded; stored by reference, not owned
const ProgramCode& program_code;
const u32 main_offset;
u32 coverage_begin{};
u32 coverage_end{};
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
std::map<u32, BasicBlock> basic_blocks;
// Owning storage of node data; presumably what StoreNode appends to - confirm
std::vector<std::unique_ptr<NodeData>> stored_nodes;
// Usage tracking exposed through the public getters above
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::set<GlobalMemoryBase> used_global_memory_bases;
Tegra::Shader::Header header;
};
} // namespace VideoCommon::Shader

View File

@@ -1,76 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include <variant>
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
namespace {
std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
const Node node = code[cursor];
if (const auto operation = std::get_if<OperationNode>(node)) {
if (operation->GetCode() == operation_code)
return {node, cursor};
}
}
return {};
}
} // namespace
/// Follows tracked backwards through code looking for the constant buffer it originates from.
/// Returns the constant buffer node when one with an immediate offset is found, nullptr
/// otherwise.
Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
    if (const auto* const cbuf = std::get_if<CbufNode>(tracked)) {
        // A constant buffer only counts when its offset is an immediate
        const bool has_immediate_offset =
            std::holds_alternative<ImmediateNode>(*cbuf->GetOffset());
        if (has_immediate_offset) {
            return tracked;
        }
        return nullptr;
    }

    if (const auto* const gpr = std::get_if<GprNode>(tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return nullptr;
        }
        // Start one position earlier: an instruction that writes the same register it reads
        // would otherwise be found again and again
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        return source ? TrackCbuf(source, code, new_cursor) : nullptr;
    }

    if (const auto* const operation = std::get_if<OperationNode>(tracked)) {
        // The first operand that leads to a constant buffer wins
        const std::size_t operand_count = operation->GetOperandsCount();
        for (std::size_t index = 0; index < operand_count; ++index) {
            const Node found = TrackCbuf((*operation)[index], code, cursor);
            if (found) {
                return found;
            }
        }
    }

    return nullptr;
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
s64 cursor) {
for (; cursor >= 0; --cursor) {
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
if (!found_node) {
return {};
}
const auto operation = std::get_if<OperationNode>(found_node);
ASSERT(operation);
const auto& target = (*operation)[0];
if (const auto gpr_target = std::get_if<GprNode>(target)) {
if (gpr_target->GetIndex() == tracked->GetIndex()) {
return {(*operation)[1], new_cursor};
}
}
}
return {};
}
} // namespace VideoCommon::Shader

View File

@@ -50,24 +50,6 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
}
}
/// Returns true for the array surface targets (1D array, 2D array, cube array) and false for the
/// non-array ones. Any other value is logged as critical and hits UNREACHABLE().
bool SurfaceTargetIsArray(SurfaceTarget target) {
    switch (target) {
    case SurfaceTarget::Texture1DArray:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
        return true;
    case SurfaceTarget::Texture1D:
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::TextureCubemap:
        return false;
    default:
        break;
    }

    // Only reached for targets not listed above
    LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
    UNREACHABLE();
    return false;
}
PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::S8_Z24_UNORM:

View File

@@ -441,8 +441,6 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
bool SurfaceTargetIsLayered(SurfaceTarget target);
bool SurfaceTargetIsArray(SurfaceTarget target);
PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);

View File

@@ -16,10 +16,9 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
return static_cast<u16>(
Settings::values.resolution_factor
? Settings::values.resolution_factor
: renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
return !Settings::values.resolution_factor
? renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()
: Settings::values.resolution_factor;
}
} // namespace VideoCore

View File

@@ -1,6 +1,13 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <QApplication>
#include <QHBoxLayout>
#include <QKeyEvent>
#include <QOffscreenSurface>
#include <QOpenGLWindow>
#include <QPainter>
#include <QScreen>
#include <QWindow>
#include <fmt/format.h>
@@ -73,13 +80,36 @@ void EmuThread::run() {
render_window->moveContext();
}
class GGLContext : public Core::Frontend::GraphicsContext {
public:
explicit GGLContext(QOpenGLContext* shared_context) : surface() {
context = std::make_unique<QOpenGLContext>(shared_context);
surface.setFormat(shared_context->format());
surface.create();
}
void MakeCurrent() override {
context->makeCurrent(&surface);
}
void DoneCurrent() override {
context->doneCurrent();
}
void SwapBuffers() override {}
private:
std::unique_ptr<QOpenGLContext> context;
QOffscreenSurface surface;
};
// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
// context.
// The corresponding functionality is handled in EmuThread instead
class GGLWidgetInternal : public QGLWidget {
class GGLWidgetInternal : public QOpenGLWindow {
public:
GGLWidgetInternal(QGLFormat fmt, GRenderWindow* parent)
: QGLWidget(fmt, parent), parent(parent) {}
GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context)
: QOpenGLWindow(shared_context), parent(parent) {}
void paintEvent(QPaintEvent* ev) override {
if (do_painting) {
@@ -92,9 +122,51 @@ public:
parent->OnFramebufferSizeChanged();
}
/// Reports the pressed key to the emulated keyboard
void keyPressEvent(QKeyEvent* event) override {
    const int key_code = event->key();
    InputCommon::GetKeyboard()->PressKey(key_code);
}
/// Reports the released key to the emulated keyboard
void keyReleaseEvent(QKeyEvent* event) override {
    const int key_code = event->key();
    InputCommon::GetKeyboard()->ReleaseKey(key_code);
}
/// Left button starts a touch at the scaled position; right button starts a motion tilt
void mousePressEvent(QMouseEvent* event) override {
    // touch input is handled in TouchBeginEvent
    if (event->source() == Qt::MouseEventSynthesizedBySystem) {
        return;
    }

    const auto pos{event->pos()};
    switch (event->button()) {
    case Qt::LeftButton: {
        const auto [x, y] = parent->ScaleTouch(pos);
        parent->TouchPressed(x, y);
        break;
    }
    case Qt::RightButton:
        InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y());
        break;
    default:
        break;
    }
}
/// Updates both the touch position (scaled) and the motion tilt (unscaled) from the mouse
void mouseMoveEvent(QMouseEvent* event) override {
    // touch input is handled in TouchUpdateEvent
    const bool is_synthesized = event->source() == Qt::MouseEventSynthesizedBySystem;
    if (is_synthesized) {
        return;
    }

    const auto pos{event->pos()};
    const auto [x, y] = parent->ScaleTouch(pos);
    parent->TouchMoved(x, y);

    InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y());
}
/// Left button ends the touch; right button ends the motion tilt
void mouseReleaseEvent(QMouseEvent* event) override {
    // touch input is handled in TouchEndEvent
    if (event->source() == Qt::MouseEventSynthesizedBySystem) {
        return;
    }

    switch (event->button()) {
    case Qt::LeftButton:
        parent->TouchReleased();
        break;
    case Qt::RightButton:
        InputCommon::GetMotionEmu()->EndTilt();
        break;
    default:
        break;
    }
}
/// Clears the do_painting flag checked by paintEvent
void DisablePainting() {
    do_painting = false;
}
/// Sets the do_painting flag checked by paintEvent
void EnablePainting() {
    do_painting = true;
}
@@ -105,7 +177,7 @@ private:
};
GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
: QWidget(parent), child(nullptr), emu_thread(emu_thread) {
: QWidget(parent), child(nullptr), context(nullptr), emu_thread(emu_thread) {
setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
.arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
@@ -129,19 +201,19 @@ void GRenderWindow::moveContext() {
auto thread = (QThread::currentThread() == qApp->thread() && emu_thread != nullptr)
? emu_thread
: qApp->thread();
child->context()->moveToThread(thread);
context->moveToThread(thread);
}
void GRenderWindow::SwapBuffers() {
// In our multi-threaded QGLWidget use case we shouldn't need to call `makeCurrent`,
// In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
// since we never call `doneCurrent` in this thread.
// However:
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
// since the last time `swapBuffers` was executed;
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
child->makeCurrent();
context->makeCurrent(child);
child->swapBuffers();
context->swapBuffers(child);
if (!first_frame) {
emit FirstFrameDisplayed();
first_frame = true;
@@ -149,11 +221,11 @@ void GRenderWindow::SwapBuffers() {
}
void GRenderWindow::MakeCurrent() {
child->makeCurrent();
context->makeCurrent(child);
}
void GRenderWindow::DoneCurrent() {
child->doneCurrent();
context->doneCurrent();
}
// No-op: this implementation performs no work when asked to poll events.
void GRenderWindow::PollEvents() {}
@@ -166,14 +238,26 @@ void GRenderWindow::PollEvents() {}
void GRenderWindow::OnFramebufferSizeChanged() {
// Screen changes potentially incur a change in screen DPI, hence we should update the
// framebuffer size
qreal pixelRatio = windowPixelRatio();
qreal pixelRatio = GetWindowPixelRatio();
unsigned width = child->QPaintDevice::width() * pixelRatio;
unsigned height = child->QPaintDevice::height() * pixelRatio;
UpdateCurrentFramebufferLayout(width, height);
}
/// Hands a key press over to the child window; does nothing when there is no child
void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) {
    if (!child) {
        return;
    }
    child->keyPressEvent(event);
}
/// Hands a key release over to the child window; does nothing when there is no child
void GRenderWindow::ForwardKeyReleaseEvent(QKeyEvent* event) {
    if (!child) {
        return;
    }
    child->keyReleaseEvent(event);
}
void GRenderWindow::BackupGeometry() {
geometry = ((QGLWidget*)this)->saveGeometry();
geometry = ((QWidget*)this)->saveGeometry();
}
void GRenderWindow::RestoreGeometry() {
@@ -191,18 +275,18 @@ QByteArray GRenderWindow::saveGeometry() {
// If we are a top-level widget, store the current geometry
// otherwise, store the last backup
if (parent() == nullptr)
return ((QGLWidget*)this)->saveGeometry();
return ((QWidget*)this)->saveGeometry();
else
return geometry;
}
qreal GRenderWindow::windowPixelRatio() const {
qreal GRenderWindow::GetWindowPixelRatio() const {
// windowHandle() might not be accessible until the window is displayed to screen.
return windowHandle() ? windowHandle()->screen()->devicePixelRatio() : 1.0f;
}
std::pair<unsigned, unsigned> GRenderWindow::ScaleTouch(const QPointF pos) const {
const qreal pixel_ratio = windowPixelRatio();
const qreal pixel_ratio = GetWindowPixelRatio();
return {static_cast<unsigned>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})),
static_cast<unsigned>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))};
}
@@ -212,47 +296,6 @@ void GRenderWindow::closeEvent(QCloseEvent* event) {
QWidget::closeEvent(event);
}
/// Reports the pressed key to the emulated keyboard
void GRenderWindow::keyPressEvent(QKeyEvent* event) {
    const int key_code = event->key();
    InputCommon::GetKeyboard()->PressKey(key_code);
}
/// Reports the released key to the emulated keyboard
void GRenderWindow::keyReleaseEvent(QKeyEvent* event) {
    const int key_code = event->key();
    InputCommon::GetKeyboard()->ReleaseKey(key_code);
}
/// Left button starts a touch at the scaled position; right button starts a motion tilt
void GRenderWindow::mousePressEvent(QMouseEvent* event) {
    // touch input is handled in TouchBeginEvent
    if (event->source() == Qt::MouseEventSynthesizedBySystem) {
        return;
    }

    auto pos = event->pos();
    switch (event->button()) {
    case Qt::LeftButton: {
        const auto [x, y] = ScaleTouch(pos);
        this->TouchPressed(x, y);
        break;
    }
    case Qt::RightButton:
        InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y());
        break;
    default:
        break;
    }
}
/// Updates both the touch position (scaled) and the motion tilt (unscaled) from the mouse
void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
    // touch input is handled in TouchUpdateEvent
    const bool is_synthesized = event->source() == Qt::MouseEventSynthesizedBySystem;
    if (is_synthesized) {
        return;
    }

    auto pos = event->pos();
    const auto [x, y] = ScaleTouch(pos);
    this->TouchMoved(x, y);

    InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y());
}
/// Left button ends the touch; right button ends the motion tilt
void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) {
    // touch input is handled in TouchEndEvent
    if (event->source() == Qt::MouseEventSynthesizedBySystem) {
        return;
    }

    switch (event->button()) {
    case Qt::LeftButton:
        this->TouchReleased();
        break;
    case Qt::RightButton:
        InputCommon::GetMotionEmu()->EndTilt();
        break;
    default:
        break;
    }
}
void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) {
// TouchBegin always has exactly one touch point, so take the .first()
const auto [x, y] = ScaleTouch(event->touchPoints().first().pos());
@@ -305,35 +348,60 @@ void GRenderWindow::OnClientAreaResized(unsigned width, unsigned height) {
NotifyClientAreaSizeChanged(std::make_pair(width, height));
}
void GRenderWindow::InitRenderTarget() {
if (child) {
delete child;
}
std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
return std::make_unique<GGLContext>(shared_context.get());
}
if (layout()) {
delete layout();
}
void GRenderWindow::InitRenderTarget() {
shared_context.reset();
context.reset();
delete child;
child = nullptr;
delete container;
container = nullptr;
delete layout();
first_frame = false;
// TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
// WA_DontShowOnScreen, WA_DeleteOnClose
QGLFormat fmt;
QSurfaceFormat fmt;
fmt.setVersion(4, 3);
fmt.setProfile(QGLFormat::CoreProfile);
fmt.setProfile(QSurfaceFormat::CoreProfile);
// TODO: expose a setting for buffer value (ie default/single/double/triple)
fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
shared_context = std::make_unique<QOpenGLContext>();
shared_context->setFormat(fmt);
shared_context->create();
context = std::make_unique<QOpenGLContext>();
context->setShareContext(shared_context.get());
context->setFormat(fmt);
context->create();
fmt.setSwapInterval(false);
// Requests a forward-compatible context, which is required to get a 3.2+ context on OS X
fmt.setOption(QGL::NoDeprecatedFunctions);
child = new GGLWidgetInternal(this, shared_context.get());
container = QWidget::createWindowContainer(child, this);
child = new GGLWidgetInternal(fmt, this);
QBoxLayout* layout = new QHBoxLayout(this);
resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
layout->addWidget(child);
layout->addWidget(container);
layout->setMargin(0);
setLayout(layout);
// Reset minimum size to avoid unwanted resizes when this function is called for a second time.
setMinimumSize(1, 1);
// Show causes the window to actually be created and the OpenGL context as well, but we don't
// want the widget to be shown yet, so immediately hide it.
show();
hide();
resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
child->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
container->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
OnFramebufferSizeChanged();

View File

@@ -7,9 +7,9 @@
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <QGLWidget>
#include <QImage>
#include <QThread>
#include <QWidget>
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
@@ -21,6 +21,8 @@ class QTouchEvent;
class GGLWidgetInternal;
class GMainWindow;
class GRenderWindow;
class QSurface;
class QOpenGLContext;
class EmuThread : public QThread {
Q_OBJECT
@@ -115,25 +117,21 @@ public:
void MakeCurrent() override;
void DoneCurrent() override;
void PollEvents() override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
void ForwardKeyPressEvent(QKeyEvent* event);
void ForwardKeyReleaseEvent(QKeyEvent* event);
void BackupGeometry();
void RestoreGeometry();
void restoreGeometry(const QByteArray& geometry); // overridden
QByteArray saveGeometry(); // overridden
qreal windowPixelRatio() const;
qreal GetWindowPixelRatio() const;
std::pair<unsigned, unsigned> ScaleTouch(const QPointF pos) const;
void closeEvent(QCloseEvent* event) override;
void keyPressEvent(QKeyEvent* event) override;
void keyReleaseEvent(QKeyEvent* event) override;
void mousePressEvent(QMouseEvent* event) override;
void mouseMoveEvent(QMouseEvent* event) override;
void mouseReleaseEvent(QMouseEvent* event) override;
bool event(QEvent* event) override;
void focusOutEvent(QFocusEvent* event) override;
void OnClientAreaResized(unsigned width, unsigned height);
@@ -155,7 +153,6 @@ signals:
void FirstFrameDisplayed();
private:
std::pair<unsigned, unsigned> ScaleTouch(const QPointF pos) const;
void TouchBeginEvent(const QTouchEvent* event);
void TouchUpdateEvent(const QTouchEvent* event);
void TouchEndEvent();
@@ -163,11 +160,17 @@ private:
void OnMinimalClientAreaChangeRequest(
const std::pair<unsigned, unsigned>& minimal_size) override;
GGLWidgetInternal* child;
QWidget* container = nullptr;
GGLWidgetInternal* child = nullptr;
QByteArray geometry;
EmuThread* emu_thread;
// Context that backs the GGLWidgetInternal (and will be used by core to render)
std::unique_ptr<QOpenGLContext> context;
// Context that will be shared between all newly created contexts. This should never be made
// current
std::unique_ptr<QOpenGLContext> shared_context;
/// Temporary storage of the screenshot taken
QImage screenshot_image;

View File

@@ -7,6 +7,7 @@
#include "common/file_util.h"
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
#include "ui_configure_debug.h"

View File

@@ -39,7 +39,6 @@ void ConfigureDialog::applyConfiguration() {
ui->debugTab->applyConfiguration();
ui->webTab->applyConfiguration();
Settings::Apply();
Settings::LogSettings();
}
void ConfigureDialog::PopulateSelectionList() {

View File

@@ -7,7 +7,6 @@
#include <utility>
#include <QColorDialog>
#include <QGridLayout>
#include <QKeyEvent>
#include <QMenu>
#include <QMessageBox>
#include <QTimer>

View File

@@ -11,21 +11,17 @@
#include <string>
#include <QDialog>
#include <QKeyEvent>
#include "common/param_package.h"
#include "core/settings.h"
#include "input_common/main.h"
#include "ui_configure_input.h"
class QKeyEvent;
class QPushButton;
class QString;
class QTimer;
namespace InputCommon::Polling {
class DevicePoller;
enum class DeviceType;
} // namespace InputCommon::Polling
namespace Ui {
class ConfigureInputPlayer;
}

View File

@@ -8,6 +8,7 @@
#include <QHeaderView>
#include <QMenu>
#include <QMessageBox>
#include <QStandardItemModel>
#include <QString>
#include <QTimer>

View File

@@ -7,16 +7,16 @@
#include <memory>
#include <vector>
#include <QDialog>
#include <QKeyEvent>
#include <QList>
#include <QWidget>
#include "core/file_sys/vfs_types.h"
class QTreeView;
class QGraphicsScene;
class QStandardItem;
class QStandardItemModel;
class QTreeView;
class QVBoxLayout;
namespace Ui {
class ConfigurePerGameGeneral;

View File

@@ -2,19 +2,23 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <chrono>
#include <optional>
#include <algorithm>
#include <QFileDialog>
#include <QGraphicsItem>
#include <QGraphicsScene>
#include <QHeaderView>
#include <QMessageBox>
#include <QStandardItemModel>
#include <QTreeView>
#include <QVBoxLayout>
#include "common/assert.h"
#include "common/file_util.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/settings.h"
#include "ui_configure_system.h"
#include "yuzu/configuration/configure_system.h"
#include "yuzu/util/limitable_input_dialog.h"
namespace {
constexpr std::array<int, 12> days_in_month = {{

View File

@@ -6,6 +6,8 @@
#include <memory>
#include <QDialog>
#include <QWidget>
#include "yuzu/configuration/config.h"
namespace Ui {
class ConfigureTouchscreenAdvanced;

View File

@@ -14,7 +14,6 @@
#include "configuration/configure_per_general.h"
#include "core/file_sys/vfs.h"
#include "core/file_sys/vfs_real.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "core/hle/service/acc/profile_manager.h"
#include "core/hle/service/am/applets/applets.h"
#include "core/hle/service/hid/controllers/npad.h"
@@ -748,15 +747,13 @@ bool GMainWindow::LoadROM(const QString& filename) {
ShutdownGame();
render_window->InitRenderTarget();
render_window->MakeCurrent();
{
Core::Frontend::ScopeAcquireWindowContext acquire_context{*render_window};
if (!gladLoadGL()) {
QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"),
tr("Your GPU may not support OpenGL 4.3, or you do not "
"have the latest graphics driver."));
return false;
}
if (!gladLoadGL()) {
QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"),
tr("Your GPU may not support OpenGL 4.3, or you do not "
"have the latest graphics driver."));
return false;
}
QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions();
@@ -797,6 +794,8 @@ bool GMainWindow::LoadROM(const QString& filename) {
"wiki</a>. This message will not be shown again."));
}
render_window->DoneCurrent();
if (result != Core::System::ResultStatus::Success) {
switch (result) {
case Core::System::ResultStatus::ErrorGetLoader:
@@ -1966,6 +1965,18 @@ void GMainWindow::dragMoveEvent(QDragMoveEvent* event) {
event->acceptProposedAction();
}
/// Passes a key press received by the main window on to the render window, if one exists.
void GMainWindow::keyPressEvent(QKeyEvent* event) {
    if (!render_window) {
        // Nothing to forward to.
        return;
    }
    render_window->ForwardKeyPressEvent(event);
}
/// Passes a key release received by the main window on to the render window, if one exists.
void GMainWindow::keyReleaseEvent(QKeyEvent* event) {
    if (!render_window) {
        // Nothing to forward to.
        return;
    }
    render_window->ForwardKeyReleaseEvent(event);
}
bool GMainWindow::ConfirmChangeGame() {
if (emu_thread == nullptr)
return true;
@@ -2033,7 +2044,8 @@ int main(int argc, char* argv[]) {
QCoreApplication::setOrganizationName("yuzu team");
QCoreApplication::setApplicationName("yuzu");
QApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
// Enables the core to make the Qt-created contexts current on std::threads
QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
QApplication app(argc, argv);
// Qt changes the locale and causes issues in float conversion using std::to_string() when
@@ -2043,9 +2055,6 @@ int main(int argc, char* argv[]) {
GMainWindow main_window;
// After settings have been loaded by GMainWindow, apply the filter
main_window.show();
Settings::LogSettings();
int result = app.exec();
detached_tasks.WaitForAllTasks();
return result;

View File

@@ -251,4 +251,8 @@ protected:
void dropEvent(QDropEvent* event) override;
void dragEnterEvent(QDragEnterEvent* event) override;
void dragMoveEvent(QDragMoveEvent* event) override;
// Overrides used to forward key events to the render window when the focus moves out.
void keyPressEvent(QKeyEvent* event) override;
void keyReleaseEvent(QKeyEvent* event) override;
};

View File

@@ -19,6 +19,37 @@
#include "input_common/sdl/sdl.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
class SDLGLContext : public Core::Frontend::GraphicsContext {
public:
explicit SDLGLContext() {
// create a hidden window to make the shared context against
window = SDL_CreateWindow("", SDL_WINDOWPOS_UNDEFINED, // x position
SDL_WINDOWPOS_UNDEFINED, // y position
Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
context = SDL_GL_CreateContext(window);
}
~SDLGLContext() {
SDL_GL_DeleteContext(context);
SDL_DestroyWindow(window);
}
void MakeCurrent() override {
SDL_GL_MakeCurrent(window, context);
}
void DoneCurrent() override {
SDL_GL_MakeCurrent(window, nullptr);
}
void SwapBuffers() override {}
private:
SDL_Window* window;
SDL_GLContext context;
};
void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0));
InputCommon::GetMotionEmu()->Tilt(x, y);
@@ -153,6 +184,7 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,
Common::g_scm_branch, Common::g_scm_desc);
@@ -171,7 +203,6 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
if (fullscreen) {
Fullscreen();
}
gl_context = SDL_GL_CreateContext(render_window);
if (gl_context == nullptr) {
@@ -195,7 +226,6 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
SDL_GL_SetSwapInterval(false);
LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
Common::g_scm_desc);
Settings::LogSettings();
DoneCurrent();
}
@@ -281,3 +311,7 @@ void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(
SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
}
/// Builds a new SDLGLContext and hands ownership of it to the caller through the generic
/// GraphicsContext interface.
std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2::CreateSharedContext() const {
    std::unique_ptr<Core::Frontend::GraphicsContext> shared_context =
        std::make_unique<SDLGLContext>();
    return shared_context;
}

View File

@@ -27,6 +27,8 @@ public:
/// Releases the GL context from the caller thread
void DoneCurrent() override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
/// Whether the window is still open, and a close request hasn't yet been sent
bool IsOpen() const;