Compare commits

..

3 Commits

Author SHA1 Message Date
ReinUsesLisp
730eb1dad7 vk_stream_buffer: Remove copy code path 2019-02-26 02:09:43 -03:00
ReinUsesLisp
33a0597603 vk_stream_buffer: Implement a stream buffer
This manages two kinds of streaming buffers: one for unified memory
models and one for dedicated GPUs. The first one skips the copy from the
staging buffer to the real buffer, since it creates an unified buffer.

This implementation waits for all fences to finish their operation
before "invalidating". This is suboptimal since it should allocate
another buffer or start searching from the beginning. There is room for
improvement here.

This could also handle AMD's "pinned" memory (a heap with 256 MiB) that
seems to be designed for buffer streaming.
2019-02-24 04:27:51 -03:00
ReinUsesLisp
281a8bf259 vk_resource_manager: Minor VKFenceWatch changes 2019-02-24 04:19:04 -03:00
31 changed files with 847 additions and 935 deletions

View File

@@ -73,7 +73,6 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/texture.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"

View File

@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
}
}
state.yn1 = static_cast<s16>(yn1);
state.yn2 = static_cast<s16>(yn2);
state.yn1 = yn1;
state.yn2 = yn2;
return ret;
}

View File

@@ -46,7 +46,7 @@ public:
}
}
~CubebSinkStream() override {
~CubebSinkStream() {
if (!ctx) {
return;
}
@@ -75,11 +75,11 @@ public:
queue.Push(samples);
}
std::size_t SamplesInQueue(u32 channel_count) const override {
std::size_t SamplesInQueue(u32 num_channels) const override {
if (!ctx)
return 0;
return queue.Size() / channel_count;
return queue.Size() / num_channels;
}
void Flush() override {
@@ -98,7 +98,7 @@ private:
u32 num_channels{};
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame{};
std::array<s16, 2> last_frame;
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;

View File

@@ -47,7 +47,6 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/texture.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"

View File

@@ -28,8 +28,8 @@
#include <cstring>
#include "common/common_types.h"
// GCC
#ifdef __GNUC__
// GCC 4.6+
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
#endif
// LLVM/clang
#elif defined(__clang__)
#elif __clang__
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1

View File

@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
if (offset + length > data.size())
data.resize(offset + length);
const auto write = std::min(length, data.size() - offset);
std::memcpy(data.data() + offset, data_, write);
std::memcpy(data.data(), data_, write);
return write;
}

View File

@@ -28,13 +28,9 @@ namespace Service::NVFlinger {
constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
displays.emplace_back(0, "Default");
displays.emplace_back(1, "External");
displays.emplace_back(2, "Edid");
displays.emplace_back(3, "Internal");
displays.emplace_back(4, "Null");
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing)
: displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}},
core_timing{core_timing} {
// Schedule the screen composition events
composition_event =
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -59,14 +55,13 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
// TODO(Subv): Currently we only support the Default display.
ASSERT(name == "Default");
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetName() == name; });
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.name == name; });
if (itr == displays.end()) {
return {};
}
return itr->GetID();
return itr->id;
}
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -76,10 +71,13 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return {};
}
ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
buffer_queues.emplace_back(buffer_queue_id, layer_id);
display->CreateLayer(layer_id, buffer_queues.back());
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
display->layers.emplace_back(layer_id, buffer_queue);
buffer_queues.emplace_back(std::move(buffer_queue));
return layer_id;
}
@@ -90,7 +88,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
return {};
}
return layer->GetBufferQueue().GetId();
return layer->buffer_queue->GetId();
}
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -100,20 +98,12 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
return nullptr;
}
return display->GetVSyncEvent();
return display->vsync_event.readable;
}
BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
[&](const auto& queue) { return queue->GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
@@ -122,7 +112,7 @@ const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
VI::Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
[&](const VI::Display& display) { return display.id == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -134,7 +124,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
[&](const VI::Display& display) { return display.id == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -150,7 +140,14 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
return nullptr;
}
return display->FindLayer(layer_id);
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const VI::Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
}
const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
@@ -160,24 +157,33 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
return nullptr;
}
return display->FindLayer(layer_id);
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const VI::Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
}
void NVFlinger::Compose() {
for (auto& display : displays) {
// Trigger vsync for this display at the end of drawing
SCOPE_EXIT({ display.SignalVSyncEvent(); });
SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
// Don't do anything for displays without layers.
if (!display.HasLayers())
if (display.layers.empty())
continue;
// TODO(Subv): Support more than 1 layer.
VI::Layer& layer = display.GetLayer(0);
auto& buffer_queue = layer.GetBufferQueue();
ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
VI::Layer& layer = display.layers[0];
auto& buffer_queue = layer.buffer_queue;
// Search for a queued buffer and acquire it
auto buffer = buffer_queue.AcquireBuffer();
auto buffer = buffer_queue->AcquireBuffer();
MicroProfileFlip();
@@ -202,7 +208,7 @@ void NVFlinger::Compose() {
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
buffer_queue.ReleaseBuffer(buffer->get().slot);
buffer_queue->ReleaseBuffer(buffer->get().slot);
}
}

View File

@@ -28,8 +28,8 @@ class Module;
} // namespace Service::Nvidia
namespace Service::VI {
class Display;
class Layer;
struct Display;
struct Layer;
} // namespace Service::VI
namespace Service::NVFlinger {
@@ -65,10 +65,7 @@ public:
Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
/// Obtains a buffer queue identified by the ID.
BufferQueue& FindBufferQueue(u32 id);
/// Obtains a buffer queue identified by the ID.
const BufferQueue& FindBufferQueue(u32 id) const;
std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
@@ -90,7 +87,7 @@ private:
std::shared_ptr<Nvidia::Module> nvdrv;
std::vector<VI::Display> displays;
std::vector<BufferQueue> buffer_queues;
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
/// Id to use for the next layer that is created, this counter is shared among all displays.
u64 next_layer_id = 1;

View File

@@ -2,12 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include <fmt/format.h>
#include "common/assert.h"
#include "core/core.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/service/vi/display/vi_display.h"
@@ -23,49 +19,4 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
}
Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
return vsync_event.readable;
}
void Display::SignalVSyncEvent() {
vsync_event.writable->Signal();
}
void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
} // namespace Service::VI

View File

@@ -10,84 +10,14 @@
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
class Layer;
struct Layer;
/// Represents a single display type
class Display {
public:
/// Constructs a display with a given unique ID and name.
///
/// @param id The unique ID for this display.
/// @param name The name for this display.
///
struct Display {
Display(u64 id, std::string name);
~Display();
Display(const Display&) = delete;
Display& operator=(const Display&) = delete;
Display(Display&&) = default;
Display& operator=(Display&&) = default;
/// Gets the unique ID assigned to this display.
u64 GetID() const {
return id;
}
/// Gets the name of this display
const std::string& GetName() const {
return name;
}
/// Whether or not this display has any layers added to it.
bool HasLayers() const {
return !layers.empty();
}
/// Gets a layer for this display based off an index.
Layer& GetLayer(std::size_t index);
/// Gets a layer for this display based off an index.
const Layer& GetLayer(std::size_t index) const;
/// Gets the readable vsync event.
Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
/// Signals the internal vsync event.
void SignalVSyncEvent();
/// Creates and adds a layer to this display with the given ID.
///
/// @param id The ID to assign to the created layer.
/// @param buffer_queue The buffer queue for the layer instance to use.
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
Layer* FindLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
const Layer* FindLayer(u64 id) const;
private:
u64 id;
std::string name;

View File

@@ -6,7 +6,8 @@
namespace Service::VI {
Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue)
: id{id}, buffer_queue{std::move(queue)} {}
Layer::~Layer() = default;

View File

@@ -4,6 +4,8 @@
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Service::NVFlinger {
@@ -12,41 +14,12 @@ class BufferQueue;
namespace Service::VI {
/// Represents a single display layer.
class Layer {
public:
/// Constructs a layer with a given ID and buffer queue.
///
/// @param id The ID to assign to this layer.
/// @param queue The buffer queue for this layer to use.
///
Layer(u64 id, NVFlinger::BufferQueue& queue);
struct Layer {
Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue);
~Layer();
Layer(const Layer&) = delete;
Layer& operator=(const Layer&) = delete;
Layer(Layer&&) = default;
Layer& operator=(Layer&&) = delete;
/// Gets the ID for this layer.
u64 GetID() const {
return id;
}
/// Gets a reference to the buffer queue this layer is using.
NVFlinger::BufferQueue& GetBufferQueue() {
return buffer_queue;
}
/// Gets a const reference to the buffer queue this layer is using.
const NVFlinger::BufferQueue& GetBufferQueue() const {
return buffer_queue;
}
private:
u64 id;
NVFlinger::BufferQueue& buffer_queue;
std::shared_ptr<NVFlinger::BufferQueue> buffer_queue;
};
} // namespace Service::VI

View File

@@ -525,7 +525,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
static_cast<u32>(transaction), flags);
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
if (transaction == TransactionId::Connect) {
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
} else if (transaction == TransactionId::SetPreallocatedBuffer) {
IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
IGBPSetPreallocatedBufferResponseParcel response{};
ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
const u32 width{request.data.width};
const u32 height{request.data.height};
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
if (slot) {
// Buffer is available
@@ -559,8 +559,8 @@ private:
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
},
buffer_queue.GetWritableBufferWaitEvent());
buffer_queue->GetWritableBufferWaitEvent());
}
} else if (transaction == TransactionId::RequestBuffer) {
IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
auto& buffer = buffer_queue.RequestBuffer(request.slot);
auto& buffer = buffer_queue->RequestBuffer(request.slot);
IGBPRequestBufferResponseParcel response{buffer};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::QueueBuffer) {
IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
IGBPQueueBufferResponseParcel response{1280, 720};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::Query) {
IGBPQueryRequestParcel request{ctx.ReadBuffer()};
const u32 value =
buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
u32 value =
buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
IGBPQueryResponseParcel response{value};
ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
const auto buffer_queue = nv_flinger->FindBufferQueue(id);
// TODO(Subv): Find out what this actually is.
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
}
std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,7 +752,6 @@ public:
{1102, nullptr, "GetDisplayResolution"},
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
{2011, nullptr, "DestroyManagedLayer"},
{2012, nullptr, "CreateStrayLayer"},
{2050, nullptr, "CreateIndirectLayer"},
{2051, nullptr, "DestroyIndirectLayer"},
{2052, nullptr, "CreateIndirectProducerEndPoint"},

View File

@@ -74,7 +74,6 @@ add_library(video_core STATIC
shader/decode/hfma2.cpp
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/texture.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp
@@ -112,7 +111,9 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h)
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -33,36 +33,18 @@ void DmaPusher::DispatchCalls() {
}
bool DmaPusher::Step() {
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
if (dma_get != dma_put) {
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
bool non_main = command_list_header.is_non_main;
const CommandHeader command_header{Memory::Read32(*address)};
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
dma_get += sizeof(u32);
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size);
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {
if (!non_main) {
dma_mget = dma_get;
}
// now, see if we're in the middle of a command
if (dma_state.length_pending) {
@@ -109,11 +91,22 @@ bool DmaPusher::Step() {
break;
}
}
}
} else if (ib_enable && !dma_pushbuffer.empty()) {
// Current pushbuffer empty, but we have more IB entries to read
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
dma_get = command_list_header.addr;
dma_put = dma_get + command_list_header.size * sizeof(u32);
non_main = command_list_header.is_non_main;
if (!non_main) {
// TODO (degasus): This is dead code, as dma_mget is never read.
dma_mget = dma_put;
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
} else {
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
return {};
}
return true;

View File

@@ -75,8 +75,6 @@ private:
GPU& gpu;
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
@@ -91,8 +89,11 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
GPUVAddr dma_put{}; ///< pushbuffer current end address
GPUVAddr dma_get{}; ///< pushbuffer current read address
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
bool non_main{}; ///< non-main pushbuffer active
};
} // namespace Tegra

View File

@@ -325,11 +325,11 @@ enum class TextureQueryType : u64 {
enum class TextureProcessMode : u64 {
None = 0,
LZ = 1, // Load LOD of zero.
LZ = 1, // Unknown, appears to be the same as none.
LB = 2, // Load Bias.
LL = 3, // Load LOD.
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
LL = 3, // Load LOD (LevelOfDetail)
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
};
enum class TextureMiscMode : u64 {
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
};
enum class IpaInterpMode : u64 {
Pass = 0,
Multiply = 1,
Constant = 2,
Linear = 0,
Perspective = 1,
Flat = 2,
Sc = 3,
};
@@ -1446,7 +1446,6 @@ public:
Flow,
Synch,
Memory,
Texture,
FloatSet,
FloatSetPredicate,
IntegerSet,
@@ -1577,14 +1576,14 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),

View File

@@ -16,13 +16,6 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
struct Header {
@@ -91,15 +84,9 @@ struct Header {
} vtg;
struct {
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
INSERT_PADDING_BYTES(2); // ImapColor
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -116,28 +103,6 @@ struct Header {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
}
}
return result;
}
} ps;
};

View File

@@ -1257,11 +1257,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
if (old_params.pixel_format == new_params.pixel_format)
FastLayeredCopySurface(old_surface, new_surface);
else {
AccurateCopySurface(old_surface, new_surface);
}
FastLayeredCopySurface(old_surface, new_surface);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",

View File

@@ -5,9 +5,7 @@
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
#include <fmt/format.h>
@@ -22,7 +20,6 @@
namespace OpenGL::GLShader {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
@@ -291,22 +288,34 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(AttributeUse attribute) {
std::string GetInputFlags(const IpaMode& input_mode) {
const IpaSampleMode sample_mode = input_mode.sampling_mode;
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
std::string out;
switch (attribute) {
case AttributeUse::Constant:
switch (interp_mode) {
case IpaInterpMode::Flat:
out += "flat ";
break;
case AttributeUse::ScreenLinear:
case IpaInterpMode::Linear:
out += "noperspective ";
break;
case AttributeUse::Perspective:
case IpaInterpMode::Perspective:
// Default, Smooth
break;
default:
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
UNREACHABLE();
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
}
switch (sample_mode) {
case IpaSampleMode::Centroid:
// It can be implemented with the "centroid " keyword in GLSL
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
break;
case IpaSampleMode::Default:
// Default, n/a
break;
default:
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
}
return out;
}
@@ -315,11 +324,16 @@ private:
const auto& attributes = ir.GetInputAttributes();
for (const auto element : attributes) {
const Attribute::Index index = element.first;
const IpaMode& input_mode = *element.second.begin();
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
// Skip when it's not a generic attribute
continue;
}
ASSERT(element.second.size() > 0);
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
"Multiple input flag modes are not supported in GLSL");
// TODO(bunnei): Use proper number of elements for these
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != ShaderStage::Vertex) {
@@ -331,14 +345,8 @@ private:
if (stage == ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
std::string suffix;
if (stage == ShaderStage::Fragment) {
const auto input_mode =
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
suffix = GetInputFlags(input_mode);
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
attr + ';');
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
@@ -719,7 +727,7 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& func,
const std::vector<std::pair<Type, Node>>& extras) {
bool is_extra_int = false) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -740,47 +748,36 @@ private:
expr += Visit(operation[i]);
const std::size_t next = i + 1;
if (next < count)
if (next < count || has_array || has_shadow)
expr += ", ";
}
if (has_array) {
expr += ", float(ftoi(" + Visit(meta->array) + "))";
expr += "float(ftoi(" + Visit(meta->array) + "))";
}
if (has_shadow) {
expr += ", " + Visit(meta->depth_compare);
if (has_array)
expr += ", ";
expr += Visit(meta->depth_compare);
}
expr += ')';
for (const auto& extra_pair : extras) {
const auto [type, operand] = extra_pair;
if (operand == nullptr) {
continue;
}
for (const Node extra : meta->extras) {
expr += ", ";
switch (type) {
case Type::Int:
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
if (is_extra_int) {
if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(operand) + ')';
expr += "ftoi(" + Visit(extra) + ')';
}
break;
case Type::Float:
expr += Visit(operand);
break;
default: {
const auto type_int = static_cast<u32>(type);
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
expr += '0';
break;
}
} else {
expr += Visit(extra);
}
}
return expr + ')';
expr += ')';
return expr;
}
std::string Assign(Operation operation) {
@@ -1159,7 +1156,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
std::string expr = GenerateTexture(operation, "texture");
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1170,7 +1167,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
std::string expr = GenerateTexture(operation, "textureLod");
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1181,8 +1178,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) +
GetSwizzle(meta->element);
}
@@ -1211,8 +1207,8 @@ private:
ASSERT(meta);
if (meta->element < 2) {
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
" * vec2(256))" + GetSwizzle(meta->element) + "))";
return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" +
GetSwizzle(meta->element) + "))";
}
return "0";
}
@@ -1238,9 +1234,9 @@ private:
else if (next < count)
expr += ", ";
}
if (meta->lod) {
for (std::size_t i = 0; i < meta->extras.size(); ++i) {
expr += ", ";
expr += CastOperand(Visit(meta->lod), Type::Int);
expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
}
expr += ')';
@@ -1588,4 +1584,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (location = 0) in noperspective vec4 position;
layout (location = 0) in vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
protected_resources.push_back(resource);
}
void VKFence::Unprotect(const VKResource* resource) {
void VKFence::Unprotect(VKResource* resource) {
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
if (it != protected_resources.end()) {
protected_resources.erase(it);
}
ASSERT(it != protected_resources.end());
resource->OnFenceRemoval(this);
protected_resources.erase(it);
}
VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
}
void VKFenceWatch::Wait() {
if (!fence) {
if (fence == nullptr) {
return;
}
fence->Wait();
fence->Unprotect(this);
fence = nullptr;
}
void VKFenceWatch::Watch(VKFence& new_fence) {

View File

@@ -63,7 +63,7 @@ public:
void Protect(VKResource* resource);
/// Removes protection for a resource.
void Unprotect(const VKResource* resource);
void Unprotect(VKResource* resource);
/// Retreives the fence.
operator vk::Fence() const {

View File

@@ -0,0 +1,90 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include <optional>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
pipeline_stage} {
CreateBuffers(memory_manager, usage);
ReserveWatches(WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
ASSERT(size <= buffer_size);
mapped_size = size;
if (offset + size > buffer_size) {
// The buffer would overflow, save the amount of used buffers, signal an invalidation and
// reset the state.
invalidation_mark = used_watches;
used_watches = 0;
offset = 0;
}
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}
VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
if (invalidation_mark) {
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
exctx = scheduler.Flush();
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
[&](auto& resource) { resource->Wait(); });
invalidation_mark = std::nullopt;
}
if (used_watches + 1 >= watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(WATCHES_RESERVE_CHUNK);
}
// Add a watch for this allocation.
watches[used_watches++]->Watch(exctx.GetFence());
offset += size;
return exctx;
}
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
commit = memory_manager.Commit(*buffer, true);
mapped_pointer = commit->GetData();
}
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
const std::size_t previous_size = watches.size();
watches.resize(previous_size + grow_size);
std::generate(watches.begin() + previous_size, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
}
} // namespace Vulkan

View File

@@ -0,0 +1,72 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKFenceWatch;
class VKResourceManager;
class VKScheduler;
class VKStreamBuffer {
public:
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
~VKStreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that's true when buffer has been invalidated.
*/
std::tuple<u8*, u64, bool> Reserve(u64 size);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
vk::Buffer GetBuffer() const {
return *buffer;
}
private:
/// Creates Vulkan buffer handles committing the required the required memory.
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
/// Increases the amount of watches available.
void ReserveWatches(std::size_t grow_size);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
UniqueBuffer buffer; ///< Mapped buffer.
VKMemoryCommit commit; ///< Memory commit.
u8* mapped_pointer{}; ///< Pointer to the host visible commit
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t>
invalidation_mark{}; ///< Number of watches used in the current invalidation.
};
} // namespace Vulkan

View File

@@ -165,7 +165,6 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},

View File

@@ -17,6 +17,24 @@ using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::TextureMiscMode;
using Tegra::Shader::TextureProcessMode;
using Tegra::Shader::TextureType;
static std::size_t GetCoordCount(TextureType texture_type) {
switch (texture_type) {
case TextureType::Texture1D:
return 1;
case TextureType::Texture2D:
return 2;
case TextureType::Texture3D:
case TextureType::TextureCube:
return 3;
default:
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
return 0;
}
}
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -30,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
Tegra::Shader::IpaSampleMode::Default};
u64 next_element = instr.attribute.fmt20.element;
@@ -229,6 +247,194 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::TEX: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
break;
}
case OpCode::Id::TEXS: {
const TextureType texture_type{instr.texs.GetTextureType()};
const bool is_array{instr.texs.IsArrayTexture()};
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
}
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
if (instr.texs.fp32_flag) {
WriteTexsInstructionFloat(bb, instr, components);
} else {
WriteTexsInstructionHalfFloat(bb, instr, components);
}
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
}
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
coords.push_back(op_b);
} else {
coords.push_back(op_a);
coords.push_back(op_b);
}
std::vector<Node> extras;
extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, {}, {}, extras, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
WriteTexsInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
}
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{sampler, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
static_cast<u32>(instr.txq.query_type.Value()));
}
break;
}
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
// TODO: Add coordinates for different samplers once other texture types are implemented.
switch (texture_type) {
case TextureType::Texture1D:
coords.push_back(GetRegister(instr.gpr8));
break;
case TextureType::Texture2D:
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
break;
default:
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
// Fallback to interpreting as a 2D texture for now
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
}
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
}
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
@@ -236,4 +442,291 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return pc;
}
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
bool is_array, bool is_shadow) {
const auto offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
itr->IsShadow() == is_shadow);
return *itr;
}
// Otherwise create a new mapping for this sampler
const std::size_t next_index = used_samplers.size();
const Sampler entry{offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
SetTemporal(bb, dest_elem++, components[elem]);
}
// After writing values in temporals, move them to the real registers
for (u32 i = 0; i < dest_elem; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
}
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
SetTemporal(bb, dest_elem++, components[component]);
}
for (u32 i = 0; i < dest_elem; ++i) {
if (i < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
}
}
}
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
// float instruction).
Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
values[dest_elem++] = components[component];
}
if (dest_elem == 0)
return;
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
if (dest_elem <= 2) {
SetRegister(bb, instr.gpr0, first_value);
return;
}
SetTemporal(bb, 0, first_value);
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr28, GetTemporal(1));
}
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset) {
const bool is_array = array;
const bool is_shadow = depth_compare;
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
// LOD selection (either via bias or explicit textureLod) not supported in GL for
// sampler2DArrayShadow and samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
const OperationCode read_method =
lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
std::vector<Node> extras;
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
if (process_mode == TextureProcessMode::LZ) {
extras.push_back(Immediate(0.0f));
} else {
// If present, lod or bias are always stored in the register indexed by the gpr20
// field with an offset depending on the usage of the other registers
extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
}
}
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
MetaTexture meta{sampler, array, depth_compare, extras, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
return values;
}
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
// 1D.DC in OpenGL the 2nd component is ignored.
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
coords.push_back(Immediate(0.0f));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
const u64 last_coord_register =
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
const u32 bias_offset = coord_count > 2 ? 1 : 0;
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (size_t i = 0; i < coord_count; ++i)
coords.push_back(GetRegister(coord_register + i));
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
return values;
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// if is array gpr20 is used
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
const u64 last_coord_register =
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array, {}, {lod}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
}
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
std::size_t max_coords, std::size_t max_inputs) {
const std::size_t coord_count = GetCoordCount(texture_type);
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
UNIMPLEMENTED_MSG("Unsupported Texture operation");
total_coord_count = std::min(total_coord_count, max_coords);
}
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
total_coord_count +=
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
return {coord_count, total_coord_count};
}
} // namespace VideoCommon::Shader

View File

@@ -135,18 +135,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
instr.ipa.sample_mode.Value()};
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
Node value = attr;
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
}
}
value = GetSaturatedFloat(value, instr.ipa.saturate);
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
SetRegister(bb, instr.gpr0, value);
break;
@@ -186,4 +175,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -1,534 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::TextureMiscMode;
using Tegra::Shader::TextureProcessMode;
using Tegra::Shader::TextureType;
static std::size_t GetCoordCount(TextureType texture_type) {
switch (texture_type) {
case TextureType::Texture1D:
return 1;
case TextureType::Texture2D:
return 2;
case TextureType::Texture3D:
case TextureType::TextureCube:
return 3;
default:
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
return 0;
}
}
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
break;
}
case OpCode::Id::TEXS: {
const TextureType texture_type{instr.texs.GetTextureType()};
const bool is_array{instr.texs.IsArrayTexture()};
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
}
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
if (instr.texs.fp32_flag) {
WriteTexsInstructionFloat(bb, instr, components);
} else {
WriteTexsInstructionHalfFloat(bb, instr, components);
}
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
}
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
coords.push_back(op_b);
} else {
coords.push_back(op_a);
coords.push_back(op_b);
}
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
WriteTexsInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
}
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
static_cast<u32>(instr.txq.query_type.Value()));
}
break;
}
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
}
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
// TODO: Add coordinates for different samplers once other texture types are implemented.
switch (texture_type) {
case TextureType::Texture1D:
coords.push_back(GetRegister(instr.gpr8));
break;
case TextureType::Texture2D:
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
break;
default:
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
// Fallback to interpreting as a 2D texture for now
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
}
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
}
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
return pc;
}
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
bool is_array, bool is_shadow) {
const auto offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
itr->IsShadow() == is_shadow);
return *itr;
}
// Otherwise create a new mapping for this sampler
const std::size_t next_index = used_samplers.size();
const Sampler entry{offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
SetTemporal(bb, dest_elem++, components[elem]);
}
// After writing values in temporals, move them to the real registers
for (u32 i = 0; i < dest_elem; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
}
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
SetTemporal(bb, dest_elem++, components[component]);
}
for (u32 i = 0; i < dest_elem; ++i) {
if (i < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
}
}
}
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
// float instruction).
Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
values[dest_elem++] = components[component];
}
if (dest_elem == 0)
return;
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
if (dest_elem <= 2) {
SetRegister(bb, instr.gpr0, first_value);
return;
}
SetTemporal(bb, 0, first_value);
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
SetRegister(bb, instr.gpr0, GetTemporal(0));
SetRegister(bb, instr.gpr28, GetTemporal(1));
}
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset) {
const bool is_array = array;
const bool is_shadow = depth_compare;
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
// LOD selection (either via bias or explicit textureLod) not supported in GL for
// sampler2DArrayShadow and samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
const OperationCode read_method =
(lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
Node bias = {};
Node lod = {};
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
switch (process_mode) {
case TextureProcessMode::LZ:
lod = Immediate(0.0f);
break;
case TextureProcessMode::LB:
// If present, lod or bias are always stored in the register indexed by the gpr20
// field with an offset depending on the usage of the other registers
bias = GetRegister(instr.gpr20.Value() + bias_offset);
break;
case TextureProcessMode::LL:
lod = GetRegister(instr.gpr20.Value() + bias_offset);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
break;
}
}
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
return values;
}
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
// 1D.DC in OpenGL the 2nd component is ignored.
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
coords.push_back(Immediate(0.0f));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
const u64 last_coord_register =
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
const u32 bias_offset = coord_count > 2 ? 1 : 0;
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (size_t i = 0; i < coord_count; ++i)
coords.push_back(GetRegister(coord_register + i));
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
return values;
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
// if is array gpr20 is used
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
const u64 last_coord_register =
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
}
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
std::size_t max_coords, std::size_t max_inputs) {
const std::size_t coord_count = GetCoordCount(texture_type);
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
UNIMPLEMENTED_MSG("Unsupported Texture operation");
total_coord_count = std::min(total_coord_count, max_coords);
}
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
total_coord_count +=
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
return {coord_count, total_coord_count};
}
} // namespace VideoCommon::Shader

View File

@@ -290,9 +290,7 @@ struct MetaTexture {
const Sampler& sampler;
Node array{};
Node depth_compare{};
Node bias{};
Node lod{};
Node component{};
std::vector<Node> extras;
u32 element{};
};
@@ -616,7 +614,6 @@ private:
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
u32 DecodeConversion(NodeBlock& bb, u32 pc);
u32 DecodeMemory(NodeBlock& bb, u32 pc);
u32 DecodeTexture(NodeBlock& bb, u32 pc);
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);

View File

@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
return {node, cursor};
}
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
const auto& conditional_code = conditional->GetCode();
const auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
const auto& code = conditional->GetCode();
const auto [found, internal_cursor] =
FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
if (found)
return {found, cursor};
}
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
const auto& conditional_code = conditional->GetCode();
return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
const auto& code = conditional->GetCode();
return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
}
return nullptr;
}