Compare commits
3 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
730eb1dad7 | ||
|
|
33a0597603 | ||
|
|
281a8bf259 |
@@ -73,7 +73,6 @@ set(HASH_FILES
|
||||
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/memory.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/texture.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/other.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
|
||||
|
||||
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
|
||||
}
|
||||
}
|
||||
|
||||
state.yn1 = static_cast<s16>(yn1);
|
||||
state.yn2 = static_cast<s16>(yn2);
|
||||
state.yn1 = yn1;
|
||||
state.yn2 = yn2;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
~CubebSinkStream() override {
|
||||
~CubebSinkStream() {
|
||||
if (!ctx) {
|
||||
return;
|
||||
}
|
||||
@@ -75,11 +75,11 @@ public:
|
||||
queue.Push(samples);
|
||||
}
|
||||
|
||||
std::size_t SamplesInQueue(u32 channel_count) const override {
|
||||
std::size_t SamplesInQueue(u32 num_channels) const override {
|
||||
if (!ctx)
|
||||
return 0;
|
||||
|
||||
return queue.Size() / channel_count;
|
||||
return queue.Size() / num_channels;
|
||||
}
|
||||
|
||||
void Flush() override {
|
||||
@@ -98,7 +98,7 @@ private:
|
||||
u32 num_channels{};
|
||||
|
||||
Common::RingBuffer<s16, 0x10000> queue;
|
||||
std::array<s16, 2> last_frame{};
|
||||
std::array<s16, 2> last_frame;
|
||||
std::atomic<bool> should_flush{};
|
||||
TimeStretcher time_stretch;
|
||||
|
||||
|
||||
@@ -47,7 +47,6 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/memory.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/texture.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/other.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
|
||||
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
|
||||
|
||||
@@ -28,8 +28,8 @@
|
||||
#include <cstring>
|
||||
#include "common/common_types.h"
|
||||
|
||||
// GCC
|
||||
#ifdef __GNUC__
|
||||
// GCC 4.6+
|
||||
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
|
||||
|
||||
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
|
||||
#define COMMON_LITTLE_ENDIAN 1
|
||||
@@ -38,7 +38,7 @@
|
||||
#endif
|
||||
|
||||
// LLVM/clang
|
||||
#elif defined(__clang__)
|
||||
#elif __clang__
|
||||
|
||||
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
|
||||
#define COMMON_LITTLE_ENDIAN 1
|
||||
|
||||
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
|
||||
if (offset + length > data.size())
|
||||
data.resize(offset + length);
|
||||
const auto write = std::min(length, data.size() - offset);
|
||||
std::memcpy(data.data() + offset, data_, write);
|
||||
std::memcpy(data.data(), data_, write);
|
||||
return write;
|
||||
}
|
||||
|
||||
|
||||
@@ -28,13 +28,9 @@ namespace Service::NVFlinger {
|
||||
constexpr std::size_t SCREEN_REFRESH_RATE = 60;
|
||||
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
|
||||
|
||||
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
|
||||
displays.emplace_back(0, "Default");
|
||||
displays.emplace_back(1, "External");
|
||||
displays.emplace_back(2, "Edid");
|
||||
displays.emplace_back(3, "Internal");
|
||||
displays.emplace_back(4, "Null");
|
||||
|
||||
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing)
|
||||
: displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}},
|
||||
core_timing{core_timing} {
|
||||
// Schedule the screen composition events
|
||||
composition_event =
|
||||
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
|
||||
@@ -59,14 +55,13 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
|
||||
// TODO(Subv): Currently we only support the Default display.
|
||||
ASSERT(name == "Default");
|
||||
|
||||
const auto itr =
|
||||
std::find_if(displays.begin(), displays.end(),
|
||||
[&](const VI::Display& display) { return display.GetName() == name; });
|
||||
const auto itr = std::find_if(displays.begin(), displays.end(),
|
||||
[&](const VI::Display& display) { return display.name == name; });
|
||||
if (itr == displays.end()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return itr->GetID();
|
||||
return itr->id;
|
||||
}
|
||||
|
||||
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
|
||||
@@ -76,10 +71,13 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
|
||||
return {};
|
||||
}
|
||||
|
||||
ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
|
||||
|
||||
const u64 layer_id = next_layer_id++;
|
||||
const u32 buffer_queue_id = next_buffer_queue_id++;
|
||||
buffer_queues.emplace_back(buffer_queue_id, layer_id);
|
||||
display->CreateLayer(layer_id, buffer_queues.back());
|
||||
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
|
||||
display->layers.emplace_back(layer_id, buffer_queue);
|
||||
buffer_queues.emplace_back(std::move(buffer_queue));
|
||||
return layer_id;
|
||||
}
|
||||
|
||||
@@ -90,7 +88,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
|
||||
return {};
|
||||
}
|
||||
|
||||
return layer->GetBufferQueue().GetId();
|
||||
return layer->buffer_queue->GetId();
|
||||
}
|
||||
|
||||
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
|
||||
@@ -100,20 +98,12 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return display->GetVSyncEvent();
|
||||
return display->vsync_event.readable;
|
||||
}
|
||||
|
||||
BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
|
||||
std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
|
||||
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
|
||||
[id](const auto& queue) { return queue.GetId() == id; });
|
||||
|
||||
ASSERT(itr != buffer_queues.end());
|
||||
return *itr;
|
||||
}
|
||||
|
||||
const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
|
||||
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
|
||||
[id](const auto& queue) { return queue.GetId() == id; });
|
||||
[&](const auto& queue) { return queue->GetId() == id; });
|
||||
|
||||
ASSERT(itr != buffer_queues.end());
|
||||
return *itr;
|
||||
@@ -122,7 +112,7 @@ const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
|
||||
VI::Display* NVFlinger::FindDisplay(u64 display_id) {
|
||||
const auto itr =
|
||||
std::find_if(displays.begin(), displays.end(),
|
||||
[&](const VI::Display& display) { return display.GetID() == display_id; });
|
||||
[&](const VI::Display& display) { return display.id == display_id; });
|
||||
|
||||
if (itr == displays.end()) {
|
||||
return nullptr;
|
||||
@@ -134,7 +124,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
|
||||
const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
|
||||
const auto itr =
|
||||
std::find_if(displays.begin(), displays.end(),
|
||||
[&](const VI::Display& display) { return display.GetID() == display_id; });
|
||||
[&](const VI::Display& display) { return display.id == display_id; });
|
||||
|
||||
if (itr == displays.end()) {
|
||||
return nullptr;
|
||||
@@ -150,7 +140,14 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return display->FindLayer(layer_id);
|
||||
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
|
||||
[&](const VI::Layer& layer) { return layer.id == layer_id; });
|
||||
|
||||
if (itr == display->layers.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &*itr;
|
||||
}
|
||||
|
||||
const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
|
||||
@@ -160,24 +157,33 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return display->FindLayer(layer_id);
|
||||
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
|
||||
[&](const VI::Layer& layer) { return layer.id == layer_id; });
|
||||
|
||||
if (itr == display->layers.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &*itr;
|
||||
}
|
||||
|
||||
void NVFlinger::Compose() {
|
||||
for (auto& display : displays) {
|
||||
// Trigger vsync for this display at the end of drawing
|
||||
SCOPE_EXIT({ display.SignalVSyncEvent(); });
|
||||
SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
|
||||
|
||||
// Don't do anything for displays without layers.
|
||||
if (!display.HasLayers())
|
||||
if (display.layers.empty())
|
||||
continue;
|
||||
|
||||
// TODO(Subv): Support more than 1 layer.
|
||||
VI::Layer& layer = display.GetLayer(0);
|
||||
auto& buffer_queue = layer.GetBufferQueue();
|
||||
ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
|
||||
|
||||
VI::Layer& layer = display.layers[0];
|
||||
auto& buffer_queue = layer.buffer_queue;
|
||||
|
||||
// Search for a queued buffer and acquire it
|
||||
auto buffer = buffer_queue.AcquireBuffer();
|
||||
auto buffer = buffer_queue->AcquireBuffer();
|
||||
|
||||
MicroProfileFlip();
|
||||
|
||||
@@ -202,7 +208,7 @@ void NVFlinger::Compose() {
|
||||
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
|
||||
buffer->get().transform, buffer->get().crop_rect);
|
||||
|
||||
buffer_queue.ReleaseBuffer(buffer->get().slot);
|
||||
buffer_queue->ReleaseBuffer(buffer->get().slot);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,8 +28,8 @@ class Module;
|
||||
} // namespace Service::Nvidia
|
||||
|
||||
namespace Service::VI {
|
||||
class Display;
|
||||
class Layer;
|
||||
struct Display;
|
||||
struct Layer;
|
||||
} // namespace Service::VI
|
||||
|
||||
namespace Service::NVFlinger {
|
||||
@@ -65,10 +65,7 @@ public:
|
||||
Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
|
||||
|
||||
/// Obtains a buffer queue identified by the ID.
|
||||
BufferQueue& FindBufferQueue(u32 id);
|
||||
|
||||
/// Obtains a buffer queue identified by the ID.
|
||||
const BufferQueue& FindBufferQueue(u32 id) const;
|
||||
std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
|
||||
|
||||
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
|
||||
/// finished.
|
||||
@@ -90,7 +87,7 @@ private:
|
||||
std::shared_ptr<Nvidia::Module> nvdrv;
|
||||
|
||||
std::vector<VI::Display> displays;
|
||||
std::vector<BufferQueue> buffer_queues;
|
||||
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
|
||||
|
||||
/// Id to use for the next layer that is created, this counter is shared among all displays.
|
||||
u64 next_layer_id = 1;
|
||||
|
||||
@@ -2,12 +2,8 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "core/core.h"
|
||||
#include "core/hle/kernel/readable_event.h"
|
||||
#include "core/hle/service/vi/display/vi_display.h"
|
||||
@@ -23,49 +19,4 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
|
||||
|
||||
Display::~Display() = default;
|
||||
|
||||
Layer& Display::GetLayer(std::size_t index) {
|
||||
return layers.at(index);
|
||||
}
|
||||
|
||||
const Layer& Display::GetLayer(std::size_t index) const {
|
||||
return layers.at(index);
|
||||
}
|
||||
|
||||
Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
|
||||
return vsync_event.readable;
|
||||
}
|
||||
|
||||
void Display::SignalVSyncEvent() {
|
||||
vsync_event.writable->Signal();
|
||||
}
|
||||
|
||||
void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
|
||||
// TODO(Subv): Support more than 1 layer.
|
||||
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
|
||||
|
||||
layers.emplace_back(id, buffer_queue);
|
||||
}
|
||||
|
||||
Layer* Display::FindLayer(u64 id) {
|
||||
const auto itr = std::find_if(layers.begin(), layers.end(),
|
||||
[id](const VI::Layer& layer) { return layer.GetID() == id; });
|
||||
|
||||
if (itr == layers.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &*itr;
|
||||
}
|
||||
|
||||
const Layer* Display::FindLayer(u64 id) const {
|
||||
const auto itr = std::find_if(layers.begin(), layers.end(),
|
||||
[id](const VI::Layer& layer) { return layer.GetID() == id; });
|
||||
|
||||
if (itr == layers.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &*itr;
|
||||
}
|
||||
|
||||
} // namespace Service::VI
|
||||
|
||||
@@ -10,84 +10,14 @@
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/writable_event.h"
|
||||
|
||||
namespace Service::NVFlinger {
|
||||
class BufferQueue;
|
||||
}
|
||||
|
||||
namespace Service::VI {
|
||||
|
||||
class Layer;
|
||||
struct Layer;
|
||||
|
||||
/// Represents a single display type
|
||||
class Display {
|
||||
public:
|
||||
/// Constructs a display with a given unique ID and name.
|
||||
///
|
||||
/// @param id The unique ID for this display.
|
||||
/// @param name The name for this display.
|
||||
///
|
||||
struct Display {
|
||||
Display(u64 id, std::string name);
|
||||
~Display();
|
||||
|
||||
Display(const Display&) = delete;
|
||||
Display& operator=(const Display&) = delete;
|
||||
|
||||
Display(Display&&) = default;
|
||||
Display& operator=(Display&&) = default;
|
||||
|
||||
/// Gets the unique ID assigned to this display.
|
||||
u64 GetID() const {
|
||||
return id;
|
||||
}
|
||||
|
||||
/// Gets the name of this display
|
||||
const std::string& GetName() const {
|
||||
return name;
|
||||
}
|
||||
|
||||
/// Whether or not this display has any layers added to it.
|
||||
bool HasLayers() const {
|
||||
return !layers.empty();
|
||||
}
|
||||
|
||||
/// Gets a layer for this display based off an index.
|
||||
Layer& GetLayer(std::size_t index);
|
||||
|
||||
/// Gets a layer for this display based off an index.
|
||||
const Layer& GetLayer(std::size_t index) const;
|
||||
|
||||
/// Gets the readable vsync event.
|
||||
Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
|
||||
|
||||
/// Signals the internal vsync event.
|
||||
void SignalVSyncEvent();
|
||||
|
||||
/// Creates and adds a layer to this display with the given ID.
|
||||
///
|
||||
/// @param id The ID to assign to the created layer.
|
||||
/// @param buffer_queue The buffer queue for the layer instance to use.
|
||||
///
|
||||
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
|
||||
|
||||
/// Attempts to find a layer with the given ID.
|
||||
///
|
||||
/// @param id The layer ID.
|
||||
///
|
||||
/// @returns If found, the Layer instance with the given ID.
|
||||
/// If not found, then nullptr is returned.
|
||||
///
|
||||
Layer* FindLayer(u64 id);
|
||||
|
||||
/// Attempts to find a layer with the given ID.
|
||||
///
|
||||
/// @param id The layer ID.
|
||||
///
|
||||
/// @returns If found, the Layer instance with the given ID.
|
||||
/// If not found, then nullptr is returned.
|
||||
///
|
||||
const Layer* FindLayer(u64 id) const;
|
||||
|
||||
private:
|
||||
u64 id;
|
||||
std::string name;
|
||||
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
|
||||
namespace Service::VI {
|
||||
|
||||
Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
|
||||
Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue)
|
||||
: id{id}, buffer_queue{std::move(queue)} {}
|
||||
|
||||
Layer::~Layer() = default;
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Service::NVFlinger {
|
||||
@@ -12,41 +14,12 @@ class BufferQueue;
|
||||
|
||||
namespace Service::VI {
|
||||
|
||||
/// Represents a single display layer.
|
||||
class Layer {
|
||||
public:
|
||||
/// Constructs a layer with a given ID and buffer queue.
|
||||
///
|
||||
/// @param id The ID to assign to this layer.
|
||||
/// @param queue The buffer queue for this layer to use.
|
||||
///
|
||||
Layer(u64 id, NVFlinger::BufferQueue& queue);
|
||||
struct Layer {
|
||||
Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue);
|
||||
~Layer();
|
||||
|
||||
Layer(const Layer&) = delete;
|
||||
Layer& operator=(const Layer&) = delete;
|
||||
|
||||
Layer(Layer&&) = default;
|
||||
Layer& operator=(Layer&&) = delete;
|
||||
|
||||
/// Gets the ID for this layer.
|
||||
u64 GetID() const {
|
||||
return id;
|
||||
}
|
||||
|
||||
/// Gets a reference to the buffer queue this layer is using.
|
||||
NVFlinger::BufferQueue& GetBufferQueue() {
|
||||
return buffer_queue;
|
||||
}
|
||||
|
||||
/// Gets a const reference to the buffer queue this layer is using.
|
||||
const NVFlinger::BufferQueue& GetBufferQueue() const {
|
||||
return buffer_queue;
|
||||
}
|
||||
|
||||
private:
|
||||
u64 id;
|
||||
NVFlinger::BufferQueue& buffer_queue;
|
||||
std::shared_ptr<NVFlinger::BufferQueue> buffer_queue;
|
||||
};
|
||||
|
||||
} // namespace Service::VI
|
||||
|
||||
@@ -525,7 +525,7 @@ private:
|
||||
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
|
||||
static_cast<u32>(transaction), flags);
|
||||
|
||||
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
|
||||
auto buffer_queue = nv_flinger->FindBufferQueue(id);
|
||||
|
||||
if (transaction == TransactionId::Connect) {
|
||||
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
|
||||
@@ -538,7 +538,7 @@ private:
|
||||
} else if (transaction == TransactionId::SetPreallocatedBuffer) {
|
||||
IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
|
||||
|
||||
buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
|
||||
buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
|
||||
|
||||
IGBPSetPreallocatedBufferResponseParcel response{};
|
||||
ctx.WriteBuffer(response.Serialize());
|
||||
@@ -546,7 +546,7 @@ private:
|
||||
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
|
||||
const u32 width{request.data.width};
|
||||
const u32 height{request.data.height};
|
||||
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
|
||||
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
|
||||
|
||||
if (slot) {
|
||||
// Buffer is available
|
||||
@@ -559,8 +559,8 @@ private:
|
||||
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
|
||||
Kernel::ThreadWakeupReason reason) {
|
||||
// Repeat TransactParcel DequeueBuffer when a buffer is available
|
||||
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
|
||||
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
|
||||
auto buffer_queue = nv_flinger->FindBufferQueue(id);
|
||||
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
|
||||
ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
|
||||
|
||||
IGBPDequeueBufferResponseParcel response{*slot};
|
||||
@@ -568,28 +568,28 @@ private:
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
},
|
||||
buffer_queue.GetWritableBufferWaitEvent());
|
||||
buffer_queue->GetWritableBufferWaitEvent());
|
||||
}
|
||||
} else if (transaction == TransactionId::RequestBuffer) {
|
||||
IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
|
||||
|
||||
auto& buffer = buffer_queue.RequestBuffer(request.slot);
|
||||
auto& buffer = buffer_queue->RequestBuffer(request.slot);
|
||||
|
||||
IGBPRequestBufferResponseParcel response{buffer};
|
||||
ctx.WriteBuffer(response.Serialize());
|
||||
} else if (transaction == TransactionId::QueueBuffer) {
|
||||
IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
|
||||
|
||||
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
|
||||
request.data.GetCropRect());
|
||||
buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
|
||||
request.data.GetCropRect());
|
||||
|
||||
IGBPQueueBufferResponseParcel response{1280, 720};
|
||||
ctx.WriteBuffer(response.Serialize());
|
||||
} else if (transaction == TransactionId::Query) {
|
||||
IGBPQueryRequestParcel request{ctx.ReadBuffer()};
|
||||
|
||||
const u32 value =
|
||||
buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
|
||||
u32 value =
|
||||
buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
|
||||
|
||||
IGBPQueryResponseParcel response{value};
|
||||
ctx.WriteBuffer(response.Serialize());
|
||||
@@ -629,12 +629,12 @@ private:
|
||||
|
||||
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
|
||||
|
||||
const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
|
||||
const auto buffer_queue = nv_flinger->FindBufferQueue(id);
|
||||
|
||||
// TODO(Subv): Find out what this actually is.
|
||||
IPC::ResponseBuilder rb{ctx, 2, 1};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
|
||||
rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
|
||||
}
|
||||
|
||||
std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
|
||||
@@ -752,7 +752,6 @@ public:
|
||||
{1102, nullptr, "GetDisplayResolution"},
|
||||
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
|
||||
{2011, nullptr, "DestroyManagedLayer"},
|
||||
{2012, nullptr, "CreateStrayLayer"},
|
||||
{2050, nullptr, "CreateIndirectLayer"},
|
||||
{2051, nullptr, "DestroyIndirectLayer"},
|
||||
{2052, nullptr, "CreateIndirectProducerEndPoint"},
|
||||
|
||||
@@ -74,7 +74,6 @@ add_library(video_core STATIC
|
||||
shader/decode/hfma2.cpp
|
||||
shader/decode/conversion.cpp
|
||||
shader/decode/memory.cpp
|
||||
shader/decode/texture.cpp
|
||||
shader/decode/float_set_predicate.cpp
|
||||
shader/decode/integer_set_predicate.cpp
|
||||
shader/decode/half_set_predicate.cpp
|
||||
@@ -112,7 +111,9 @@ if (ENABLE_VULKAN)
|
||||
renderer_vulkan/vk_resource_manager.cpp
|
||||
renderer_vulkan/vk_resource_manager.h
|
||||
renderer_vulkan/vk_scheduler.cpp
|
||||
renderer_vulkan/vk_scheduler.h)
|
||||
renderer_vulkan/vk_scheduler.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
renderer_vulkan/vk_stream_buffer.h)
|
||||
|
||||
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
|
||||
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
|
||||
|
||||
@@ -33,36 +33,18 @@ void DmaPusher::DispatchCalls() {
|
||||
}
|
||||
|
||||
bool DmaPusher::Step() {
|
||||
if (!ib_enable || dma_pushbuffer.empty()) {
|
||||
// pushbuffer empty and IB empty or nonexistent - nothing to do
|
||||
return false;
|
||||
}
|
||||
if (dma_get != dma_put) {
|
||||
// Push buffer non-empty, read a word
|
||||
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
|
||||
ASSERT_MSG(address, "Invalid GPU address");
|
||||
|
||||
const CommandList& command_list{dma_pushbuffer.front()};
|
||||
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
|
||||
GPUVAddr dma_get = command_list_header.addr;
|
||||
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
|
||||
bool non_main = command_list_header.is_non_main;
|
||||
const CommandHeader command_header{Memory::Read32(*address)};
|
||||
|
||||
if (dma_pushbuffer_subindex >= command_list.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
dma_pushbuffer.pop();
|
||||
dma_pushbuffer_subindex = 0;
|
||||
}
|
||||
dma_get += sizeof(u32);
|
||||
|
||||
if (command_list_header.size == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Push buffer non-empty, read a word
|
||||
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
|
||||
ASSERT_MSG(address, "Invalid GPU address");
|
||||
|
||||
command_headers.resize(command_list_header.size);
|
||||
|
||||
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
|
||||
|
||||
for (const CommandHeader& command_header : command_headers) {
|
||||
if (!non_main) {
|
||||
dma_mget = dma_get;
|
||||
}
|
||||
|
||||
// now, see if we're in the middle of a command
|
||||
if (dma_state.length_pending) {
|
||||
@@ -109,11 +91,22 @@ bool DmaPusher::Step() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (ib_enable && !dma_pushbuffer.empty()) {
|
||||
// Current pushbuffer empty, but we have more IB entries to read
|
||||
const CommandList& command_list{dma_pushbuffer.front()};
|
||||
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
|
||||
dma_get = command_list_header.addr;
|
||||
dma_put = dma_get + command_list_header.size * sizeof(u32);
|
||||
non_main = command_list_header.is_non_main;
|
||||
|
||||
if (!non_main) {
|
||||
// TODO (degasus): This is dead code, as dma_mget is never read.
|
||||
dma_mget = dma_put;
|
||||
if (dma_pushbuffer_subindex >= command_list.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
dma_pushbuffer.pop();
|
||||
dma_pushbuffer_subindex = 0;
|
||||
}
|
||||
} else {
|
||||
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
|
||||
return {};
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -75,8 +75,6 @@ private:
|
||||
|
||||
GPU& gpu;
|
||||
|
||||
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
|
||||
|
||||
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
|
||||
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
|
||||
|
||||
@@ -91,8 +89,11 @@ private:
|
||||
DmaState dma_state{};
|
||||
bool dma_increment_once{};
|
||||
|
||||
GPUVAddr dma_put{}; ///< pushbuffer current end address
|
||||
GPUVAddr dma_get{}; ///< pushbuffer current read address
|
||||
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
|
||||
bool ib_enable{true}; ///< IB mode enabled
|
||||
bool non_main{}; ///< non-main pushbuffer active
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
@@ -325,11 +325,11 @@ enum class TextureQueryType : u64 {
|
||||
|
||||
enum class TextureProcessMode : u64 {
|
||||
None = 0,
|
||||
LZ = 1, // Load LOD of zero.
|
||||
LZ = 1, // Unknown, appears to be the same as none.
|
||||
LB = 2, // Load Bias.
|
||||
LL = 3, // Load LOD.
|
||||
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
|
||||
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
|
||||
LL = 3, // Load LOD (LevelOfDetail)
|
||||
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
|
||||
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
|
||||
};
|
||||
|
||||
enum class TextureMiscMode : u64 {
|
||||
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
|
||||
};
|
||||
|
||||
enum class IpaInterpMode : u64 {
|
||||
Pass = 0,
|
||||
Multiply = 1,
|
||||
Constant = 2,
|
||||
Linear = 0,
|
||||
Perspective = 1,
|
||||
Flat = 2,
|
||||
Sc = 3,
|
||||
};
|
||||
|
||||
@@ -1446,7 +1446,6 @@ public:
|
||||
Flow,
|
||||
Synch,
|
||||
Memory,
|
||||
Texture,
|
||||
FloatSet,
|
||||
FloatSetPredicate,
|
||||
IntegerSet,
|
||||
@@ -1577,14 +1576,14 @@ private:
|
||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
|
||||
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
|
||||
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
|
||||
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
|
||||
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
|
||||
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
|
||||
INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
|
||||
INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
|
||||
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
|
||||
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
|
||||
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
|
||||
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
|
||||
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
|
||||
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
|
||||
|
||||
@@ -16,13 +16,6 @@ enum class OutputTopology : u32 {
|
||||
TriangleStrip = 7,
|
||||
};
|
||||
|
||||
enum class AttributeUse : u8 {
|
||||
Unused = 0,
|
||||
Constant = 1,
|
||||
Perspective = 2,
|
||||
ScreenLinear = 3,
|
||||
};
|
||||
|
||||
// Documentation in:
|
||||
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
|
||||
struct Header {
|
||||
@@ -91,15 +84,9 @@ struct Header {
|
||||
} vtg;
|
||||
|
||||
struct {
|
||||
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
|
||||
union {
|
||||
BitField<0, 2, AttributeUse> x;
|
||||
BitField<2, 2, AttributeUse> y;
|
||||
BitField<4, 2, AttributeUse> w;
|
||||
BitField<6, 2, AttributeUse> z;
|
||||
u8 raw;
|
||||
} imap_generic_vector[32];
|
||||
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
|
||||
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
|
||||
INSERT_PADDING_BYTES(2); // ImapColor
|
||||
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
|
||||
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
|
||||
@@ -116,28 +103,6 @@ struct Header {
|
||||
const u32 bit = render_target * 4 + component;
|
||||
return omap.target & (1 << bit);
|
||||
}
|
||||
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
|
||||
return static_cast<AttributeUse>(
|
||||
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
|
||||
}
|
||||
AttributeUse GetAttributeUse(u32 attribute) const {
|
||||
AttributeUse result = AttributeUse::Unused;
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
const auto index = GetAttributeIndexUse(attribute, i);
|
||||
if (index == AttributeUse::Unused) {
|
||||
continue;
|
||||
}
|
||||
if (result == AttributeUse::Unused || result == index) {
|
||||
result = index;
|
||||
continue;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
|
||||
if (index == AttributeUse::Perspective) {
|
||||
result = index;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} ps;
|
||||
};
|
||||
|
||||
|
||||
@@ -1257,11 +1257,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
if (old_params.pixel_format == new_params.pixel_format)
|
||||
FastLayeredCopySurface(old_surface, new_surface);
|
||||
else {
|
||||
AccurateCopySurface(old_surface, new_surface);
|
||||
}
|
||||
FastLayeredCopySurface(old_surface, new_surface);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
|
||||
|
||||
@@ -5,9 +5,7 @@
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
@@ -22,7 +20,6 @@
|
||||
namespace OpenGL::GLShader {
|
||||
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::AttributeUse;
|
||||
using Tegra::Shader::Header;
|
||||
using Tegra::Shader::IpaInterpMode;
|
||||
using Tegra::Shader::IpaMode;
|
||||
@@ -291,22 +288,34 @@ private:
|
||||
code.AddNewLine();
|
||||
}
|
||||
|
||||
std::string GetInputFlags(AttributeUse attribute) {
|
||||
std::string GetInputFlags(const IpaMode& input_mode) {
|
||||
const IpaSampleMode sample_mode = input_mode.sampling_mode;
|
||||
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
|
||||
std::string out;
|
||||
|
||||
switch (attribute) {
|
||||
case AttributeUse::Constant:
|
||||
switch (interp_mode) {
|
||||
case IpaInterpMode::Flat:
|
||||
out += "flat ";
|
||||
break;
|
||||
case AttributeUse::ScreenLinear:
|
||||
case IpaInterpMode::Linear:
|
||||
out += "noperspective ";
|
||||
break;
|
||||
case AttributeUse::Perspective:
|
||||
case IpaInterpMode::Perspective:
|
||||
// Default, Smooth
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
|
||||
UNREACHABLE();
|
||||
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
|
||||
}
|
||||
switch (sample_mode) {
|
||||
case IpaSampleMode::Centroid:
|
||||
// It can be implemented with the "centroid " keyword in GLSL
|
||||
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
|
||||
break;
|
||||
case IpaSampleMode::Default:
|
||||
// Default, n/a
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@@ -315,11 +324,16 @@ private:
|
||||
const auto& attributes = ir.GetInputAttributes();
|
||||
for (const auto element : attributes) {
|
||||
const Attribute::Index index = element.first;
|
||||
const IpaMode& input_mode = *element.second.begin();
|
||||
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
|
||||
// Skip when it's not a generic attribute
|
||||
continue;
|
||||
}
|
||||
|
||||
ASSERT(element.second.size() > 0);
|
||||
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
|
||||
"Multiple input flag modes are not supported in GLSL");
|
||||
|
||||
// TODO(bunnei): Use proper number of elements for these
|
||||
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
|
||||
if (stage != ShaderStage::Vertex) {
|
||||
@@ -331,14 +345,8 @@ private:
|
||||
if (stage == ShaderStage::Geometry) {
|
||||
attr = "gs_" + attr + "[]";
|
||||
}
|
||||
std::string suffix;
|
||||
if (stage == ShaderStage::Fragment) {
|
||||
const auto input_mode =
|
||||
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
|
||||
suffix = GetInputFlags(input_mode);
|
||||
}
|
||||
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
|
||||
attr + ';');
|
||||
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
|
||||
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
|
||||
}
|
||||
if (!attributes.empty())
|
||||
code.AddNewLine();
|
||||
@@ -719,7 +727,7 @@ private:
|
||||
}
|
||||
|
||||
std::string GenerateTexture(Operation operation, const std::string& func,
|
||||
const std::vector<std::pair<Type, Node>>& extras) {
|
||||
bool is_extra_int = false) {
|
||||
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
|
||||
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
@@ -740,47 +748,36 @@ private:
|
||||
expr += Visit(operation[i]);
|
||||
|
||||
const std::size_t next = i + 1;
|
||||
if (next < count)
|
||||
if (next < count || has_array || has_shadow)
|
||||
expr += ", ";
|
||||
}
|
||||
if (has_array) {
|
||||
expr += ", float(ftoi(" + Visit(meta->array) + "))";
|
||||
expr += "float(ftoi(" + Visit(meta->array) + "))";
|
||||
}
|
||||
if (has_shadow) {
|
||||
expr += ", " + Visit(meta->depth_compare);
|
||||
if (has_array)
|
||||
expr += ", ";
|
||||
expr += Visit(meta->depth_compare);
|
||||
}
|
||||
expr += ')';
|
||||
|
||||
for (const auto& extra_pair : extras) {
|
||||
const auto [type, operand] = extra_pair;
|
||||
if (operand == nullptr) {
|
||||
continue;
|
||||
}
|
||||
for (const Node extra : meta->extras) {
|
||||
expr += ", ";
|
||||
|
||||
switch (type) {
|
||||
case Type::Int:
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
|
||||
if (is_extra_int) {
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
|
||||
// Inline the string as an immediate integer in GLSL (some extra arguments are
|
||||
// required to be constant)
|
||||
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
|
||||
} else {
|
||||
expr += "ftoi(" + Visit(operand) + ')';
|
||||
expr += "ftoi(" + Visit(extra) + ')';
|
||||
}
|
||||
break;
|
||||
case Type::Float:
|
||||
expr += Visit(operand);
|
||||
break;
|
||||
default: {
|
||||
const auto type_int = static_cast<u32>(type);
|
||||
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
|
||||
expr += '0';
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
expr += Visit(extra);
|
||||
}
|
||||
}
|
||||
|
||||
return expr + ')';
|
||||
expr += ')';
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::string Assign(Operation operation) {
|
||||
@@ -1159,7 +1156,7 @@ private:
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
|
||||
std::string expr = GenerateTexture(operation, "texture");
|
||||
if (meta->sampler.IsShadow()) {
|
||||
expr = "vec4(" + expr + ')';
|
||||
}
|
||||
@@ -1170,7 +1167,7 @@ private:
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
|
||||
std::string expr = GenerateTexture(operation, "textureLod");
|
||||
if (meta->sampler.IsShadow()) {
|
||||
expr = "vec4(" + expr + ')';
|
||||
}
|
||||
@@ -1181,8 +1178,7 @@ private:
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
|
||||
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
|
||||
return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) +
|
||||
GetSwizzle(meta->element);
|
||||
}
|
||||
|
||||
@@ -1211,8 +1207,8 @@ private:
|
||||
ASSERT(meta);
|
||||
|
||||
if (meta->element < 2) {
|
||||
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
|
||||
" * vec2(256))" + GetSwizzle(meta->element) + "))";
|
||||
return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" +
|
||||
GetSwizzle(meta->element) + "))";
|
||||
}
|
||||
return "0";
|
||||
}
|
||||
@@ -1238,9 +1234,9 @@ private:
|
||||
else if (next < count)
|
||||
expr += ", ";
|
||||
}
|
||||
if (meta->lod) {
|
||||
for (std::size_t i = 0; i < meta->extras.size(); ++i) {
|
||||
expr += ", ";
|
||||
expr += CastOperand(Visit(meta->lod), Type::Int);
|
||||
expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
|
||||
}
|
||||
expr += ')';
|
||||
|
||||
@@ -1588,4 +1584,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
|
||||
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
} // namespace OpenGL::GLShader
|
||||
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
|
||||
layout (location = 6) out vec4 FragColor6;
|
||||
layout (location = 7) out vec4 FragColor7;
|
||||
|
||||
layout (location = 0) in noperspective vec4 position;
|
||||
layout (location = 0) in vec4 position;
|
||||
|
||||
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
|
||||
vec4 viewport_flip;
|
||||
@@ -172,4 +172,4 @@ void main() {
|
||||
return {out, program.second};
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
} // namespace OpenGL::GLShader
|
||||
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
|
||||
protected_resources.push_back(resource);
|
||||
}
|
||||
|
||||
void VKFence::Unprotect(const VKResource* resource) {
|
||||
void VKFence::Unprotect(VKResource* resource) {
|
||||
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
|
||||
if (it != protected_resources.end()) {
|
||||
protected_resources.erase(it);
|
||||
}
|
||||
ASSERT(it != protected_resources.end());
|
||||
|
||||
resource->OnFenceRemoval(this);
|
||||
protected_resources.erase(it);
|
||||
}
|
||||
|
||||
VKFenceWatch::VKFenceWatch() = default;
|
||||
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
|
||||
}
|
||||
|
||||
void VKFenceWatch::Wait() {
|
||||
if (!fence) {
|
||||
if (fence == nullptr) {
|
||||
return;
|
||||
}
|
||||
fence->Wait();
|
||||
fence->Unprotect(this);
|
||||
fence = nullptr;
|
||||
}
|
||||
|
||||
void VKFenceWatch::Watch(VKFence& new_fence) {
|
||||
|
||||
@@ -63,7 +63,7 @@ public:
|
||||
void Protect(VKResource* resource);
|
||||
|
||||
/// Removes protection for a resource.
|
||||
void Unprotect(const VKResource* resource);
|
||||
void Unprotect(VKResource* resource);
|
||||
|
||||
/// Retreives the fence.
|
||||
operator vk::Fence() const {
|
||||
|
||||
90
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
90
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
||||
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
||||
|
||||
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
|
||||
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
|
||||
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
|
||||
pipeline_stage} {
|
||||
CreateBuffers(memory_manager, usage);
|
||||
ReserveWatches(WATCHES_INITIAL_RESERVE);
|
||||
}
|
||||
|
||||
VKStreamBuffer::~VKStreamBuffer() = default;
|
||||
|
||||
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
|
||||
ASSERT(size <= buffer_size);
|
||||
mapped_size = size;
|
||||
|
||||
if (offset + size > buffer_size) {
|
||||
// The buffer would overflow, save the amount of used buffers, signal an invalidation and
|
||||
// reset the state.
|
||||
invalidation_mark = used_watches;
|
||||
used_watches = 0;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
|
||||
}
|
||||
|
||||
VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
|
||||
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
|
||||
|
||||
if (invalidation_mark) {
|
||||
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
|
||||
exctx = scheduler.Flush();
|
||||
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
|
||||
[&](auto& resource) { resource->Wait(); });
|
||||
invalidation_mark = std::nullopt;
|
||||
}
|
||||
|
||||
if (used_watches + 1 >= watches.size()) {
|
||||
// Ensure that there are enough watches.
|
||||
ReserveWatches(WATCHES_RESERVE_CHUNK);
|
||||
}
|
||||
// Add a watch for this allocation.
|
||||
watches[used_watches++]->Watch(exctx.GetFence());
|
||||
|
||||
offset += size;
|
||||
|
||||
return exctx;
|
||||
}
|
||||
|
||||
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
|
||||
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
|
||||
nullptr);
|
||||
|
||||
const auto dev = device.GetLogical();
|
||||
const auto& dld = device.GetDispatchLoader();
|
||||
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
|
||||
commit = memory_manager.Commit(*buffer, true);
|
||||
mapped_pointer = commit->GetData();
|
||||
}
|
||||
|
||||
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
|
||||
const std::size_t previous_size = watches.size();
|
||||
watches.resize(previous_size + grow_size);
|
||||
std::generate(watches.begin() + previous_size, watches.end(),
|
||||
[]() { return std::make_unique<VKFenceWatch>(); });
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
72
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
72
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKDevice;
|
||||
class VKFence;
|
||||
class VKFenceWatch;
|
||||
class VKResourceManager;
|
||||
class VKScheduler;
|
||||
|
||||
class VKStreamBuffer {
|
||||
public:
|
||||
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
|
||||
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
|
||||
~VKStreamBuffer();
|
||||
|
||||
/**
|
||||
* Reserves a region of memory from the stream buffer.
|
||||
* @param size Size to reserve.
|
||||
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
|
||||
* offset and a boolean that's true when buffer has been invalidated.
|
||||
*/
|
||||
std::tuple<u8*, u64, bool> Reserve(u64 size);
|
||||
|
||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||
[[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
|
||||
|
||||
vk::Buffer GetBuffer() const {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Creates Vulkan buffer handles committing the required the required memory.
|
||||
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
|
||||
|
||||
/// Increases the amount of watches available.
|
||||
void ReserveWatches(std::size_t grow_size);
|
||||
|
||||
const VKDevice& device; ///< Vulkan device manager.
|
||||
VKScheduler& scheduler; ///< Command scheduler.
|
||||
const u64 buffer_size; ///< Total size of the stream buffer.
|
||||
const vk::AccessFlags access; ///< Access usage of this stream buffer.
|
||||
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
|
||||
|
||||
UniqueBuffer buffer; ///< Mapped buffer.
|
||||
VKMemoryCommit commit; ///< Memory commit.
|
||||
u8* mapped_pointer{}; ///< Pointer to the host visible commit
|
||||
|
||||
u64 offset{}; ///< Buffer iterator.
|
||||
u64 mapped_size{}; ///< Size reserved for the current copy.
|
||||
|
||||
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
|
||||
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
|
||||
std::optional<std::size_t>
|
||||
invalidation_mark{}; ///< Number of watches used in the current invalidation.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -165,7 +165,6 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
|
||||
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
|
||||
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
|
||||
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
|
||||
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
|
||||
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
|
||||
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
|
||||
|
||||
@@ -17,6 +17,24 @@ using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureMiscMode;
|
||||
using Tegra::Shader::TextureProcessMode;
|
||||
using Tegra::Shader::TextureType;
|
||||
|
||||
static std::size_t GetCoordCount(TextureType texture_type) {
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
return 1;
|
||||
case TextureType::Texture2D:
|
||||
return 2;
|
||||
case TextureType::Texture3D:
|
||||
case TextureType::TextureCube:
|
||||
return 3;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
@@ -30,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
|
||||
"Unaligned attribute loads are not supported");
|
||||
|
||||
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
|
||||
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
|
||||
Tegra::Shader::IpaSampleMode::Default};
|
||||
|
||||
u64 next_element = instr.attribute.fmt20.element;
|
||||
@@ -229,6 +247,194 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEX: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const TextureType texture_type{instr.tex.texture_type};
|
||||
const bool is_array = instr.tex.array != 0;
|
||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEXS: {
|
||||
const TextureType texture_type{instr.texs.GetTextureType()};
|
||||
const bool is_array{instr.texs.IsArrayTexture()};
|
||||
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.texs.GetTextureProcessMode();
|
||||
|
||||
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const Node4 components =
|
||||
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
|
||||
|
||||
if (instr.texs.fp32_flag) {
|
||||
WriteTexsInstructionFloat(bb, instr, components);
|
||||
} else {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, components);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4: {
|
||||
ASSERT(instr.tld4.array == 0);
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||
"PTP is not implemented");
|
||||
|
||||
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const auto texture_type = instr.tld4.texture_type.Value();
|
||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||
const bool is_array = instr.tld4.array != 0;
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4S: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
|
||||
const Node op_a = GetRegister(instr.gpr8);
|
||||
const Node op_b = GetRegister(instr.gpr20);
|
||||
|
||||
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
|
||||
std::vector<Node> coords;
|
||||
if (depth_compare) {
|
||||
// Note: TLD4S coordinate encoding works just like TEXS's
|
||||
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_y);
|
||||
coords.push_back(op_b);
|
||||
} else {
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_b);
|
||||
}
|
||||
std::vector<Node> extras;
|
||||
extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
|
||||
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, extras, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, values);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ: {
|
||||
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
// TODO: The new commits on the texture refactor, change the way samplers work.
|
||||
// Sadly, not all texture instructions specify the type of texture their sampler
|
||||
// uses. This must be fixed at a later instance.
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
|
||||
|
||||
u32 indexer = 0;
|
||||
switch (instr.txq.query_type) {
|
||||
case Tegra::Shader::TextureQueryType::Dimension: {
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||
SetTemporal(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
|
||||
static_cast<u32>(instr.txq.query_type.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TMML: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
|
||||
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
auto texture_type = instr.tmml.texture_type.Value();
|
||||
const bool is_array = instr.tmml.array != 0;
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
std::vector<Node> coords;
|
||||
|
||||
// TODO: Add coordinates for different samplers once other texture types are implemented.
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
coords.push_back(GetRegister(instr.gpr8));
|
||||
break;
|
||||
case TextureType::Texture2D:
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
|
||||
|
||||
// Fallback to interpreting as a 2D texture for now
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
texture_type = TextureType::Texture2D;
|
||||
}
|
||||
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporal(bb, element, value);
|
||||
}
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLDS: {
|
||||
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
|
||||
const bool is_array{instr.tlds.IsArrayTexture()};
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
|
||||
|
||||
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
@@ -236,4 +442,291 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
|
||||
bool is_array, bool is_shadow) {
|
||||
const auto offset = static_cast<std::size_t>(sampler.index.Value());
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto itr =
|
||||
std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
|
||||
if (itr != used_samplers.end()) {
|
||||
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
|
||||
itr->IsShadow() == is_shadow);
|
||||
return *itr;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this sampler
|
||||
const std::size_t next_index = used_samplers.size();
|
||||
const Sampler entry{offset, next_index, type, is_array, is_shadow};
|
||||
return *used_samplers.emplace(entry).first;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
||||
u32 dest_elem = 0;
|
||||
for (u32 elem = 0; elem < 4; ++elem) {
|
||||
if (!instr.tex.IsComponentEnabled(elem)) {
|
||||
// Skip disabled components
|
||||
continue;
|
||||
}
|
||||
SetTemporal(bb, dest_elem++, components[elem]);
|
||||
}
|
||||
// After writing values in temporals, move them to the real registers
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
|
||||
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
|
||||
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
SetTemporal(bb, dest_elem++, components[component]);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
if (i < 2) {
|
||||
// Write the first two swizzle components to gpr0 and gpr0+1
|
||||
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
|
||||
} else {
|
||||
ASSERT(instr.texs.HasTwoDestinations());
|
||||
// Write the rest of the swizzle components to gpr28 and gpr28+1
|
||||
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
|
||||
// float instruction).
|
||||
|
||||
Node4 values;
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
values[dest_elem++] = components[component];
|
||||
}
|
||||
if (dest_elem == 0)
|
||||
return;
|
||||
|
||||
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
|
||||
|
||||
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
|
||||
if (dest_elem <= 2) {
|
||||
SetRegister(bb, instr.gpr0, first_value);
|
||||
return;
|
||||
}
|
||||
|
||||
SetTemporal(bb, 0, first_value);
|
||||
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
|
||||
|
||||
SetRegister(bb, instr.gpr0, GetTemporal(0));
|
||||
SetRegister(bb, instr.gpr28, GetTemporal(1));
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset) {
|
||||
const bool is_array = array;
|
||||
const bool is_shadow = depth_compare;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
|
||||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
|
||||
"This method is not supported.");
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
|
||||
|
||||
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
|
||||
process_mode == TextureProcessMode::LL ||
|
||||
process_mode == TextureProcessMode::LLA;
|
||||
|
||||
// LOD selection (either via bias or explicit textureLod) not supported in GL for
|
||||
// sampler2DArrayShadow and samplerCubeArrayShadow.
|
||||
const bool gl_lod_supported =
|
||||
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
|
||||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
|
||||
|
||||
const OperationCode read_method =
|
||||
lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
|
||||
|
||||
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
|
||||
|
||||
std::vector<Node> extras;
|
||||
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
|
||||
if (process_mode == TextureProcessMode::LZ) {
|
||||
extras.push_back(Immediate(0.0f));
|
||||
} else {
|
||||
// If present, lod or bias are always stored in the register indexed by the gpr20
|
||||
// field with an offset depending on the usage of the other registers
|
||||
extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
|
||||
}
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, extras, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
}
|
||||
// 1D.DC in OpenGL the 2nd component is ignored.
|
||||
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
|
||||
coords.push_back(Immediate(0.0f));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
const u64 last_coord_register =
|
||||
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
const u32 bias_offset = coord_count > 2 ? 1 : 0;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||
bool is_array) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (size_t i = 0; i < coord_count; ++i)
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
|
||||
const std::size_t type_coord_count = GetCoordCount(texture_type);
|
||||
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// if is array gpr20 is used
|
||||
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
|
||||
|
||||
const u64 last_coord_register =
|
||||
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < type_coord_count; ++i) {
|
||||
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
// When lod is used always is in gpr20
|
||||
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {lod}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
||||
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
|
||||
std::size_t max_coords, std::size_t max_inputs) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
|
||||
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
|
||||
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
|
||||
UNIMPLEMENTED_MSG("Unsupported Texture operation");
|
||||
total_coord_count = std::min(total_coord_count, max_coords);
|
||||
}
|
||||
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
|
||||
total_coord_count +=
|
||||
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
|
||||
|
||||
return {coord_count, total_coord_count};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
@@ -135,18 +135,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
instr.ipa.sample_mode.Value()};
|
||||
|
||||
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
|
||||
Node value = attr;
|
||||
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
|
||||
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
|
||||
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
|
||||
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
|
||||
// In theory by setting them as perspective, OpenGL does the perspective correction.
|
||||
// A way must figured to reverse the last step of it.
|
||||
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
|
||||
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
|
||||
}
|
||||
}
|
||||
value = GetSaturatedFloat(value, instr.ipa.saturate);
|
||||
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
@@ -186,4 +175,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -1,534 +0,0 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureMiscMode;
|
||||
using Tegra::Shader::TextureProcessMode;
|
||||
using Tegra::Shader::TextureType;
|
||||
|
||||
static std::size_t GetCoordCount(TextureType texture_type) {
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
return 1;
|
||||
case TextureType::Texture2D:
|
||||
return 2;
|
||||
case TextureType::Texture3D:
|
||||
case TextureType::TextureCube:
|
||||
return 3;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
const Instruction instr = {program_code[pc]};
|
||||
const auto opcode = OpCode::Decode(instr);
|
||||
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::TEX: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
|
||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const TextureType texture_type{instr.tex.texture_type};
|
||||
const bool is_array = instr.tex.array != 0;
|
||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||
WriteTexInstructionFloat(
|
||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TEXS: {
|
||||
const TextureType texture_type{instr.texs.GetTextureType()};
|
||||
const bool is_array{instr.texs.IsArrayTexture()};
|
||||
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
|
||||
const auto process_mode = instr.texs.GetTextureProcessMode();
|
||||
|
||||
if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const Node4 components =
|
||||
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
|
||||
|
||||
if (instr.texs.fp32_flag) {
|
||||
WriteTexsInstructionFloat(bb, instr, components);
|
||||
} else {
|
||||
WriteTexsInstructionHalfFloat(bb, instr, components);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4: {
|
||||
ASSERT(instr.tld4.array == 0);
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||
"PTP is not implemented");
|
||||
|
||||
if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const auto texture_type = instr.tld4.texture_type.Value();
|
||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||
const bool is_array = instr.tld4.array != 0;
|
||||
WriteTexInstructionFloat(bb, instr,
|
||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLD4S: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
|
||||
const Node op_a = GetRegister(instr.gpr8);
|
||||
const Node op_b = GetRegister(instr.gpr20);
|
||||
|
||||
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
|
||||
std::vector<Node> coords;
|
||||
if (depth_compare) {
|
||||
// Note: TLD4S coordinate encoding works just like TEXS's
|
||||
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_y);
|
||||
coords.push_back(op_b);
|
||||
} else {
|
||||
coords.push_back(op_a);
|
||||
coords.push_back(op_b);
|
||||
}
|
||||
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
|
||||
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, values);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ: {
|
||||
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
// TODO: The new commits on the texture refactor, change the way samplers work.
|
||||
// Sadly, not all texture instructions specify the type of texture their sampler
|
||||
// uses. This must be fixed at a later instance.
|
||||
const auto& sampler =
|
||||
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
|
||||
|
||||
u32 indexer = 0;
|
||||
switch (instr.txq.query_type) {
|
||||
case Tegra::Shader::TextureQueryType::Dimension: {
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||
SetTemporal(bb, indexer++, value);
|
||||
}
|
||||
for (u32 i = 0; i < indexer; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
|
||||
static_cast<u32>(instr.txq.query_type.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TMML: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
|
||||
"NDV is not implemented");
|
||||
|
||||
if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
auto texture_type = instr.tmml.texture_type.Value();
|
||||
const bool is_array = instr.tmml.array != 0;
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
std::vector<Node> coords;
|
||||
|
||||
// TODO: Add coordinates for different samplers once other texture types are implemented.
|
||||
switch (texture_type) {
|
||||
case TextureType::Texture1D:
|
||||
coords.push_back(GetRegister(instr.gpr8));
|
||||
break;
|
||||
case TextureType::Texture2D:
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
|
||||
|
||||
// Fallback to interpreting as a 2D texture for now
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 0));
|
||||
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
|
||||
texture_type = TextureType::Texture2D;
|
||||
}
|
||||
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporal(bb, element, value);
|
||||
}
|
||||
for (u32 element = 0; element < 2; ++element) {
|
||||
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TLDS: {
|
||||
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
|
||||
const bool is_array{instr.tlds.IsArrayTexture()};
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
|
||||
|
||||
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
}
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
|
||||
bool is_array, bool is_shadow) {
|
||||
const auto offset = static_cast<std::size_t>(sampler.index.Value());
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto itr =
|
||||
std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
[&](const Sampler& entry) { return entry.GetOffset() == offset; });
|
||||
if (itr != used_samplers.end()) {
|
||||
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
|
||||
itr->IsShadow() == is_shadow);
|
||||
return *itr;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this sampler
|
||||
const std::size_t next_index = used_samplers.size();
|
||||
const Sampler entry{offset, next_index, type, is_array, is_shadow};
|
||||
return *used_samplers.emplace(entry).first;
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
|
||||
u32 dest_elem = 0;
|
||||
for (u32 elem = 0; elem < 4; ++elem) {
|
||||
if (!instr.tex.IsComponentEnabled(elem)) {
|
||||
// Skip disabled components
|
||||
continue;
|
||||
}
|
||||
SetTemporal(bb, dest_elem++, components[elem]);
|
||||
}
|
||||
// After writing values in temporals, move them to the real registers
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
|
||||
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
|
||||
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
SetTemporal(bb, dest_elem++, components[component]);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < dest_elem; ++i) {
|
||||
if (i < 2) {
|
||||
// Write the first two swizzle components to gpr0 and gpr0+1
|
||||
SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
|
||||
} else {
|
||||
ASSERT(instr.texs.HasTwoDestinations());
|
||||
// Write the rest of the swizzle components to gpr28 and gpr28+1
|
||||
SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||
const Node4& components) {
|
||||
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
|
||||
// float instruction).
|
||||
|
||||
Node4 values;
|
||||
u32 dest_elem = 0;
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (!instr.texs.IsComponentEnabled(component))
|
||||
continue;
|
||||
values[dest_elem++] = components[component];
|
||||
}
|
||||
if (dest_elem == 0)
|
||||
return;
|
||||
|
||||
std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
|
||||
|
||||
const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
|
||||
if (dest_elem <= 2) {
|
||||
SetRegister(bb, instr.gpr0, first_value);
|
||||
return;
|
||||
}
|
||||
|
||||
SetTemporal(bb, 0, first_value);
|
||||
SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
|
||||
|
||||
SetRegister(bb, instr.gpr0, GetTemporal(0));
|
||||
SetRegister(bb, instr.gpr28, GetTemporal(1));
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||
Node array, Node depth_compare, u32 bias_offset) {
|
||||
const bool is_array = array;
|
||||
const bool is_shadow = depth_compare;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
|
||||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
|
||||
"This method is not supported.");
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
|
||||
|
||||
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
|
||||
process_mode == TextureProcessMode::LL ||
|
||||
process_mode == TextureProcessMode::LLA;
|
||||
|
||||
// LOD selection (either via bias or explicit textureLod) not supported in GL for
|
||||
// sampler2DArrayShadow and samplerCubeArrayShadow.
|
||||
const bool gl_lod_supported =
|
||||
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
|
||||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
|
||||
|
||||
const OperationCode read_method =
|
||||
(lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
|
||||
|
||||
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
|
||||
|
||||
Node bias = {};
|
||||
Node lod = {};
|
||||
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
|
||||
switch (process_mode) {
|
||||
case TextureProcessMode::LZ:
|
||||
lod = Immediate(0.0f);
|
||||
break;
|
||||
case TextureProcessMode::LB:
|
||||
// If present, lod or bias are always stored in the register indexed by the gpr20
|
||||
// field with an offset depending on the usage of the other registers
|
||||
bias = GetRegister(instr.gpr20.Value() + bias_offset);
|
||||
break;
|
||||
case TextureProcessMode::LL:
|
||||
lod = GetRegister(instr.gpr20.Value() + bias_offset);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
}
|
||||
// 1D.DC in OpenGL the 2nd component is ignored.
|
||||
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
|
||||
coords.push_back(Immediate(0.0f));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
||||
const bool lod_bias_enabled =
|
||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
||||
|
||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
const u64 last_coord_register =
|
||||
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
const u32 bias_offset = coord_count > 2 ? 1 : 0;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
const bool last = (i == (coord_count - 1)) && (coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
|
||||
Node dc{};
|
||||
if (depth_compare) {
|
||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||
// or bias are used
|
||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
||||
dc = GetRegister(depth_register);
|
||||
}
|
||||
|
||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||
bool is_array) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
|
||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (size_t i = 0; i < coord_count; ++i)
|
||||
coords.push_back(GetRegister(coord_register + i));
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
|
||||
const std::size_t type_coord_count = GetCoordCount(texture_type);
|
||||
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
// if is array gpr20 is used
|
||||
const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
|
||||
|
||||
const u64 last_coord_register =
|
||||
((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
|
||||
? static_cast<u64>(instr.gpr20.Value())
|
||||
: coord_register + 1;
|
||||
|
||||
std::vector<Node> coords;
|
||||
for (std::size_t i = 0; i < type_coord_count; ++i) {
|
||||
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
|
||||
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
|
||||
}
|
||||
|
||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||
// When lod is used always is in gpr20
|
||||
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
|
||||
|
||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
||||
TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
|
||||
std::size_t max_coords, std::size_t max_inputs) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
|
||||
std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
|
||||
if (total_coord_count > max_coords || total_reg_count > max_inputs) {
|
||||
UNIMPLEMENTED_MSG("Unsupported Texture operation");
|
||||
total_coord_count = std::min(total_coord_count, max_coords);
|
||||
}
|
||||
// 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
|
||||
total_coord_count +=
|
||||
(depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
|
||||
|
||||
return {coord_count, total_coord_count};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
@@ -290,9 +290,7 @@ struct MetaTexture {
|
||||
const Sampler& sampler;
|
||||
Node array{};
|
||||
Node depth_compare{};
|
||||
Node bias{};
|
||||
Node lod{};
|
||||
Node component{};
|
||||
std::vector<Node> extras;
|
||||
u32 element{};
|
||||
};
|
||||
|
||||
@@ -616,7 +614,6 @@ private:
|
||||
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeConversion(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeMemory(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeTexture(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
|
||||
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
|
||||
|
||||
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
return {node, cursor};
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(node)) {
|
||||
const auto& conditional_code = conditional->GetCode();
|
||||
const auto [found, internal_cursor] = FindOperation(
|
||||
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
|
||||
const auto& code = conditional->GetCode();
|
||||
const auto [found, internal_cursor] =
|
||||
FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
|
||||
if (found)
|
||||
return {found, cursor};
|
||||
}
|
||||
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
|
||||
return nullptr;
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
|
||||
const auto& conditional_code = conditional->GetCode();
|
||||
return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
|
||||
const auto& code = conditional->GetCode();
|
||||
return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user