Compare commits
89 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70f7ff3db5 | ||
|
|
caa25146f2 | ||
|
|
432d48d9c8 | ||
|
|
cc92b7fd94 | ||
|
|
444b25bae1 | ||
|
|
2e4dde12c7 | ||
|
|
c0f17e1b27 | ||
|
|
4bda2b475f | ||
|
|
7ef897a277 | ||
|
|
457826a83b | ||
|
|
8b251fc3f6 | ||
|
|
3c05988df2 | ||
|
|
6d74490139 | ||
|
|
020dbcdbc7 | ||
|
|
5bcbb8de45 | ||
|
|
990fe2b3fc | ||
|
|
f6245dc40a | ||
|
|
eaca61e073 | ||
|
|
3e33a878dc | ||
|
|
1ee0540f82 | ||
|
|
58fec43768 | ||
|
|
12b4c9c04c | ||
|
|
68ed60cee4 | ||
|
|
6c812a0c84 | ||
|
|
625a4af73a | ||
|
|
9e2997c4b6 | ||
|
|
8804a4eb23 | ||
|
|
3f0985c7b0 | ||
|
|
c1cc99584c | ||
|
|
bbfad79c89 | ||
|
|
1428451722 | ||
|
|
e07976a22b | ||
|
|
9c9008ac81 | ||
|
|
8945fafcc0 | ||
|
|
f2aa816679 | ||
|
|
f4626512ff | ||
|
|
67d4f190f7 | ||
|
|
16809c1fa7 | ||
|
|
b56ad93bbc | ||
|
|
2d0c4f2b1d | ||
|
|
af5ecb0b15 | ||
|
|
688a9fbfa6 | ||
|
|
6c7eb81f7d | ||
|
|
e82e3e06be | ||
|
|
4d9af4a9d2 | ||
|
|
a4269c285a | ||
|
|
301e9bbc03 | ||
|
|
66ae79de13 | ||
|
|
b78328f19a | ||
|
|
3ecc03ec1b | ||
|
|
a0c697124c | ||
|
|
03ccd8bf43 | ||
|
|
bbeb6e460c | ||
|
|
ddbf851ef6 | ||
|
|
a045e860dd | ||
|
|
b62ffb612d | ||
|
|
6f031f08fe | ||
|
|
04cb05fce0 | ||
|
|
a7e610403d | ||
|
|
eceee8c3d9 | ||
|
|
2b110d61e7 | ||
|
|
6804a43f49 | ||
|
|
48bcb91a2e | ||
|
|
09c9be3703 | ||
|
|
80bcc18788 | ||
|
|
ebd811b535 | ||
|
|
306c791e67 | ||
|
|
f9c6d39a6c | ||
|
|
4814d87385 | ||
|
|
d09aa0182f | ||
|
|
581a7d785b | ||
|
|
3630bfaef3 | ||
|
|
2793304117 | ||
|
|
8d694701bc | ||
|
|
4c82e47edd | ||
|
|
d33251db93 | ||
|
|
f800e485c9 | ||
|
|
c897c55e3c | ||
|
|
cb1497d0d7 | ||
|
|
ce448ce770 | ||
|
|
18637766ef | ||
|
|
aad0cbf024 | ||
|
|
93ac5a6a6d | ||
|
|
c541559767 | ||
|
|
0f89828073 | ||
|
|
a5a94f52ff | ||
|
|
d46c9c4659 | ||
|
|
aa13ee5c4a | ||
|
|
10eaf31af3 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -30,7 +30,7 @@
|
||||
url = https://github.com/yuzu-emu/sirit
|
||||
[submodule "mbedtls"]
|
||||
path = externals/mbedtls
|
||||
url = https://github.com/yuzu-emu/mbedtls
|
||||
url = https://github.com/Mbed-TLS/mbedtls
|
||||
[submodule "xbyak"]
|
||||
path = externals/xbyak
|
||||
url = https://github.com/herumi/xbyak.git
|
||||
|
||||
@@ -3,13 +3,8 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.22)
|
||||
|
||||
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
|
||||
# some of its variables, which is only possible in 3.13+
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
include(CMakeDependentOption)
|
||||
|
||||
|
||||
43
CMakeModules/FindMbedTLS.cmake
Normal file
43
CMakeModules/FindMbedTLS.cmake
Normal file
@@ -0,0 +1,43 @@
|
||||
# SPDX-FileCopyrightText: 2021 Andrea Pappacoda <andrea@pappacoda.it>
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# MbedTLS 3.0.0 will ship with a CMake package config file,
|
||||
# see ARMmbed/mbedtls@d259e347e6e3a630acfc1a811709ca05e5d3b92e,
|
||||
# so when yuzu will switch to that version this won't be required anymore.
|
||||
#
|
||||
# yuzu only uses mbedcrypto, searching for mbedtls and mbedx509 is not
|
||||
# needed.
|
||||
|
||||
find_path(MbedTLS_INCLUDE_DIR mbedtls/cipher.h)
|
||||
|
||||
find_library(MbedTLS_LIBRARY mbedcrypto)
|
||||
|
||||
if (MbedTLS_INCLUDE_DIR AND MbedTLS_LIBRARY)
|
||||
# Check for CMAC support
|
||||
include(CheckSymbolExists)
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${MbedTLS_LIBRARY})
|
||||
check_symbol_exists(mbedtls_cipher_cmac ${MbedTLS_INCLUDE_DIR}/mbedtls/cmac.h mbedcrypto_HAS_CMAC)
|
||||
unset(CMAKE_REQUIRED_LIBRARIES)
|
||||
|
||||
# Check if version 2.x is available
|
||||
file(READ "${MbedTLS_INCLUDE_DIR}/mbedtls/version.h" MbedTLS_VERSION_FILE)
|
||||
string(REGEX MATCH "#define[ ]+MBEDTLS_VERSION_STRING[ ]+\"([0-9.]+)\"" _ ${MbedTLS_VERSION_FILE})
|
||||
set(MbedTLS_VERSION "${CMAKE_MATCH_1}")
|
||||
|
||||
if (NOT TARGET MbedTLS::mbedcrypto)
|
||||
add_library(MbedTLS::mbedcrypto UNKNOWN IMPORTED GLOBAL)
|
||||
set_target_properties(MbedTLS::mbedcrypto PROPERTIES
|
||||
IMPORTED_LOCATION "${MbedTLS_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${MbedTLS_INCLUDE_DIR}"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(MbedTLS
|
||||
REQUIRED_VARS
|
||||
MbedTLS_LIBRARY
|
||||
MbedTLS_INCLUDE_DIR
|
||||
mbedcrypto_HAS_CMAC
|
||||
VERSION_VAR MbedTLS_VERSION
|
||||
)
|
||||
39
externals/CMakeLists.txt
vendored
39
externals/CMakeLists.txt
vendored
@@ -1,9 +1,9 @@
|
||||
# SPDX-FileCopyrightText: 2016 Citra Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
# Dynarmic has cmake_minimum_required(3.12) and we may want to override
|
||||
# some of its variables, which is only possible in 3.13+
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
# xbyak
|
||||
if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
|
||||
@@ -12,8 +12,7 @@ endif()
|
||||
|
||||
# Dynarmic
|
||||
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
|
||||
set(DYNARMIC_NO_BUNDLED_FMT ON)
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON)
|
||||
add_subdirectory(dynarmic EXCLUDE_FROM_ALL)
|
||||
add_library(dynarmic::dynarmic ALIAS dynarmic)
|
||||
endif()
|
||||
@@ -31,9 +30,23 @@ if (NOT TARGET inih::INIReader)
|
||||
add_subdirectory(inih)
|
||||
endif()
|
||||
|
||||
# mbedtls
|
||||
add_subdirectory(mbedtls EXCLUDE_FROM_ALL)
|
||||
target_include_directories(mbedtls PUBLIC ./mbedtls/include)
|
||||
# MbedTLS
|
||||
find_package(MbedTLS 2.16)
|
||||
if(NOT MbedTLS_FOUND)
|
||||
message(STATUS "MbedTLS not found, falling back to externals")
|
||||
|
||||
set(ENABLE_PROGRAMS OFF CACHE BOOL "")
|
||||
set(ENABLE_TESTING OFF CACHE BOOL "")
|
||||
|
||||
# Edit MbedTLS config header to enable CMAC
|
||||
file(READ "./mbedtls/include/mbedtls/config.h" MbedTLS_CONFIG_FILE)
|
||||
string(REPLACE "//#define MBEDTLS_CMAC_C" "#define MBEDTLS_CMAC_C" MbedTLS_CONFIG_FILE_CMAC "${MbedTLS_CONFIG_FILE}")
|
||||
file(WRITE "./mbedtls/include/mbedtls/config.h" "${MbedTLS_CONFIG_FILE_CMAC}")
|
||||
|
||||
add_subdirectory(mbedtls EXCLUDE_FROM_ALL)
|
||||
target_include_directories(mbedcrypto PUBLIC ./mbedtls/include)
|
||||
add_library(MbedTLS::mbedcrypto ALIAS mbedcrypto)
|
||||
endif()
|
||||
|
||||
# MicroProfile
|
||||
add_library(microprofile INTERFACE)
|
||||
@@ -60,10 +73,10 @@ if (YUZU_USE_EXTERNAL_SDL2)
|
||||
Locale Power Render)
|
||||
foreach(_SUB ${SDL_UNUSED_SUBSYSTEMS})
|
||||
string(TOUPPER ${_SUB} _OPT)
|
||||
option(SDL_${_OPT} "" OFF)
|
||||
set(SDL_${_OPT} OFF)
|
||||
endforeach()
|
||||
|
||||
option(HIDAPI "" ON)
|
||||
set(HIDAPI ON)
|
||||
endif()
|
||||
set(SDL_STATIC ON)
|
||||
set(SDL_SHARED OFF)
|
||||
@@ -83,7 +96,7 @@ endif()
|
||||
|
||||
# Cubeb
|
||||
if (ENABLE_CUBEB AND NOT TARGET cubeb::cubeb)
|
||||
set(BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(BUILD_TESTS OFF)
|
||||
add_subdirectory(cubeb EXCLUDE_FROM_ALL)
|
||||
add_library(cubeb::cubeb ALIAS cubeb)
|
||||
endif()
|
||||
@@ -98,6 +111,7 @@ endif()
|
||||
# Sirit
|
||||
add_subdirectory(sirit EXCLUDE_FROM_ALL)
|
||||
|
||||
# httplib
|
||||
if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
||||
if (NOT WIN32)
|
||||
find_package(OpenSSL 1.1)
|
||||
@@ -108,7 +122,7 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
||||
|
||||
if (WIN32 OR NOT OPENSSL_FOUND)
|
||||
# LibreSSL
|
||||
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
|
||||
set(LIBRESSL_SKIP_INSTALL ON)
|
||||
set(OPENSSLDIR "/etc/ssl/")
|
||||
add_subdirectory(libressl EXCLUDE_FROM_ALL)
|
||||
target_include_directories(ssl INTERFACE ./libressl/include)
|
||||
@@ -118,7 +132,6 @@ if (ENABLE_WEB_SERVICE AND NOT TARGET httplib::httplib)
|
||||
DEFINITION OPENSSL_LIBS)
|
||||
endif()
|
||||
|
||||
# httplib
|
||||
add_library(httplib INTERFACE)
|
||||
target_include_directories(httplib INTERFACE ./cpp-httplib)
|
||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
|
||||
2
externals/dynarmic
vendored
2
externals/dynarmic
vendored
Submodule externals/dynarmic updated: bd570e093c...a1cbea7948
2
externals/mbedtls
vendored
2
externals/mbedtls
vendored
Submodule externals/mbedtls updated: 8c88150ca1...dd79db1001
2
externals/sirit
vendored
2
externals/sirit
vendored
Submodule externals/sirit updated: d7ad93a888...ab75463999
2
externals/xbyak
vendored
2
externals/xbyak
vendored
Submodule externals/xbyak updated: 348e3e548e...a1ac3750f9
@@ -97,6 +97,7 @@ add_library(common STATIC
|
||||
point.h
|
||||
precompiled_headers.h
|
||||
quaternion.h
|
||||
range_map.h
|
||||
reader_writer_queue.h
|
||||
ring_buffer.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
|
||||
|
||||
139
src/common/range_map.h
Normal file
139
src/common/range_map.h
Normal file
@@ -0,0 +1,139 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
template <typename KeyTBase, typename ValueT>
|
||||
class RangeMap {
|
||||
private:
|
||||
using KeyT =
|
||||
std::conditional_t<std::is_signed_v<KeyTBase>, KeyTBase, std::make_signed_t<KeyTBase>>;
|
||||
|
||||
public:
|
||||
explicit RangeMap(ValueT null_value_) : null_value{null_value_} {
|
||||
container.emplace(std::numeric_limits<KeyT>::min(), null_value);
|
||||
};
|
||||
~RangeMap() = default;
|
||||
|
||||
void Map(KeyTBase address, KeyTBase address_end, ValueT value) {
|
||||
KeyT new_address = static_cast<KeyT>(address);
|
||||
KeyT new_address_end = static_cast<KeyT>(address_end);
|
||||
if (new_address < 0) {
|
||||
new_address = 0;
|
||||
}
|
||||
if (new_address_end < 0) {
|
||||
new_address_end = 0;
|
||||
}
|
||||
InternalMap(new_address, new_address_end, value);
|
||||
}
|
||||
|
||||
void Unmap(KeyTBase address, KeyTBase address_end) {
|
||||
Map(address, address_end, null_value);
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const {
|
||||
const KeyT new_address = static_cast<KeyT>(address);
|
||||
if (new_address < 0) {
|
||||
return 0;
|
||||
}
|
||||
return ContinousSizeInternal(new_address);
|
||||
}
|
||||
|
||||
[[nodiscard]] ValueT GetValueAt(KeyT address) const {
|
||||
const KeyT new_address = static_cast<KeyT>(address);
|
||||
if (new_address < 0) {
|
||||
return null_value;
|
||||
}
|
||||
return GetValueInternal(new_address);
|
||||
}
|
||||
|
||||
private:
|
||||
using MapType = std::map<KeyT, ValueT>;
|
||||
using IteratorType = typename MapType::iterator;
|
||||
using ConstIteratorType = typename MapType::const_iterator;
|
||||
|
||||
size_t ContinousSizeInternal(KeyT address) const {
|
||||
const auto it = GetFirstElementBeforeOrOn(address);
|
||||
if (it == container.end() || it->second == null_value) {
|
||||
return 0;
|
||||
}
|
||||
const auto it_end = std::next(it);
|
||||
if (it_end == container.end()) {
|
||||
return std::numeric_limits<KeyT>::max() - address;
|
||||
}
|
||||
return it_end->first - address;
|
||||
}
|
||||
|
||||
ValueT GetValueInternal(KeyT address) const {
|
||||
const auto it = GetFirstElementBeforeOrOn(address);
|
||||
if (it == container.end()) {
|
||||
return null_value;
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
ConstIteratorType GetFirstElementBeforeOrOn(KeyT address) const {
|
||||
auto it = container.lower_bound(address);
|
||||
if (it == container.begin()) {
|
||||
return it;
|
||||
}
|
||||
if (it != container.end() && (it->first == address)) {
|
||||
return it;
|
||||
}
|
||||
--it;
|
||||
return it;
|
||||
}
|
||||
|
||||
ValueT GetFirstValueWithin(KeyT address) {
|
||||
auto it = container.lower_bound(address);
|
||||
if (it == container.begin()) {
|
||||
return it->second;
|
||||
}
|
||||
if (it == container.end()) [[unlikely]] { // this would be a bug
|
||||
return null_value;
|
||||
}
|
||||
--it;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
ValueT GetLastValueWithin(KeyT address) {
|
||||
auto it = container.upper_bound(address);
|
||||
if (it == container.end()) {
|
||||
return null_value;
|
||||
}
|
||||
if (it == container.begin()) [[unlikely]] { // this would be a bug
|
||||
return it->second;
|
||||
}
|
||||
--it;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void InternalMap(KeyT address, KeyT address_end, ValueT value) {
|
||||
const bool must_add_start = GetFirstValueWithin(address) != value;
|
||||
const ValueT last_value = GetLastValueWithin(address_end);
|
||||
const bool must_add_end = last_value != value;
|
||||
auto it = container.lower_bound(address);
|
||||
const auto it_end = container.upper_bound(address_end);
|
||||
while (it != it_end) {
|
||||
it = container.erase(it);
|
||||
}
|
||||
if (must_add_start) {
|
||||
container.emplace(address, value);
|
||||
}
|
||||
if (must_add_end) {
|
||||
container.emplace(address_end, last_value);
|
||||
}
|
||||
}
|
||||
|
||||
ValueT null_value;
|
||||
MapType container;
|
||||
};
|
||||
|
||||
} // namespace Common
|
||||
@@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
||||
// Renderer
|
||||
values.fsr_sharpening_slider.SetGlobal(true);
|
||||
values.renderer_backend.SetGlobal(true);
|
||||
values.renderer_force_max_clock.SetGlobal(true);
|
||||
values.vulkan_device.SetGlobal(true);
|
||||
values.aspect_ratio.SetGlobal(true);
|
||||
values.max_anisotropy.SetGlobal(true);
|
||||
@@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
||||
values.use_asynchronous_shaders.SetGlobal(true);
|
||||
values.use_fast_gpu_time.SetGlobal(true);
|
||||
values.use_pessimistic_flushes.SetGlobal(true);
|
||||
values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
|
||||
values.bg_red.SetGlobal(true);
|
||||
values.bg_green.SetGlobal(true);
|
||||
values.bg_blue.SetGlobal(true);
|
||||
|
||||
@@ -415,6 +415,7 @@ struct Values {
|
||||
// Renderer
|
||||
SwitchableSetting<RendererBackend, true> renderer_backend{
|
||||
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
|
||||
SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"};
|
||||
Setting<bool> renderer_debug{false, "debug"};
|
||||
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
|
||||
Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
|
||||
@@ -451,6 +452,8 @@ struct Values {
|
||||
SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
|
||||
SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
|
||||
SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
|
||||
SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
|
||||
"use_vulkan_driver_pipeline_cache"};
|
||||
|
||||
SwitchableSetting<u8> bg_red{0, "bg_red"};
|
||||
SwitchableSetting<u8> bg_green{0, "bg_green"};
|
||||
@@ -531,6 +534,7 @@ struct Values {
|
||||
Setting<bool> reporting_services{false, "reporting_services"};
|
||||
Setting<bool> quest_flag{false, "quest_flag"};
|
||||
Setting<bool> disable_macro_jit{false, "disable_macro_jit"};
|
||||
Setting<bool> disable_macro_hle{false, "disable_macro_hle"};
|
||||
Setting<bool> extended_logging{false, "extended_logging"};
|
||||
Setting<bool> use_debug_asserts{false, "use_debug_asserts"};
|
||||
Setting<bool> use_auto_stub{false, "use_auto_stub"};
|
||||
|
||||
@@ -226,7 +226,6 @@ add_library(core STATIC
|
||||
hle/kernel/k_page_buffer.h
|
||||
hle/kernel/k_page_heap.cpp
|
||||
hle/kernel/k_page_heap.h
|
||||
hle/kernel/k_page_group.cpp
|
||||
hle/kernel/k_page_group.h
|
||||
hle/kernel/k_page_table.cpp
|
||||
hle/kernel/k_page_table.h
|
||||
@@ -806,7 +805,7 @@ endif()
|
||||
create_target_directory_groups(core)
|
||||
|
||||
target_link_libraries(core PUBLIC common PRIVATE audio_core network video_core)
|
||||
target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::opus)
|
||||
target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json MbedTLS::mbedcrypto Opus::opus)
|
||||
if (MINGW)
|
||||
target_link_libraries(core PRIVATE ${MSWSOCK_LIBRARY})
|
||||
endif()
|
||||
|
||||
@@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
||||
config.enable_cycle_counting = true;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
config.code_cache_size = 128_MiB;
|
||||
#else
|
||||
config.code_cache_size = 512_MiB;
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
|
||||
@@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
||||
config.enable_cycle_counting = true;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
config.code_cache_size = 128_MiB;
|
||||
#else
|
||||
config.code_cache_size = 512_MiB;
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
|
||||
@@ -11,6 +11,11 @@
|
||||
namespace Core::HID {
|
||||
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
|
||||
constexpr s32 HID_TRIGGER_MAX = 0x7fff;
|
||||
// Use a common UUID for TAS and Virtual Gamepad
|
||||
constexpr Common::UUID TAS_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
constexpr Common::UUID VIRTUAL_UUID =
|
||||
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
|
||||
|
||||
@@ -348,10 +353,6 @@ void EmulatedController::ReloadInput() {
|
||||
}
|
||||
}
|
||||
|
||||
// Use a common UUID for TAS
|
||||
static constexpr Common::UUID TAS_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register TAS devices. No need to force update
|
||||
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
|
||||
if (!tas_button_devices[index]) {
|
||||
@@ -377,10 +378,6 @@ void EmulatedController::ReloadInput() {
|
||||
});
|
||||
}
|
||||
|
||||
// Use a common UUID for Virtual Gamepad
|
||||
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
|
||||
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
|
||||
|
||||
// Register virtual devices. No need to force update
|
||||
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
|
||||
if (!virtual_button_devices[index]) {
|
||||
@@ -780,7 +777,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
|
||||
|
||||
// Only read stick values that have the same uuid or are over the threshold to avoid flapping
|
||||
if (controller.stick_values[index].uuid != uuid) {
|
||||
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) {
|
||||
const bool is_tas = uuid == TAS_UUID;
|
||||
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
|
||||
return;
|
||||
}
|
||||
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
|
||||
!stick_value.right) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,13 +27,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si
|
||||
auto& page_table = m_owner->PageTable();
|
||||
|
||||
// Construct the page group.
|
||||
m_page_group.emplace(kernel, page_table.GetBlockInfoManager());
|
||||
m_page_group = {};
|
||||
|
||||
// Lock the memory.
|
||||
R_TRY(page_table.LockForCodeMemory(std::addressof(*m_page_group), addr, size))
|
||||
R_TRY(page_table.LockForCodeMemory(&m_page_group, addr, size))
|
||||
|
||||
// Clear the memory.
|
||||
for (const auto& block : *m_page_group) {
|
||||
for (const auto& block : m_page_group.Nodes()) {
|
||||
std::memset(device_memory.GetPointer<void>(block.GetAddress()), 0xFF, block.GetSize());
|
||||
}
|
||||
|
||||
@@ -51,13 +51,12 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si
|
||||
void KCodeMemory::Finalize() {
|
||||
// Unlock.
|
||||
if (!m_is_mapped && !m_is_owner_mapped) {
|
||||
const size_t size = m_page_group->GetNumPages() * PageSize;
|
||||
m_owner->PageTable().UnlockForCodeMemory(m_address, size, *m_page_group);
|
||||
const size_t size = m_page_group.GetNumPages() * PageSize;
|
||||
m_owner->PageTable().UnlockForCodeMemory(m_address, size, m_page_group);
|
||||
}
|
||||
|
||||
// Close the page group.
|
||||
m_page_group->Close();
|
||||
m_page_group->Finalize();
|
||||
m_page_group = {};
|
||||
|
||||
// Close our reference to our owner.
|
||||
m_owner->Close();
|
||||
@@ -65,7 +64,7 @@ void KCodeMemory::Finalize() {
|
||||
|
||||
Result KCodeMemory::Map(VAddr address, size_t size) {
|
||||
// Validate the size.
|
||||
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
|
||||
// Lock ourselves.
|
||||
KScopedLightLock lk(m_lock);
|
||||
@@ -75,7 +74,7 @@ Result KCodeMemory::Map(VAddr address, size_t size) {
|
||||
|
||||
// Map the memory.
|
||||
R_TRY(kernel.CurrentProcess()->PageTable().MapPages(
|
||||
address, *m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite));
|
||||
address, m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite));
|
||||
|
||||
// Mark ourselves as mapped.
|
||||
m_is_mapped = true;
|
||||
@@ -85,13 +84,13 @@ Result KCodeMemory::Map(VAddr address, size_t size) {
|
||||
|
||||
Result KCodeMemory::Unmap(VAddr address, size_t size) {
|
||||
// Validate the size.
|
||||
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
|
||||
// Lock ourselves.
|
||||
KScopedLightLock lk(m_lock);
|
||||
|
||||
// Unmap the memory.
|
||||
R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, *m_page_group,
|
||||
R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, m_page_group,
|
||||
KMemoryState::CodeOut));
|
||||
|
||||
// Mark ourselves as unmapped.
|
||||
@@ -102,7 +101,7 @@ Result KCodeMemory::Unmap(VAddr address, size_t size) {
|
||||
|
||||
Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission perm) {
|
||||
// Validate the size.
|
||||
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
|
||||
// Lock ourselves.
|
||||
KScopedLightLock lk(m_lock);
|
||||
@@ -126,7 +125,7 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission
|
||||
|
||||
// Map the memory.
|
||||
R_TRY(
|
||||
m_owner->PageTable().MapPages(address, *m_page_group, KMemoryState::GeneratedCode, k_perm));
|
||||
m_owner->PageTable().MapPages(address, m_page_group, KMemoryState::GeneratedCode, k_perm));
|
||||
|
||||
// Mark ourselves as mapped.
|
||||
m_is_owner_mapped = true;
|
||||
@@ -136,13 +135,13 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission
|
||||
|
||||
Result KCodeMemory::UnmapFromOwner(VAddr address, size_t size) {
|
||||
// Validate the size.
|
||||
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
|
||||
|
||||
// Lock ourselves.
|
||||
KScopedLightLock lk(m_lock);
|
||||
|
||||
// Unmap the memory.
|
||||
R_TRY(m_owner->PageTable().UnmapPages(address, *m_page_group, KMemoryState::GeneratedCode));
|
||||
R_TRY(m_owner->PageTable().UnmapPages(address, m_page_group, KMemoryState::GeneratedCode));
|
||||
|
||||
// Mark ourselves as unmapped.
|
||||
m_is_owner_mapped = false;
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/device_memory.h"
|
||||
#include "core/hle/kernel/k_auto_object.h"
|
||||
@@ -51,11 +49,11 @@ public:
|
||||
return m_address;
|
||||
}
|
||||
size_t GetSize() const {
|
||||
return m_is_initialized ? m_page_group->GetNumPages() * PageSize : 0;
|
||||
return m_is_initialized ? m_page_group.GetNumPages() * PageSize : 0;
|
||||
}
|
||||
|
||||
private:
|
||||
std::optional<KPageGroup> m_page_group{};
|
||||
KPageGroup m_page_group{};
|
||||
KProcess* m_owner{};
|
||||
VAddr m_address{};
|
||||
KLightLock m_lock;
|
||||
|
||||
@@ -223,7 +223,7 @@ Result KMemoryManager::AllocatePageGroupImpl(KPageGroup* out, size_t num_pages,
|
||||
|
||||
// Ensure that we don't leave anything un-freed.
|
||||
ON_RESULT_FAILURE {
|
||||
for (const auto& it : *out) {
|
||||
for (const auto& it : out->Nodes()) {
|
||||
auto& manager = this->GetManager(it.GetAddress());
|
||||
const size_t node_num_pages = std::min<u64>(
|
||||
it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
|
||||
@@ -285,7 +285,7 @@ Result KMemoryManager::AllocateAndOpen(KPageGroup* out, size_t num_pages, u32 op
|
||||
m_has_optimized_process[static_cast<size_t>(pool)], true));
|
||||
|
||||
// Open the first reference to the pages.
|
||||
for (const auto& block : *out) {
|
||||
for (const auto& block : out->Nodes()) {
|
||||
PAddr cur_address = block.GetAddress();
|
||||
size_t remaining_pages = block.GetNumPages();
|
||||
while (remaining_pages > 0) {
|
||||
@@ -335,7 +335,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32
|
||||
// Perform optimized memory tracking, if we should.
|
||||
if (optimized) {
|
||||
// Iterate over the allocated blocks.
|
||||
for (const auto& block : *out) {
|
||||
for (const auto& block : out->Nodes()) {
|
||||
// Get the block extents.
|
||||
const PAddr block_address = block.GetAddress();
|
||||
const size_t block_pages = block.GetNumPages();
|
||||
@@ -391,7 +391,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32
|
||||
}
|
||||
} else {
|
||||
// Set all the allocated memory.
|
||||
for (const auto& block : *out) {
|
||||
for (const auto& block : out->Nodes()) {
|
||||
std::memset(m_system.DeviceMemory().GetPointer<void>(block.GetAddress()), fill_pattern,
|
||||
block.GetSize());
|
||||
}
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "core/hle/kernel/k_dynamic_resource_manager.h"
|
||||
#include "core/hle/kernel/k_memory_manager.h"
|
||||
#include "core/hle/kernel/k_page_group.h"
|
||||
#include "core/hle/kernel/kernel.h"
|
||||
#include "core/hle/kernel/svc_results.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
void KPageGroup::Finalize() {
|
||||
KBlockInfo* cur = m_first_block;
|
||||
while (cur != nullptr) {
|
||||
KBlockInfo* next = cur->GetNext();
|
||||
m_manager->Free(cur);
|
||||
cur = next;
|
||||
}
|
||||
|
||||
m_first_block = nullptr;
|
||||
m_last_block = nullptr;
|
||||
}
|
||||
|
||||
void KPageGroup::CloseAndReset() {
|
||||
auto& mm = m_kernel.MemoryManager();
|
||||
|
||||
KBlockInfo* cur = m_first_block;
|
||||
while (cur != nullptr) {
|
||||
KBlockInfo* next = cur->GetNext();
|
||||
mm.Close(cur->GetAddress(), cur->GetNumPages());
|
||||
m_manager->Free(cur);
|
||||
cur = next;
|
||||
}
|
||||
|
||||
m_first_block = nullptr;
|
||||
m_last_block = nullptr;
|
||||
}
|
||||
|
||||
size_t KPageGroup::GetNumPages() const {
|
||||
size_t num_pages = 0;
|
||||
|
||||
for (const auto& it : *this) {
|
||||
num_pages += it.GetNumPages();
|
||||
}
|
||||
|
||||
return num_pages;
|
||||
}
|
||||
|
||||
Result KPageGroup::AddBlock(KPhysicalAddress addr, size_t num_pages) {
|
||||
// Succeed immediately if we're adding no pages.
|
||||
R_SUCCEED_IF(num_pages == 0);
|
||||
|
||||
// Check for overflow.
|
||||
ASSERT(addr < addr + num_pages * PageSize);
|
||||
|
||||
// Try to just append to the last block.
|
||||
if (m_last_block != nullptr) {
|
||||
R_SUCCEED_IF(m_last_block->TryConcatenate(addr, num_pages));
|
||||
}
|
||||
|
||||
// Allocate a new block.
|
||||
KBlockInfo* new_block = m_manager->Allocate();
|
||||
R_UNLESS(new_block != nullptr, ResultOutOfResource);
|
||||
|
||||
// Initialize the block.
|
||||
new_block->Initialize(addr, num_pages);
|
||||
|
||||
// Add the block to our list.
|
||||
if (m_last_block != nullptr) {
|
||||
m_last_block->SetNext(new_block);
|
||||
} else {
|
||||
m_first_block = new_block;
|
||||
}
|
||||
m_last_block = new_block;
|
||||
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
void KPageGroup::Open() const {
|
||||
auto& mm = m_kernel.MemoryManager();
|
||||
|
||||
for (const auto& it : *this) {
|
||||
mm.Open(it.GetAddress(), it.GetNumPages());
|
||||
}
|
||||
}
|
||||
|
||||
void KPageGroup::OpenFirst() const {
|
||||
auto& mm = m_kernel.MemoryManager();
|
||||
|
||||
for (const auto& it : *this) {
|
||||
mm.OpenFirst(it.GetAddress(), it.GetNumPages());
|
||||
}
|
||||
}
|
||||
|
||||
void KPageGroup::Close() const {
|
||||
auto& mm = m_kernel.MemoryManager();
|
||||
|
||||
for (const auto& it : *this) {
|
||||
mm.Close(it.GetAddress(), it.GetNumPages());
|
||||
}
|
||||
}
|
||||
|
||||
bool KPageGroup::IsEquivalentTo(const KPageGroup& rhs) const {
|
||||
auto lit = this->begin();
|
||||
auto rit = rhs.begin();
|
||||
auto lend = this->end();
|
||||
auto rend = rhs.end();
|
||||
|
||||
while (lit != lend && rit != rend) {
|
||||
if (*lit != *rit) {
|
||||
return false;
|
||||
}
|
||||
|
||||
++lit;
|
||||
++rit;
|
||||
}
|
||||
|
||||
return lit == lend && rit == rend;
|
||||
}
|
||||
|
||||
} // namespace Kernel
|
||||
@@ -1,4 +1,4 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
@@ -13,23 +13,24 @@
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
class KBlockInfoManager;
|
||||
class KernelCore;
|
||||
class KPageGroup;
|
||||
|
||||
class KBlockInfo {
|
||||
public:
|
||||
constexpr explicit KBlockInfo() : m_next(nullptr) {}
|
||||
private:
|
||||
friend class KPageGroup;
|
||||
|
||||
constexpr void Initialize(KPhysicalAddress addr, size_t np) {
|
||||
public:
|
||||
constexpr KBlockInfo() = default;
|
||||
|
||||
constexpr void Initialize(PAddr addr, size_t np) {
|
||||
ASSERT(Common::IsAligned(addr, PageSize));
|
||||
ASSERT(static_cast<u32>(np) == np);
|
||||
|
||||
m_page_index = static_cast<u32>(addr / PageSize);
|
||||
m_page_index = static_cast<u32>(addr) / PageSize;
|
||||
m_num_pages = static_cast<u32>(np);
|
||||
}
|
||||
|
||||
constexpr KPhysicalAddress GetAddress() const {
|
||||
constexpr PAddr GetAddress() const {
|
||||
return m_page_index * PageSize;
|
||||
}
|
||||
constexpr size_t GetNumPages() const {
|
||||
@@ -38,10 +39,10 @@ public:
|
||||
constexpr size_t GetSize() const {
|
||||
return this->GetNumPages() * PageSize;
|
||||
}
|
||||
constexpr KPhysicalAddress GetEndAddress() const {
|
||||
constexpr PAddr GetEndAddress() const {
|
||||
return (m_page_index + m_num_pages) * PageSize;
|
||||
}
|
||||
constexpr KPhysicalAddress GetLastAddress() const {
|
||||
constexpr PAddr GetLastAddress() const {
|
||||
return this->GetEndAddress() - 1;
|
||||
}
|
||||
|
||||
@@ -61,8 +62,8 @@ public:
|
||||
return !(*this == rhs);
|
||||
}
|
||||
|
||||
constexpr bool IsStrictlyBefore(KPhysicalAddress addr) const {
|
||||
const KPhysicalAddress end = this->GetEndAddress();
|
||||
constexpr bool IsStrictlyBefore(PAddr addr) const {
|
||||
const PAddr end = this->GetEndAddress();
|
||||
|
||||
if (m_page_index != 0 && end == 0) {
|
||||
return false;
|
||||
@@ -71,11 +72,11 @@ public:
|
||||
return end < addr;
|
||||
}
|
||||
|
||||
constexpr bool operator<(KPhysicalAddress addr) const {
|
||||
constexpr bool operator<(PAddr addr) const {
|
||||
return this->IsStrictlyBefore(addr);
|
||||
}
|
||||
|
||||
constexpr bool TryConcatenate(KPhysicalAddress addr, size_t np) {
|
||||
constexpr bool TryConcatenate(PAddr addr, size_t np) {
|
||||
if (addr != 0 && addr == this->GetEndAddress()) {
|
||||
m_num_pages += static_cast<u32>(np);
|
||||
return true;
|
||||
@@ -89,118 +90,96 @@ private:
|
||||
}
|
||||
|
||||
private:
|
||||
friend class KPageGroup;
|
||||
|
||||
KBlockInfo* m_next{};
|
||||
u32 m_page_index{};
|
||||
u32 m_num_pages{};
|
||||
};
|
||||
static_assert(sizeof(KBlockInfo) <= 0x10);
|
||||
|
||||
class KPageGroup {
|
||||
class KPageGroup final {
|
||||
public:
|
||||
class Iterator {
|
||||
class Node final {
|
||||
public:
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = const KBlockInfo;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type*;
|
||||
using reference = value_type&;
|
||||
constexpr Node(u64 addr_, std::size_t num_pages_) : addr{addr_}, num_pages{num_pages_} {}
|
||||
|
||||
constexpr explicit Iterator(pointer n) : m_node(n) {}
|
||||
|
||||
constexpr bool operator==(const Iterator& rhs) const {
|
||||
return m_node == rhs.m_node;
|
||||
}
|
||||
constexpr bool operator!=(const Iterator& rhs) const {
|
||||
return !(*this == rhs);
|
||||
constexpr u64 GetAddress() const {
|
||||
return addr;
|
||||
}
|
||||
|
||||
constexpr pointer operator->() const {
|
||||
return m_node;
|
||||
}
|
||||
constexpr reference operator*() const {
|
||||
return *m_node;
|
||||
constexpr std::size_t GetNumPages() const {
|
||||
return num_pages;
|
||||
}
|
||||
|
||||
constexpr Iterator& operator++() {
|
||||
m_node = m_node->GetNext();
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr Iterator operator++(int) {
|
||||
const Iterator it{*this};
|
||||
++(*this);
|
||||
return it;
|
||||
constexpr std::size_t GetSize() const {
|
||||
return GetNumPages() * PageSize;
|
||||
}
|
||||
|
||||
private:
|
||||
pointer m_node{};
|
||||
u64 addr{};
|
||||
std::size_t num_pages{};
|
||||
};
|
||||
|
||||
explicit KPageGroup(KernelCore& kernel, KBlockInfoManager* m)
|
||||
: m_kernel{kernel}, m_manager{m} {}
|
||||
~KPageGroup() {
|
||||
this->Finalize();
|
||||
}
|
||||
|
||||
void CloseAndReset();
|
||||
void Finalize();
|
||||
|
||||
Iterator begin() const {
|
||||
return Iterator{m_first_block};
|
||||
}
|
||||
Iterator end() const {
|
||||
return Iterator{nullptr};
|
||||
}
|
||||
bool empty() const {
|
||||
return m_first_block == nullptr;
|
||||
}
|
||||
|
||||
Result AddBlock(KPhysicalAddress addr, size_t num_pages);
|
||||
void Open() const;
|
||||
void OpenFirst() const;
|
||||
void Close() const;
|
||||
|
||||
size_t GetNumPages() const;
|
||||
|
||||
bool IsEquivalentTo(const KPageGroup& rhs) const;
|
||||
|
||||
bool operator==(const KPageGroup& rhs) const {
|
||||
return this->IsEquivalentTo(rhs);
|
||||
}
|
||||
|
||||
bool operator!=(const KPageGroup& rhs) const {
|
||||
return !(*this == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
KernelCore& m_kernel;
|
||||
KBlockInfo* m_first_block{};
|
||||
KBlockInfo* m_last_block{};
|
||||
KBlockInfoManager* m_manager{};
|
||||
};
|
||||
|
||||
class KScopedPageGroup {
|
||||
public:
|
||||
explicit KScopedPageGroup(const KPageGroup* gp) : m_pg(gp) {
|
||||
if (m_pg) {
|
||||
m_pg->Open();
|
||||
}
|
||||
}
|
||||
explicit KScopedPageGroup(const KPageGroup& gp) : KScopedPageGroup(std::addressof(gp)) {}
|
||||
~KScopedPageGroup() {
|
||||
if (m_pg) {
|
||||
m_pg->Close();
|
||||
}
|
||||
KPageGroup() = default;
|
||||
KPageGroup(u64 address, u64 num_pages) {
|
||||
ASSERT(AddBlock(address, num_pages).IsSuccess());
|
||||
}
|
||||
|
||||
void CancelClose() {
|
||||
m_pg = nullptr;
|
||||
constexpr std::list<Node>& Nodes() {
|
||||
return nodes;
|
||||
}
|
||||
|
||||
constexpr const std::list<Node>& Nodes() const {
|
||||
return nodes;
|
||||
}
|
||||
|
||||
std::size_t GetNumPages() const {
|
||||
std::size_t num_pages = 0;
|
||||
for (const Node& node : nodes) {
|
||||
num_pages += node.GetNumPages();
|
||||
}
|
||||
return num_pages;
|
||||
}
|
||||
|
||||
bool IsEqual(KPageGroup& other) const {
|
||||
auto this_node = nodes.begin();
|
||||
auto other_node = other.nodes.begin();
|
||||
while (this_node != nodes.end() && other_node != other.nodes.end()) {
|
||||
if (this_node->GetAddress() != other_node->GetAddress() ||
|
||||
this_node->GetNumPages() != other_node->GetNumPages()) {
|
||||
return false;
|
||||
}
|
||||
this_node = std::next(this_node);
|
||||
other_node = std::next(other_node);
|
||||
}
|
||||
|
||||
return this_node == nodes.end() && other_node == other.nodes.end();
|
||||
}
|
||||
|
||||
Result AddBlock(u64 address, u64 num_pages) {
|
||||
if (!num_pages) {
|
||||
return ResultSuccess;
|
||||
}
|
||||
if (!nodes.empty()) {
|
||||
const auto node = nodes.back();
|
||||
if (node.GetAddress() + node.GetNumPages() * PageSize == address) {
|
||||
address = node.GetAddress();
|
||||
num_pages += node.GetNumPages();
|
||||
nodes.pop_back();
|
||||
}
|
||||
}
|
||||
nodes.push_back({address, num_pages});
|
||||
return ResultSuccess;
|
||||
}
|
||||
|
||||
bool Empty() const {
|
||||
return nodes.empty();
|
||||
}
|
||||
|
||||
void Finalize() {}
|
||||
|
||||
private:
|
||||
const KPageGroup* m_pg{};
|
||||
std::list<Node> nodes;
|
||||
};
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -100,7 +100,7 @@ constexpr size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType a
|
||||
|
||||
KPageTable::KPageTable(Core::System& system_)
|
||||
: m_general_lock{system_.Kernel()},
|
||||
m_map_physical_memory_lock{system_.Kernel()}, m_system{system_}, m_kernel{system_.Kernel()} {}
|
||||
m_map_physical_memory_lock{system_.Kernel()}, m_system{system_} {}
|
||||
|
||||
KPageTable::~KPageTable() = default;
|
||||
|
||||
@@ -373,7 +373,7 @@ Result KPageTable::MapProcessCode(VAddr addr, size_t num_pages, KMemoryState sta
|
||||
m_memory_block_slab_manager);
|
||||
|
||||
// Allocate and open.
|
||||
KPageGroup pg{m_kernel, m_block_info_manager};
|
||||
KPageGroup pg;
|
||||
R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
|
||||
&pg, num_pages,
|
||||
KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, m_allocation_option)));
|
||||
@@ -432,7 +432,7 @@ Result KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, size_t si
|
||||
const size_t num_pages = size / PageSize;
|
||||
|
||||
// Create page groups for the memory being mapped.
|
||||
KPageGroup pg{m_kernel, m_block_info_manager};
|
||||
KPageGroup pg;
|
||||
AddRegionToPages(src_address, num_pages, pg);
|
||||
|
||||
// Reprotect the source as kernel-read/not mapped.
|
||||
@@ -593,7 +593,7 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) {
|
||||
const size_t size = num_pages * PageSize;
|
||||
|
||||
// We're making a new group, not adding to an existing one.
|
||||
R_UNLESS(pg.empty(), ResultInvalidCurrentMemory);
|
||||
R_UNLESS(pg.Empty(), ResultInvalidCurrentMemory);
|
||||
|
||||
// Begin traversal.
|
||||
Common::PageTable::TraversalContext context;
|
||||
@@ -640,10 +640,11 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) {
|
||||
R_SUCCEED();
|
||||
}
|
||||
|
||||
bool KPageTable::IsValidPageGroup(const KPageGroup& pg, VAddr addr, size_t num_pages) {
|
||||
bool KPageTable::IsValidPageGroup(const KPageGroup& pg_ll, VAddr addr, size_t num_pages) {
|
||||
ASSERT(this->IsLockedByCurrentThread());
|
||||
|
||||
const size_t size = num_pages * PageSize;
|
||||
const auto& pg = pg_ll.Nodes();
|
||||
const auto& memory_layout = m_system.Kernel().MemoryLayout();
|
||||
|
||||
// Empty groups are necessarily invalid.
|
||||
@@ -941,6 +942,9 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
|
||||
|
||||
ON_RESULT_FAILURE {
|
||||
if (cur_mapped_addr != dst_addr) {
|
||||
// HACK: Manually close the pages.
|
||||
HACK_ClosePages(dst_addr, (cur_mapped_addr - dst_addr) / PageSize);
|
||||
|
||||
ASSERT(Operate(dst_addr, (cur_mapped_addr - dst_addr) / PageSize,
|
||||
KMemoryPermission::None, OperationType::Unmap)
|
||||
.IsSuccess());
|
||||
@@ -1016,6 +1020,9 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
|
||||
// Map the page.
|
||||
R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, start_partial_page));
|
||||
|
||||
// HACK: Manually open the pages.
|
||||
HACK_OpenPages(start_partial_page, 1);
|
||||
|
||||
// Update tracking extents.
|
||||
cur_mapped_addr += PageSize;
|
||||
cur_block_addr += PageSize;
|
||||
@@ -1044,6 +1051,9 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
|
||||
R_TRY(Operate(cur_mapped_addr, cur_block_size / PageSize, test_perm, OperationType::Map,
|
||||
cur_block_addr));
|
||||
|
||||
// HACK: Manually open the pages.
|
||||
HACK_OpenPages(cur_block_addr, cur_block_size / PageSize);
|
||||
|
||||
// Update tracking extents.
|
||||
cur_mapped_addr += cur_block_size;
|
||||
cur_block_addr = next_entry.phys_addr;
|
||||
@@ -1063,6 +1073,9 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
|
||||
R_TRY(Operate(cur_mapped_addr, last_block_size / PageSize, test_perm, OperationType::Map,
|
||||
cur_block_addr));
|
||||
|
||||
// HACK: Manually open the pages.
|
||||
HACK_OpenPages(cur_block_addr, last_block_size / PageSize);
|
||||
|
||||
// Update tracking extents.
|
||||
cur_mapped_addr += last_block_size;
|
||||
cur_block_addr += last_block_size;
|
||||
@@ -1094,6 +1107,9 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
|
||||
|
||||
// Map the page.
|
||||
R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, end_partial_page));
|
||||
|
||||
// HACK: Manually open the pages.
|
||||
HACK_OpenPages(end_partial_page, 1);
|
||||
}
|
||||
|
||||
// Update memory blocks to reflect our changes
|
||||
@@ -1195,6 +1211,9 @@ Result KPageTable::CleanupForIpcServer(VAddr address, size_t size, KMemoryState
|
||||
const size_t aligned_size = aligned_end - aligned_start;
|
||||
const size_t aligned_num_pages = aligned_size / PageSize;
|
||||
|
||||
// HACK: Manually close the pages.
|
||||
HACK_ClosePages(aligned_start, aligned_num_pages);
|
||||
|
||||
// Unmap the pages.
|
||||
R_TRY(Operate(aligned_start, aligned_num_pages, KMemoryPermission::None, OperationType::Unmap));
|
||||
|
||||
@@ -1482,6 +1501,17 @@ void KPageTable::CleanupForIpcClientOnServerSetupFailure([[maybe_unused]] PageLi
|
||||
}
|
||||
}
|
||||
|
||||
void KPageTable::HACK_OpenPages(PAddr phys_addr, size_t num_pages) {
|
||||
m_system.Kernel().MemoryManager().OpenFirst(phys_addr, num_pages);
|
||||
}
|
||||
|
||||
void KPageTable::HACK_ClosePages(VAddr virt_addr, size_t num_pages) {
|
||||
for (size_t index = 0; index < num_pages; ++index) {
|
||||
const auto paddr = GetPhysicalAddr(virt_addr + (index * PageSize));
|
||||
m_system.Kernel().MemoryManager().Close(paddr, 1);
|
||||
}
|
||||
}
|
||||
|
||||
Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
// Lock the physical memory lock.
|
||||
KScopedLightLock phys_lk(m_map_physical_memory_lock);
|
||||
@@ -1542,7 +1572,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
|
||||
|
||||
// Allocate pages for the new memory.
|
||||
KPageGroup pg{m_kernel, m_block_info_manager};
|
||||
KPageGroup pg;
|
||||
R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess(
|
||||
&pg, (size - mapped_size) / PageSize, m_allocate_option, 0, 0));
|
||||
|
||||
@@ -1620,7 +1650,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
KScopedPageTableUpdater updater(this);
|
||||
|
||||
// Prepare to iterate over the memory.
|
||||
auto pg_it = pg.begin();
|
||||
auto pg_it = pg.Nodes().begin();
|
||||
PAddr pg_phys_addr = pg_it->GetAddress();
|
||||
size_t pg_pages = pg_it->GetNumPages();
|
||||
|
||||
@@ -1650,6 +1680,9 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
last_unmap_address + 1 - cur_address) /
|
||||
PageSize;
|
||||
|
||||
// HACK: Manually close the pages.
|
||||
HACK_ClosePages(cur_address, cur_pages);
|
||||
|
||||
// Unmap.
|
||||
ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None,
|
||||
OperationType::Unmap)
|
||||
@@ -1670,7 +1703,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
// Release any remaining unmapped memory.
|
||||
m_system.Kernel().MemoryManager().OpenFirst(pg_phys_addr, pg_pages);
|
||||
m_system.Kernel().MemoryManager().Close(pg_phys_addr, pg_pages);
|
||||
for (++pg_it; pg_it != pg.end(); ++pg_it) {
|
||||
for (++pg_it; pg_it != pg.Nodes().end(); ++pg_it) {
|
||||
m_system.Kernel().MemoryManager().OpenFirst(pg_it->GetAddress(),
|
||||
pg_it->GetNumPages());
|
||||
m_system.Kernel().MemoryManager().Close(pg_it->GetAddress(),
|
||||
@@ -1698,7 +1731,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
// Check if we're at the end of the physical block.
|
||||
if (pg_pages == 0) {
|
||||
// Ensure there are more pages to map.
|
||||
ASSERT(pg_it != pg.end());
|
||||
ASSERT(pg_it != pg.Nodes().end());
|
||||
|
||||
// Advance our physical block.
|
||||
++pg_it;
|
||||
@@ -1709,7 +1742,10 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
|
||||
// Map whatever we can.
|
||||
const size_t cur_pages = std::min(pg_pages, map_pages);
|
||||
R_TRY(Operate(cur_address, cur_pages, KMemoryPermission::UserReadWrite,
|
||||
OperationType::MapFirst, pg_phys_addr));
|
||||
OperationType::Map, pg_phys_addr));
|
||||
|
||||
// HACK: Manually open the pages.
|
||||
HACK_OpenPages(pg_phys_addr, cur_pages);
|
||||
|
||||
// Advance.
|
||||
cur_address += cur_pages * PageSize;
|
||||
@@ -1852,6 +1888,9 @@ Result KPageTable::UnmapPhysicalMemory(VAddr address, size_t size) {
|
||||
last_address + 1 - cur_address) /
|
||||
PageSize;
|
||||
|
||||
// HACK: Manually close the pages.
|
||||
HACK_ClosePages(cur_address, cur_pages);
|
||||
|
||||
// Unmap.
|
||||
ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap)
|
||||
.IsSuccess());
|
||||
@@ -1916,7 +1955,7 @@ Result KPageTable::MapMemory(VAddr dst_address, VAddr src_address, size_t size)
|
||||
R_TRY(dst_allocator_result);
|
||||
|
||||
// Map the memory.
|
||||
KPageGroup page_linked_list{m_kernel, m_block_info_manager};
|
||||
KPageGroup page_linked_list;
|
||||
const size_t num_pages{size / PageSize};
|
||||
const KMemoryPermission new_src_perm = static_cast<KMemoryPermission>(
|
||||
KMemoryPermission::KernelRead | KMemoryPermission::NotMapped);
|
||||
@@ -1983,14 +2022,14 @@ Result KPageTable::UnmapMemory(VAddr dst_address, VAddr src_address, size_t size
|
||||
num_dst_allocator_blocks);
|
||||
R_TRY(dst_allocator_result);
|
||||
|
||||
KPageGroup src_pages{m_kernel, m_block_info_manager};
|
||||
KPageGroup dst_pages{m_kernel, m_block_info_manager};
|
||||
KPageGroup src_pages;
|
||||
KPageGroup dst_pages;
|
||||
const size_t num_pages{size / PageSize};
|
||||
|
||||
AddRegionToPages(src_address, num_pages, src_pages);
|
||||
AddRegionToPages(dst_address, num_pages, dst_pages);
|
||||
|
||||
R_UNLESS(dst_pages.IsEquivalentTo(src_pages), ResultInvalidMemoryRegion);
|
||||
R_UNLESS(dst_pages.IsEqual(src_pages), ResultInvalidMemoryRegion);
|
||||
|
||||
{
|
||||
auto block_guard = detail::ScopeExit([&] { MapPages(dst_address, dst_pages, dst_perm); });
|
||||
@@ -2021,7 +2060,7 @@ Result KPageTable::MapPages(VAddr addr, const KPageGroup& page_linked_list,
|
||||
|
||||
VAddr cur_addr{addr};
|
||||
|
||||
for (const auto& node : page_linked_list) {
|
||||
for (const auto& node : page_linked_list.Nodes()) {
|
||||
if (const auto result{
|
||||
Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())};
|
||||
result.IsError()) {
|
||||
@@ -2121,7 +2160,7 @@ Result KPageTable::UnmapPages(VAddr addr, const KPageGroup& page_linked_list) {
|
||||
|
||||
VAddr cur_addr{addr};
|
||||
|
||||
for (const auto& node : page_linked_list) {
|
||||
for (const auto& node : page_linked_list.Nodes()) {
|
||||
if (const auto result{Operate(cur_addr, node.GetNumPages(), KMemoryPermission::None,
|
||||
OperationType::Unmap)};
|
||||
result.IsError()) {
|
||||
@@ -2488,13 +2527,13 @@ Result KPageTable::SetHeapSize(VAddr* out, size_t size) {
|
||||
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
|
||||
|
||||
// Allocate pages for the heap extension.
|
||||
KPageGroup pg{m_kernel, m_block_info_manager};
|
||||
KPageGroup pg;
|
||||
R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
|
||||
&pg, allocation_size / PageSize,
|
||||
KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option)));
|
||||
|
||||
// Clear all the newly allocated pages.
|
||||
for (const auto& it : pg) {
|
||||
for (const auto& it : pg.Nodes()) {
|
||||
std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()), m_heap_fill_value,
|
||||
it.GetSize());
|
||||
}
|
||||
@@ -2571,23 +2610,11 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(size_t needed_num_pages, size_
|
||||
if (is_map_only) {
|
||||
R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
|
||||
} else {
|
||||
// Create a page group tohold the pages we allocate.
|
||||
KPageGroup pg{m_kernel, m_block_info_manager};
|
||||
|
||||
R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
|
||||
&pg, needed_num_pages,
|
||||
KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option)));
|
||||
|
||||
// Ensure that the page group is closed when we're done working with it.
|
||||
SCOPE_EXIT({ pg.Close(); });
|
||||
|
||||
// Clear all pages.
|
||||
for (const auto& it : pg) {
|
||||
std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()),
|
||||
m_heap_fill_value, it.GetSize());
|
||||
}
|
||||
|
||||
R_TRY(Operate(addr, needed_num_pages, pg, OperationType::MapGroup));
|
||||
KPageGroup page_group;
|
||||
R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess(
|
||||
&page_group, needed_num_pages,
|
||||
KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option), 0, 0));
|
||||
R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
|
||||
}
|
||||
|
||||
// Update the blocks.
|
||||
@@ -2768,28 +2795,19 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, const KPageGroup& page_
|
||||
ASSERT(num_pages > 0);
|
||||
ASSERT(num_pages == page_group.GetNumPages());
|
||||
|
||||
switch (operation) {
|
||||
case OperationType::MapGroup: {
|
||||
// We want to maintain a new reference to every page in the group.
|
||||
KScopedPageGroup spg(page_group);
|
||||
for (const auto& node : page_group.Nodes()) {
|
||||
const size_t size{node.GetNumPages() * PageSize};
|
||||
|
||||
for (const auto& node : page_group) {
|
||||
const size_t size{node.GetNumPages() * PageSize};
|
||||
|
||||
// Map the pages.
|
||||
switch (operation) {
|
||||
case OperationType::MapGroup:
|
||||
m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, size, node.GetAddress());
|
||||
|
||||
addr += size;
|
||||
break;
|
||||
default:
|
||||
ASSERT(false);
|
||||
break;
|
||||
}
|
||||
|
||||
// We succeeded! We want to persist the reference to the pages.
|
||||
spg.CancelClose();
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT(false);
|
||||
break;
|
||||
addr += size;
|
||||
}
|
||||
|
||||
R_SUCCEED();
|
||||
@@ -2804,29 +2822,13 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, KMemoryPermission perm,
|
||||
ASSERT(ContainsPages(addr, num_pages));
|
||||
|
||||
switch (operation) {
|
||||
case OperationType::Unmap: {
|
||||
// Ensure that any pages we track close on exit.
|
||||
KPageGroup pages_to_close{m_kernel, this->GetBlockInfoManager()};
|
||||
SCOPE_EXIT({ pages_to_close.CloseAndReset(); });
|
||||
|
||||
this->AddRegionToPages(addr, num_pages, pages_to_close);
|
||||
case OperationType::Unmap:
|
||||
m_system.Memory().UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize);
|
||||
break;
|
||||
}
|
||||
case OperationType::MapFirst:
|
||||
case OperationType::Map: {
|
||||
ASSERT(map_addr);
|
||||
ASSERT(Common::IsAligned(map_addr, PageSize));
|
||||
m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, num_pages * PageSize, map_addr);
|
||||
|
||||
// Open references to pages, if we should.
|
||||
if (IsHeapPhysicalAddress(m_kernel.MemoryLayout(), map_addr)) {
|
||||
if (operation == OperationType::MapFirst) {
|
||||
m_kernel.MemoryManager().OpenFirst(map_addr, num_pages);
|
||||
} else {
|
||||
m_kernel.MemoryManager().Open(map_addr, num_pages);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OperationType::Separate: {
|
||||
|
||||
@@ -107,10 +107,6 @@ public:
|
||||
return *m_page_table_impl;
|
||||
}
|
||||
|
||||
KBlockInfoManager* GetBlockInfoManager() {
|
||||
return m_block_info_manager;
|
||||
}
|
||||
|
||||
bool CanContain(VAddr addr, size_t size, KMemoryState state) const;
|
||||
|
||||
protected:
|
||||
@@ -265,6 +261,10 @@ private:
|
||||
void CleanupForIpcClientOnServerSetupFailure(PageLinkedList* page_list, VAddr address,
|
||||
size_t size, KMemoryPermission prot_perm);
|
||||
|
||||
// HACK: These will be removed once we automatically manage page reference counts.
|
||||
void HACK_OpenPages(PAddr phys_addr, size_t num_pages);
|
||||
void HACK_ClosePages(VAddr virt_addr, size_t num_pages);
|
||||
|
||||
mutable KLightLock m_general_lock;
|
||||
mutable KLightLock m_map_physical_memory_lock;
|
||||
|
||||
@@ -488,7 +488,6 @@ private:
|
||||
std::unique_ptr<Common::PageTable> m_page_table_impl;
|
||||
|
||||
Core::System& m_system;
|
||||
KernelCore& m_kernel;
|
||||
};
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -13,7 +13,10 @@
|
||||
namespace Kernel {
|
||||
|
||||
KSharedMemory::KSharedMemory(KernelCore& kernel_) : KAutoObjectWithSlabHeapAndContainer{kernel_} {}
|
||||
KSharedMemory::~KSharedMemory() = default;
|
||||
|
||||
KSharedMemory::~KSharedMemory() {
|
||||
kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemoryMax, size);
|
||||
}
|
||||
|
||||
Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* owner_process_,
|
||||
Svc::MemoryPermission owner_permission_,
|
||||
@@ -46,8 +49,7 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
|
||||
R_UNLESS(physical_address != 0, ResultOutOfMemory);
|
||||
|
||||
//! Insert the result into our page group.
|
||||
page_group.emplace(kernel, &kernel.GetSystemSystemResource().GetBlockInfoManager());
|
||||
page_group->AddBlock(physical_address, num_pages);
|
||||
page_group.emplace(physical_address, num_pages);
|
||||
|
||||
// Commit our reservation.
|
||||
memory_reservation.Commit();
|
||||
@@ -60,7 +62,7 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
|
||||
is_initialized = true;
|
||||
|
||||
// Clear all pages in the memory.
|
||||
for (const auto& block : *page_group) {
|
||||
for (const auto& block : page_group->Nodes()) {
|
||||
std::memset(device_memory_.GetPointer<void>(block.GetAddress()), 0, block.GetSize());
|
||||
}
|
||||
|
||||
@@ -69,8 +71,13 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
|
||||
|
||||
void KSharedMemory::Finalize() {
|
||||
// Close and finalize the page group.
|
||||
page_group->Close();
|
||||
page_group->Finalize();
|
||||
// page_group->Close();
|
||||
// page_group->Finalize();
|
||||
|
||||
//! HACK: Manually close.
|
||||
for (const auto& block : page_group->Nodes()) {
|
||||
kernel.MemoryManager().Close(block.GetAddress(), block.GetNumPages());
|
||||
}
|
||||
|
||||
// Release the memory reservation.
|
||||
resource_limit->Release(LimitableResource::PhysicalMemoryMax, size);
|
||||
|
||||
@@ -14,7 +14,4 @@ constexpr std::size_t PageSize{1 << PageBits};
|
||||
|
||||
using Page = std::array<u8, PageSize>;
|
||||
|
||||
using KPhysicalAddress = PAddr;
|
||||
using KProcessAddress = VAddr;
|
||||
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -1485,7 +1485,7 @@ static Result MapProcessMemory(Core::System& system, VAddr dst_address, Handle p
|
||||
ResultInvalidMemoryRegion);
|
||||
|
||||
// Create a new page group.
|
||||
KPageGroup pg{system.Kernel(), dst_pt.GetBlockInfoManager()};
|
||||
KPageGroup pg;
|
||||
R_TRY(src_pt.MakeAndOpenPageGroup(
|
||||
std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess,
|
||||
KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None,
|
||||
|
||||
@@ -22,15 +22,19 @@ namespace {
|
||||
|
||||
namespace Service::NIFM {
|
||||
|
||||
// This is nn::nifm::RequestState
|
||||
enum class RequestState : u32 {
|
||||
NotSubmitted = 1,
|
||||
Error = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
|
||||
Pending = 2,
|
||||
Connected = 3,
|
||||
Invalid = 1, ///< The duplicate 1 is intentional; it means both not submitted and error on HW.
|
||||
OnHold = 2,
|
||||
Accepted = 3,
|
||||
Blocking = 4,
|
||||
};
|
||||
|
||||
enum class InternetConnectionType : u8 {
|
||||
WiFi = 1,
|
||||
// This is nn::nifm::NetworkInterfaceType
|
||||
enum class NetworkInterfaceType : u32 {
|
||||
Invalid = 0,
|
||||
WiFi_Ieee80211 = 1,
|
||||
Ethernet = 2,
|
||||
};
|
||||
|
||||
@@ -42,14 +46,23 @@ enum class InternetConnectionStatus : u8 {
|
||||
Connected,
|
||||
};
|
||||
|
||||
// This is nn::nifm::NetworkProfileType
|
||||
enum class NetworkProfileType : u32 {
|
||||
User,
|
||||
SsidList,
|
||||
Temporary,
|
||||
};
|
||||
|
||||
// This is nn::nifm::IpAddressSetting
|
||||
struct IpAddressSetting {
|
||||
bool is_automatic{};
|
||||
Network::IPv4Address current_address{};
|
||||
Network::IPv4Address ip_address{};
|
||||
Network::IPv4Address subnet_mask{};
|
||||
Network::IPv4Address gateway{};
|
||||
Network::IPv4Address default_gateway{};
|
||||
};
|
||||
static_assert(sizeof(IpAddressSetting) == 0xD, "IpAddressSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::DnsSetting
|
||||
struct DnsSetting {
|
||||
bool is_automatic{};
|
||||
Network::IPv4Address primary_dns{};
|
||||
@@ -57,18 +70,26 @@ struct DnsSetting {
|
||||
};
|
||||
static_assert(sizeof(DnsSetting) == 0x9, "DnsSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::AuthenticationSetting
|
||||
struct AuthenticationSetting {
|
||||
bool is_enabled{};
|
||||
std::array<char, 0x20> user{};
|
||||
std::array<char, 0x20> password{};
|
||||
};
|
||||
static_assert(sizeof(AuthenticationSetting) == 0x41, "AuthenticationSetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::ProxySetting
|
||||
struct ProxySetting {
|
||||
bool enabled{};
|
||||
bool is_enabled{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
u16 port{};
|
||||
std::array<char, 0x64> proxy_server{};
|
||||
bool automatic_auth_enabled{};
|
||||
std::array<char, 0x20> user{};
|
||||
std::array<char, 0x20> password{};
|
||||
AuthenticationSetting authentication{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
};
|
||||
static_assert(sizeof(ProxySetting) == 0xAA, "ProxySetting has incorrect size.");
|
||||
|
||||
// This is nn::nifm::IpSettingData
|
||||
struct IpSettingData {
|
||||
IpAddressSetting ip_address_setting{};
|
||||
DnsSetting dns_setting{};
|
||||
@@ -101,6 +122,7 @@ static_assert(sizeof(NifmWirelessSettingData) == 0x70,
|
||||
"NifmWirelessSettingData has incorrect size.");
|
||||
|
||||
#pragma pack(push, 1)
|
||||
// This is nn::nifm::detail::sf::NetworkProfileData
|
||||
struct SfNetworkProfileData {
|
||||
IpSettingData ip_setting_data{};
|
||||
u128 uuid{};
|
||||
@@ -114,13 +136,14 @@ struct SfNetworkProfileData {
|
||||
};
|
||||
static_assert(sizeof(SfNetworkProfileData) == 0x17C, "SfNetworkProfileData has incorrect size.");
|
||||
|
||||
// This is nn::nifm::NetworkProfileData
|
||||
struct NifmNetworkProfileData {
|
||||
u128 uuid{};
|
||||
std::array<char, 0x40> network_name{};
|
||||
u32 unknown_1{};
|
||||
u32 unknown_2{};
|
||||
u8 unknown_3{};
|
||||
u8 unknown_4{};
|
||||
NetworkProfileType network_profile_type{};
|
||||
NetworkInterfaceType network_interface_type{};
|
||||
bool is_auto_connect{};
|
||||
bool is_large_capacity{};
|
||||
INSERT_PADDING_BYTES(2);
|
||||
NifmWirelessSettingData wireless_setting_data{};
|
||||
IpSettingData ip_setting_data{};
|
||||
@@ -184,6 +207,7 @@ public:
|
||||
|
||||
event1 = CreateKEvent(service_context, "IRequest:Event1");
|
||||
event2 = CreateKEvent(service_context, "IRequest:Event2");
|
||||
state = RequestState::NotSubmitted;
|
||||
}
|
||||
|
||||
~IRequest() override {
|
||||
@@ -196,7 +220,7 @@ private:
|
||||
LOG_WARNING(Service_NIFM, "(STUBBED) called");
|
||||
|
||||
if (state == RequestState::NotSubmitted) {
|
||||
UpdateState(RequestState::Pending);
|
||||
UpdateState(RequestState::OnHold);
|
||||
}
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
@@ -219,14 +243,14 @@ private:
|
||||
switch (state) {
|
||||
case RequestState::NotSubmitted:
|
||||
return has_connection ? ResultSuccess : ResultNetworkCommunicationDisabled;
|
||||
case RequestState::Pending:
|
||||
case RequestState::OnHold:
|
||||
if (has_connection) {
|
||||
UpdateState(RequestState::Connected);
|
||||
UpdateState(RequestState::Accepted);
|
||||
} else {
|
||||
UpdateState(RequestState::Error);
|
||||
UpdateState(RequestState::Invalid);
|
||||
}
|
||||
return ResultPendingConnection;
|
||||
case RequestState::Connected:
|
||||
case RequestState::Accepted:
|
||||
default:
|
||||
return ResultSuccess;
|
||||
}
|
||||
@@ -338,9 +362,9 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
||||
.ip_setting_data{
|
||||
.ip_address_setting{
|
||||
.is_automatic{true},
|
||||
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
|
||||
.gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
},
|
||||
.dns_setting{
|
||||
.is_automatic{true},
|
||||
@@ -348,12 +372,14 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
||||
.secondary_dns{1, 0, 0, 1},
|
||||
},
|
||||
.proxy_setting{
|
||||
.enabled{false},
|
||||
.is_enabled{false},
|
||||
.port{},
|
||||
.proxy_server{},
|
||||
.automatic_auth_enabled{},
|
||||
.user{},
|
||||
.password{},
|
||||
.authentication{
|
||||
.is_enabled{},
|
||||
.user{},
|
||||
.password{},
|
||||
},
|
||||
},
|
||||
.mtu{1500},
|
||||
},
|
||||
@@ -370,7 +396,7 @@ void IGeneralService::GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
|
||||
// When we're connected to a room, spoof the hosts IP address
|
||||
if (auto room_member = network.GetRoomMember().lock()) {
|
||||
if (room_member->IsConnected()) {
|
||||
network_profile_data.ip_setting_data.ip_address_setting.current_address =
|
||||
network_profile_data.ip_setting_data.ip_address_setting.ip_address =
|
||||
room_member->GetFakeIpAddress();
|
||||
}
|
||||
}
|
||||
@@ -444,9 +470,9 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
|
||||
return IpConfigInfo{
|
||||
.ip_address_setting{
|
||||
.is_automatic{true},
|
||||
.current_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.ip_address{Network::TranslateIPv4(net_iface->ip_address)},
|
||||
.subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
|
||||
.gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
.default_gateway{Network::TranslateIPv4(net_iface->gateway)},
|
||||
},
|
||||
.dns_setting{
|
||||
.is_automatic{true},
|
||||
@@ -459,7 +485,7 @@ void IGeneralService::GetCurrentIpConfigInfo(Kernel::HLERequestContext& ctx) {
|
||||
// When we're connected to a room, spoof the hosts IP address
|
||||
if (auto room_member = network.GetRoomMember().lock()) {
|
||||
if (room_member->IsConnected()) {
|
||||
ip_config_info.ip_address_setting.current_address = room_member->GetFakeIpAddress();
|
||||
ip_config_info.ip_address_setting.ip_address = room_member->GetFakeIpAddress();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -480,7 +506,7 @@ void IGeneralService::GetInternetConnectionStatus(Kernel::HLERequestContext& ctx
|
||||
LOG_WARNING(Service_NIFM, "(STUBBED) called");
|
||||
|
||||
struct Output {
|
||||
InternetConnectionType type{InternetConnectionType::WiFi};
|
||||
u8 type{static_cast<u8>(NetworkInterfaceType::WiFi_Ieee80211)};
|
||||
u8 wifi_strength{3};
|
||||
InternetConnectionStatus state{InternetConnectionStatus::Connected};
|
||||
};
|
||||
|
||||
@@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
|
||||
return Errno::NETUNREACH;
|
||||
case WSAEMSGSIZE:
|
||||
return Errno::MSGSIZE;
|
||||
case WSAETIMEDOUT:
|
||||
return Errno::TIMEDOUT;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
|
||||
return Errno::OTHER;
|
||||
@@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
|
||||
return Errno::NETUNREACH;
|
||||
case EMSGSIZE:
|
||||
return Errno::MSGSIZE;
|
||||
case ETIMEDOUT:
|
||||
return Errno::TIMEDOUT;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
|
||||
return Errno::OTHER;
|
||||
@@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
|
||||
int e = errno;
|
||||
#endif
|
||||
const Errno err = TranslateNativeError(e);
|
||||
if (err == Errno::AGAIN) {
|
||||
if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
|
||||
return err;
|
||||
}
|
||||
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
|
||||
|
||||
@@ -436,7 +436,7 @@ struct Memory::Impl {
|
||||
}
|
||||
|
||||
if (Settings::IsFastmemEnabled()) {
|
||||
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
|
||||
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
|
||||
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# SPDX-FileCopyrightText: 2017 Citra Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
|
||||
|
||||
add_executable(yuzu-room
|
||||
precompiled_headers.h
|
||||
yuzu_room.cpp
|
||||
@@ -17,7 +15,7 @@ if (ENABLE_WEB_SERVICE)
|
||||
target_link_libraries(yuzu-room PRIVATE web_service)
|
||||
endif()
|
||||
|
||||
target_link_libraries(yuzu-room PRIVATE mbedtls mbedcrypto)
|
||||
target_link_libraries(yuzu-room PRIVATE MbedTLS::mbedcrypto)
|
||||
if (MSVC)
|
||||
target_link_libraries(yuzu-room PRIVATE getopt)
|
||||
endif()
|
||||
|
||||
@@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
|
||||
case IR::Attribute::VertexId:
|
||||
ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::FrontFace:
|
||||
ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
|
||||
break;
|
||||
@@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S
|
||||
case IR::Attribute::VertexId:
|
||||
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get U32 attribute {}", attr);
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR
|
||||
EmitContext ctx{program, bindings, profile, runtime_info};
|
||||
Precolor(program);
|
||||
EmitCode(ctx, program);
|
||||
const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
|
||||
const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))};
|
||||
ctx.header.insert(0, version);
|
||||
if (program.shared_memory_size > 0) {
|
||||
const auto requested_size{program.shared_memory_size};
|
||||
|
||||
@@ -234,6 +234,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
|
||||
case IR::Attribute::FrontFace:
|
||||
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddF32("{}=itof(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get attribute {}", attr);
|
||||
}
|
||||
@@ -250,6 +259,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
|
||||
case IR::Attribute::VertexId:
|
||||
ctx.AddU32("{}=uint(gl_VertexID);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseInstance:
|
||||
ctx.AddU32("{}=uint(gl_BaseInstance);", inst);
|
||||
break;
|
||||
case IR::Attribute::BaseVertex:
|
||||
ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
|
||||
break;
|
||||
case IR::Attribute::DrawID:
|
||||
ctx.AddU32("{}=uint(gl_DrawID);", inst);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Get U32 attribute {}", attr);
|
||||
}
|
||||
|
||||
@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
||||
case IR::Attribute::PositionY:
|
||||
case IR::Attribute::PositionZ:
|
||||
case IR::Attribute::PositionW:
|
||||
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
|
||||
ctx.Const(element)));
|
||||
return ctx.OpLoad(
|
||||
ctx.F32[1],
|
||||
ctx.need_input_position_indirect
|
||||
? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
|
||||
ctx.Const(element))
|
||||
: AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
|
||||
case IR::Attribute::InstanceId:
|
||||
if (ctx.profile.support_vertex_instance_id) {
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
|
||||
@@ -339,6 +343,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
||||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
|
||||
}
|
||||
case IR::Attribute::BaseInstance:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
|
||||
case IR::Attribute::BaseVertex:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
|
||||
case IR::Attribute::DrawID:
|
||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index));
|
||||
case IR::Attribute::FrontFace:
|
||||
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
|
||||
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
|
||||
@@ -380,6 +390,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
|
||||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||
return ctx.OpISub(ctx.U32[1], index, base);
|
||||
}
|
||||
case IR::Attribute::BaseInstance:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
|
||||
case IR::Attribute::BaseVertex:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
|
||||
case IR::Attribute::DrawID:
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.draw_index);
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
}
|
||||
|
||||
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
|
||||
ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
|
||||
}
|
||||
|
||||
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
|
||||
const Id thirty_two{ctx.Const(32u)};
|
||||
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
|
||||
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
|
||||
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
|
||||
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
||||
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
|
||||
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
||||
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
|
||||
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
const Id thirty_two{ctx.Const(32u)};
|
||||
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
|
||||
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
|
||||
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
|
||||
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
|
||||
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
|
||||
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
|
||||
|
||||
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
|
||||
const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
const Id thread_id{GetThreadId(ctx)};
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
clamp = GetUpperClamp(ctx, thread_id, clamp);
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
|
||||
|
||||
if (ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
src_thread_id = AddPartitionBase(ctx, src_thread_id);
|
||||
}
|
||||
|
||||
SetInBoundsFlag(inst, in_range);
|
||||
return SelectValue(ctx, in_range, value, src_thread_id);
|
||||
}
|
||||
|
||||
@@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
||||
U16 = Name(TypeInt(16, false), "u16");
|
||||
S16 = Name(TypeInt(16, true), "s16");
|
||||
}
|
||||
if (info.uses_int64) {
|
||||
if (info.uses_int64 && profile.support_int64) {
|
||||
AddCapability(spv::Capability::Int64);
|
||||
U64 = Name(TypeInt(64, false), "u64");
|
||||
}
|
||||
@@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
|
||||
size_t label_index{0};
|
||||
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
|
||||
AddLabel(labels[label_index]);
|
||||
const Id pointer{is_array
|
||||
? OpAccessChain(input_f32, input_position, vertex, masked_index)
|
||||
: OpAccessChain(input_f32, input_position, masked_index)};
|
||||
const Id pointer{[&]() {
|
||||
if (need_input_position_indirect) {
|
||||
if (is_array)
|
||||
return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
|
||||
masked_index);
|
||||
else
|
||||
return OpAccessChain(input_f32, input_position, u32_zero_value,
|
||||
masked_index);
|
||||
} else {
|
||||
if (is_array)
|
||||
return OpAccessChain(input_f32, input_position, vertex, masked_index);
|
||||
else
|
||||
return OpAccessChain(input_f32, input_position, masked_index);
|
||||
}
|
||||
}()};
|
||||
const Id result{OpLoad(F32[1], pointer)};
|
||||
OpReturnValue(result);
|
||||
++label_index;
|
||||
@@ -1367,30 +1379,56 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
Decorate(layer, spv::Decoration::Flat);
|
||||
}
|
||||
if (loads.AnyComponent(IR::Attribute::PositionX)) {
|
||||
const bool is_fragment{stage != Stage::Fragment};
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
|
||||
input_position = DefineInput(*this, F32[4], true, built_in);
|
||||
if (profile.support_geometry_shader_passthrough) {
|
||||
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
||||
Decorate(input_position, spv::Decoration::PassthroughNV);
|
||||
const bool is_fragment{stage == Stage::Fragment};
|
||||
if (!is_fragment && profile.has_broken_spirv_position_input) {
|
||||
need_input_position_indirect = true;
|
||||
|
||||
const Id input_position_struct = TypeStruct(F32[4]);
|
||||
input_position = DefineInput(*this, input_position_struct, true);
|
||||
|
||||
MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
|
||||
static_cast<unsigned>(spv::BuiltIn::Position));
|
||||
Decorate(input_position_struct, spv::Decoration::Block);
|
||||
} else {
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
|
||||
: spv::BuiltIn::Position};
|
||||
input_position = DefineInput(*this, F32[4], true, built_in);
|
||||
|
||||
if (profile.support_geometry_shader_passthrough) {
|
||||
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
|
||||
Decorate(input_position, spv::Decoration::PassthroughNV);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (loads[IR::Attribute::InstanceId]) {
|
||||
if (profile.support_vertex_instance_id) {
|
||||
instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
|
||||
if (loads[IR::Attribute::BaseInstance]) {
|
||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else {
|
||||
instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
|
||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
||||
}
|
||||
} else if (loads[IR::Attribute::BaseInstance]) {
|
||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
||||
}
|
||||
if (loads[IR::Attribute::VertexId]) {
|
||||
if (profile.support_vertex_instance_id) {
|
||||
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
|
||||
if (loads[IR::Attribute::BaseVertex]) {
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else {
|
||||
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
} else if (loads[IR::Attribute::BaseVertex]) {
|
||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||
}
|
||||
if (loads[IR::Attribute::DrawID]) {
|
||||
draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex);
|
||||
}
|
||||
if (loads[IR::Attribute::FrontFace]) {
|
||||
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
|
||||
|
||||
@@ -218,6 +218,7 @@ public:
|
||||
Id base_instance{};
|
||||
Id vertex_id{};
|
||||
Id vertex_index{};
|
||||
Id draw_index{};
|
||||
Id base_vertex{};
|
||||
Id front_face{};
|
||||
Id point_coord{};
|
||||
@@ -279,6 +280,7 @@ public:
|
||||
Id write_global_func_u32x2{};
|
||||
Id write_global_func_u32x4{};
|
||||
|
||||
bool need_input_position_indirect{};
|
||||
Id input_position{};
|
||||
std::array<Id, 32> input_generics{};
|
||||
|
||||
|
||||
@@ -34,6 +34,11 @@ public:
|
||||
|
||||
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
|
||||
|
||||
[[nodiscard]] virtual bool HasHLEMacroState() const = 0;
|
||||
|
||||
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
|
||||
u32 offset) = 0;
|
||||
|
||||
virtual void Dump(u64 hash) = 0;
|
||||
|
||||
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
||||
@@ -52,11 +57,16 @@ public:
|
||||
return start_address;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
|
||||
return is_propietary_driver;
|
||||
}
|
||||
|
||||
protected:
|
||||
ProgramHeader sph{};
|
||||
std::array<u32, 8> gp_passthrough_mask{};
|
||||
Stage stage{};
|
||||
u32 start_address{};
|
||||
bool is_propietary_driver{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
@@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) {
|
||||
return "ViewportMask";
|
||||
case Attribute::FrontFace:
|
||||
return "FrontFace";
|
||||
case Attribute::BaseInstance:
|
||||
return "BaseInstance";
|
||||
case Attribute::BaseVertex:
|
||||
return "BaseVertex";
|
||||
case Attribute::DrawID:
|
||||
return "DrawID";
|
||||
}
|
||||
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
|
||||
}
|
||||
|
||||
@@ -219,6 +219,11 @@ enum class Attribute : u64 {
|
||||
FixedFncTexture9Q = 231,
|
||||
ViewportMask = 232,
|
||||
FrontFace = 255,
|
||||
|
||||
// Implementation attributes
|
||||
BaseInstance = 256,
|
||||
BaseVertex = 257,
|
||||
DrawID = 258,
|
||||
};
|
||||
|
||||
constexpr size_t NUM_GENERICS = 32;
|
||||
|
||||
@@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
|
||||
return GetAttributeU32(attribute, Imm32(0));
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
|
||||
return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
|
||||
}
|
||||
|
||||
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
|
||||
Inst(Opcode::SetAttribute, attribute, value, vertex);
|
||||
}
|
||||
|
||||
@@ -74,6 +74,8 @@ public:
|
||||
|
||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
|
||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
|
||||
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
|
||||
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
|
||||
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
|
||||
|
||||
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
|
||||
|
||||
@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
|
||||
}
|
||||
return mapping;
|
||||
}
|
||||
|
||||
void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
|
||||
const Shader::VaryingState& passthrough_mask,
|
||||
bool passthrough_position,
|
||||
std::optional<IR::Attribute> passthrough_layer_attr) {
|
||||
for (u32 i = 0; i < program.output_vertices; i++) {
|
||||
// Assign generics from input
|
||||
for (u32 j = 0; j < 32; j++) {
|
||||
if (!passthrough_mask.Generic(j)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
if (passthrough_position) {
|
||||
// Assign position from input
|
||||
const IR::Attribute attr = IR::Attribute::PositionX;
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
if (passthrough_layer_attr) {
|
||||
// Assign layer
|
||||
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
|
||||
ir.Imm32(0));
|
||||
}
|
||||
|
||||
// Emit vertex
|
||||
ir.EmitVertex(ir.Imm32(0));
|
||||
}
|
||||
ir.EndPrimitive(ir.Imm32(0));
|
||||
}
|
||||
|
||||
u32 GetOutputTopologyVertices(OutputTopology output_topology) {
|
||||
switch (output_topology) {
|
||||
case OutputTopology::PointList:
|
||||
return 1;
|
||||
case OutputTopology::LineStrip:
|
||||
return 2;
|
||||
default:
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() == IR::Opcode::Epilogue) {
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
EmitGeometryPassthrough(
|
||||
ir, program, program.info.passthrough,
|
||||
program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
@@ -198,6 +262,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||
for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
|
||||
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
||||
}
|
||||
|
||||
if (!host_info.support_geometry_shader_passthrough) {
|
||||
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
||||
LowerGeometryPassthrough(program, host_info);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||
}
|
||||
Optimization::SsaRewritePass(program);
|
||||
|
||||
Optimization::ConstantPropagationPass(program);
|
||||
Optimization::ConstantPropagationPass(env, program);
|
||||
|
||||
Optimization::PositionPass(env, program);
|
||||
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program);
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
|
||||
Optimization::TexturePass(env, program, host_info);
|
||||
|
||||
if (Settings::values.resolution_info.active) {
|
||||
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
|
||||
IR::Program program;
|
||||
program.stage = Stage::Geometry;
|
||||
program.output_topology = output_topology;
|
||||
switch (output_topology) {
|
||||
case OutputTopology::PointList:
|
||||
program.output_vertices = 1;
|
||||
break;
|
||||
case OutputTopology::LineStrip:
|
||||
program.output_vertices = 2;
|
||||
break;
|
||||
default:
|
||||
program.output_vertices = 3;
|
||||
break;
|
||||
}
|
||||
program.output_vertices = GetOutputTopologyVertices(output_topology);
|
||||
|
||||
program.is_geometry_passthrough = false;
|
||||
program.info.loads.mask = source_program.info.stores.mask;
|
||||
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
|
||||
node.data.block = current_block;
|
||||
|
||||
IR::IREmitter ir{*current_block};
|
||||
for (u32 i = 0; i < program.output_vertices; i++) {
|
||||
// Assign generics from input
|
||||
for (u32 j = 0; j < 32; j++) {
|
||||
if (!program.info.stores.Generic(j)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
}
|
||||
|
||||
// Assign position from input
|
||||
const IR::Attribute attr = IR::Attribute::PositionX;
|
||||
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
|
||||
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
|
||||
|
||||
// Assign layer
|
||||
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
|
||||
ir.Imm32(0));
|
||||
|
||||
// Emit vertex
|
||||
ir.EmitVertex(ir.Imm32(0));
|
||||
}
|
||||
ir.EndPrimitive(ir.Imm32(0));
|
||||
EmitGeometryPassthrough(ir, program, program.info.stores, true,
|
||||
source_program.info.emulated_layer);
|
||||
|
||||
IR::Block* return_block{block_pool.Create(inst_pool)};
|
||||
IR::IREmitter{*return_block}.Epilogue();
|
||||
|
||||
@@ -15,6 +15,9 @@ struct HostTranslateInfo {
|
||||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
|
||||
///< passthrough shaders
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <type_traits>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
@@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||
case IR::Attribute::PrimitiveId:
|
||||
case IR::Attribute::InstanceId:
|
||||
case IR::Attribute::VertexId:
|
||||
case IR::Attribute::BaseVertex:
|
||||
case IR::Attribute::BaseInstance:
|
||||
case IR::Attribute::DrawID:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
@@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
const IR::Value bank{inst.Arg(0)};
|
||||
const IR::Value offset{inst.Arg(1)};
|
||||
if (!bank.IsImmediate() || !offset.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
const auto bank_value = bank.U32();
|
||||
const auto offset_value = offset.U32();
|
||||
auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value);
|
||||
if (!replacement) {
|
||||
return;
|
||||
}
|
||||
const auto new_attribute = [replacement]() {
|
||||
switch (*replacement) {
|
||||
case ReplaceConstant::BaseInstance:
|
||||
return IR::Attribute::BaseInstance;
|
||||
case ReplaceConstant::BaseVertex:
|
||||
return IR::Attribute::BaseVertex;
|
||||
case ReplaceConstant::DrawID:
|
||||
return IR::Attribute::DrawID;
|
||||
default:
|
||||
throw NotImplementedException("Not implemented replacement variable {}", *replacement);
|
||||
}
|
||||
}();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||
inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
|
||||
} else {
|
||||
inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
|
||||
}
|
||||
}
|
||||
|
||||
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
|
||||
u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
|
||||
const IR::Value bank{inst.Arg(0)};
|
||||
const IR::Value offset{inst.Arg(1)};
|
||||
if (!bank.IsImmediate() || !offset.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
const auto bank_value = bank.U32();
|
||||
if (bank_value != which_bank) {
|
||||
return;
|
||||
}
|
||||
const auto offset_value = offset.U32();
|
||||
if (offset_value < offset_start || offset_value >= offset_end) {
|
||||
return;
|
||||
}
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||
inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
|
||||
} else {
|
||||
inst.ReplaceUsesWith(
|
||||
IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::GetRegister:
|
||||
return FoldGetRegister(inst);
|
||||
@@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
IR::Opcode::CompositeInsertF16x4);
|
||||
case IR::Opcode::FSwizzleAdd:
|
||||
return FoldFSwizzleAdd(block, inst);
|
||||
case IR::Opcode::GetCbufF32:
|
||||
case IR::Opcode::GetCbufU32:
|
||||
if (env.HasHLEMacroState()) {
|
||||
FoldConstBuffer(env, block, inst);
|
||||
}
|
||||
if (env.IsPropietaryDriver()) {
|
||||
FoldDriverConstBuffer(env, block, inst, 1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void ConstantPropagationPass(IR::Program& program) {
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program) {
|
||||
const auto end{program.post_order_blocks.rend()};
|
||||
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
|
||||
IR::Block* const block{*it};
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
ConstantPropagation(*block, inst);
|
||||
ConstantPropagation(env, *block, inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/host_translate_info.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
||||
}
|
||||
|
||||
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
|
||||
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
|
||||
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
IR::U32 offset;
|
||||
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
|
||||
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
|
||||
}
|
||||
// Subtract the least significant 32 bits from the guest offset. The result is the storage
|
||||
// buffer offset in bytes.
|
||||
const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
|
||||
// Align the offset base to match the host alignment requirements
|
||||
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
|
||||
return ir.ISub(offset, low_cbuf);
|
||||
}
|
||||
|
||||
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
|
||||
StorageInfo info;
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
||||
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
|
||||
IR::Block* const block{storage_inst.block};
|
||||
IR::Inst* const inst{storage_inst.inst};
|
||||
const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
|
||||
const IR::U32 offset{
|
||||
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
|
||||
Replace(*block, *inst, index, offset);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ struct HostTranslateInfo;
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void CollectShaderInfoPass(Environment& env, IR::Program& program);
|
||||
void ConstantPropagationPass(IR::Program& program);
|
||||
void ConstantPropagationPass(Environment& env, IR::Program& program);
|
||||
void DeadCodeEliminationPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
|
||||
void IdentityRemovalPass(IR::Program& program);
|
||||
void LowerFp16ToFp32(IR::Program& program);
|
||||
void LowerInt64ToInt32(IR::Program& program);
|
||||
|
||||
@@ -55,6 +55,8 @@ struct Profile {
|
||||
|
||||
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
/// The Position builtin needs to be wrapped in a struct when used as an input
|
||||
bool has_broken_spirv_position_input{};
|
||||
/// Offset image operands with an unsigned type do not work
|
||||
bool has_broken_unsigned_image_offsets{};
|
||||
/// Signed instructions with unsigned data types are misinterpreted
|
||||
|
||||
@@ -16,6 +16,12 @@
|
||||
|
||||
namespace Shader {
|
||||
|
||||
enum class ReplaceConstant : u32 {
|
||||
BaseInstance,
|
||||
BaseVertex,
|
||||
DrawID,
|
||||
};
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
@@ -59,6 +65,8 @@ enum class Interpolation {
|
||||
struct ConstantBufferDescriptor {
|
||||
u32 index;
|
||||
u32 count;
|
||||
|
||||
auto operator<=>(const ConstantBufferDescriptor&) const = default;
|
||||
};
|
||||
|
||||
struct StorageBufferDescriptor {
|
||||
@@ -66,6 +74,8 @@ struct StorageBufferDescriptor {
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
bool is_written;
|
||||
|
||||
auto operator<=>(const StorageBufferDescriptor&) const = default;
|
||||
};
|
||||
|
||||
struct TextureBufferDescriptor {
|
||||
@@ -78,6 +88,8 @@ struct TextureBufferDescriptor {
|
||||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const TextureBufferDescriptor&) const = default;
|
||||
};
|
||||
using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
|
||||
|
||||
@@ -89,6 +101,8 @@ struct ImageBufferDescriptor {
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const ImageBufferDescriptor&) const = default;
|
||||
};
|
||||
using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
|
||||
|
||||
@@ -104,6 +118,8 @@ struct TextureDescriptor {
|
||||
u32 secondary_shift_left;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const TextureDescriptor&) const = default;
|
||||
};
|
||||
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
|
||||
|
||||
@@ -116,6 +132,8 @@ struct ImageDescriptor {
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
u32 size_shift;
|
||||
|
||||
auto operator<=>(const ImageDescriptor&) const = default;
|
||||
};
|
||||
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
namespace Shader {
|
||||
|
||||
struct VaryingState {
|
||||
std::bitset<256> mask{};
|
||||
std::bitset<512> mask{};
|
||||
|
||||
void Set(IR::Attribute attribute, bool state = true) {
|
||||
mask[static_cast<size_t>(attribute)] = state;
|
||||
|
||||
@@ -7,6 +7,7 @@ add_executable(tests
|
||||
common/fibers.cpp
|
||||
common/host_memory.cpp
|
||||
common/param_package.cpp
|
||||
common/range_map.cpp
|
||||
common/ring_buffer.cpp
|
||||
common/scratch_buffer.cpp
|
||||
common/unique_function.cpp
|
||||
|
||||
70
src/tests/common/range_map.cpp
Normal file
70
src/tests/common/range_map.cpp
Normal file
@@ -0,0 +1,70 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
#include "common/range_map.h"
|
||||
|
||||
enum class MappedEnum : u32 {
|
||||
Invalid = 0,
|
||||
Valid_1 = 1,
|
||||
Valid_2 = 2,
|
||||
Valid_3 = 3,
|
||||
};
|
||||
|
||||
TEST_CASE("Range Map: Setup", "[video_core]") {
|
||||
Common::RangeMap<u64, MappedEnum> my_map(MappedEnum::Invalid);
|
||||
my_map.Map(3000, 3500, MappedEnum::Valid_1);
|
||||
my_map.Unmap(3200, 3600);
|
||||
my_map.Map(4000, 4500, MappedEnum::Valid_2);
|
||||
my_map.Map(4200, 4400, MappedEnum::Valid_2);
|
||||
my_map.Map(4200, 4400, MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(4200) == 200);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(3000) == 200);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(2900) == 0);
|
||||
|
||||
REQUIRE(my_map.GetValueAt(2900) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(3100) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(3000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(3200) == MappedEnum::Invalid);
|
||||
|
||||
REQUIRE(my_map.GetValueAt(4199) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(4200) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(4400) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(4500) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(4600) == MappedEnum::Invalid);
|
||||
|
||||
my_map.Unmap(0, 6000);
|
||||
for (u64 address = 0; address < 10000; address += 1000) {
|
||||
REQUIRE(my_map.GetContinousSizeFrom(address) == 0);
|
||||
}
|
||||
|
||||
my_map.Map(1000, 3000, MappedEnum::Valid_1);
|
||||
my_map.Map(4000, 5000, MappedEnum::Valid_1);
|
||||
my_map.Map(2500, 4100, MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 4000);
|
||||
|
||||
my_map.Map(1000, 3000, MappedEnum::Valid_1);
|
||||
my_map.Map(4000, 5000, MappedEnum::Valid_2);
|
||||
my_map.Map(2500, 4100, MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1500);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(2500) == 1600);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(4100) == 900);
|
||||
REQUIRE(my_map.GetValueAt(900) == MappedEnum::Invalid);
|
||||
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(2500) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(4100) == MappedEnum::Valid_2);
|
||||
REQUIRE(my_map.GetValueAt(5000) == MappedEnum::Invalid);
|
||||
|
||||
my_map.Map(2000, 6000, MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(1000) == 1000);
|
||||
REQUIRE(my_map.GetContinousSizeFrom(3000) == 3000);
|
||||
REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(1999) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(1500) == MappedEnum::Valid_1);
|
||||
REQUIRE(my_map.GetValueAt(2001) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(5999) == MappedEnum::Valid_3);
|
||||
REQUIRE(my_map.GetValueAt(6000) == MappedEnum::Invalid);
|
||||
}
|
||||
@@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
|
||||
int num = 0;
|
||||
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||
REQUIRE(num == 0);
|
||||
REQUIRE(num == 1);
|
||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
||||
buffer.FlushCachedWrites();
|
||||
|
||||
@@ -13,6 +13,7 @@ add_library(video_core STATIC
|
||||
buffer_cache/buffer_base.h
|
||||
buffer_cache/buffer_cache.cpp
|
||||
buffer_cache/buffer_cache.h
|
||||
cache_types.h
|
||||
cdma_pusher.cpp
|
||||
cdma_pusher.h
|
||||
compatible_formats.cpp
|
||||
@@ -84,6 +85,7 @@ add_library(video_core STATIC
|
||||
gpu.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
invalidation_accumulator.h
|
||||
memory_manager.cpp
|
||||
memory_manager.h
|
||||
precompiled_headers.h
|
||||
@@ -189,6 +191,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_texture_cache.cpp
|
||||
renderer_vulkan/vk_texture_cache.h
|
||||
renderer_vulkan/vk_texture_cache_base.cpp
|
||||
renderer_vulkan/vk_turbo_mode.cpp
|
||||
renderer_vulkan/vk_turbo_mode.h
|
||||
renderer_vulkan/vk_update_descriptor.cpp
|
||||
renderer_vulkan/vk_update_descriptor.h
|
||||
shader_cache.cpp
|
||||
|
||||
@@ -430,7 +430,7 @@ private:
|
||||
if (query_begin >= SizeBytes() || size < 0) {
|
||||
return;
|
||||
}
|
||||
u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
|
||||
u64* const state_words = Array<type>();
|
||||
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
||||
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
||||
@@ -483,7 +483,7 @@ private:
|
||||
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
||||
}
|
||||
// Exclude CPU modified pages when visiting GPU pages
|
||||
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
|
||||
const u64 word = current_word;
|
||||
u64 page = page_begin;
|
||||
page_begin = 0;
|
||||
|
||||
@@ -531,7 +531,7 @@ private:
|
||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
@@ -539,8 +539,7 @@ private:
|
||||
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
||||
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
@@ -564,7 +563,7 @@ private:
|
||||
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const u64* const untracked_words = Array<Type::Untracked>();
|
||||
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||
const u64* const state_words = Array<type>();
|
||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||
@@ -574,8 +573,7 @@ private:
|
||||
u64 begin = std::numeric_limits<u64>::max();
|
||||
u64 end = 0;
|
||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
||||
const u64 word = state_words[word_index] & ~off_word;
|
||||
const u64 word = state_words[word_index];
|
||||
if (word == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -200,7 +200,16 @@ public:
|
||||
/// Return true when a CPU region is modified from the CPU
|
||||
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||
|
||||
std::mutex mutex;
|
||||
void SetDrawIndirect(
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
|
||||
current_draw_indirect = current_draw_indirect_;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
Runtime& runtime;
|
||||
|
||||
private:
|
||||
@@ -272,6 +281,8 @@ private:
|
||||
|
||||
void BindHostVertexBuffers();
|
||||
|
||||
void BindHostDrawIndirectBuffers();
|
||||
|
||||
void BindHostGraphicsUniformBuffers(size_t stage);
|
||||
|
||||
void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
|
||||
@@ -298,6 +309,8 @@ private:
|
||||
|
||||
void UpdateVertexBuffer(u32 index);
|
||||
|
||||
void UpdateDrawIndirect();
|
||||
|
||||
void UpdateUniformBuffers(size_t stage);
|
||||
|
||||
void UpdateStorageBuffers(size_t stage);
|
||||
@@ -372,6 +385,8 @@ private:
|
||||
SlotVector<Buffer> slot_buffers;
|
||||
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
||||
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
|
||||
|
||||
u32 last_index_count = 0;
|
||||
|
||||
Binding index_buffer;
|
||||
@@ -380,6 +395,8 @@ private:
|
||||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
@@ -674,6 +691,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
||||
}
|
||||
BindHostVertexBuffers();
|
||||
BindHostTransformFeedbackBuffers();
|
||||
if (current_draw_indirect) {
|
||||
BindHostDrawIndirectBuffers();
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@@ -823,6 +843,7 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
||||
template <class P>
|
||||
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||
AccumulateFlushes();
|
||||
|
||||
if (committed_ranges.empty()) {
|
||||
return;
|
||||
}
|
||||
@@ -869,7 +890,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||
buffer_id,
|
||||
});
|
||||
// Align up to avoid cache conflicts
|
||||
constexpr u64 align = 256ULL;
|
||||
constexpr u64 align = 8ULL;
|
||||
constexpr u64 mask = ~(align - 1ULL);
|
||||
total_size_bytes += (new_size + align - 1) & mask;
|
||||
largest_copy = std::max(largest_copy, new_size);
|
||||
@@ -1041,6 +1062,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostDrawIndirectBuffers() {
|
||||
const auto bind_buffer = [this](const Binding& binding) {
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
bind_buffer(count_buffer_binding);
|
||||
}
|
||||
bind_buffer(indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||
u32 dirty = ~0U;
|
||||
@@ -1272,6 +1306,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
||||
UpdateStorageBuffers(stage);
|
||||
UpdateTextureBuffers(stage);
|
||||
}
|
||||
if (current_draw_indirect) {
|
||||
UpdateDrawIndirect();
|
||||
}
|
||||
} while (has_deleted_buffers);
|
||||
}
|
||||
|
||||
@@ -1289,7 +1326,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const auto& index_array = draw_state.index_buffer;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
|
||||
if (!flags[Dirty::IndexBuffer]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::IndexBuffer] = false;
|
||||
@@ -1361,6 +1398,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
||||
};
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateDrawIndirect() {
|
||||
const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
binding = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
binding = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = static_cast<u32>(size),
|
||||
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
|
||||
};
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
|
||||
}
|
||||
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
|
||||
indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
@@ -1880,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
||||
bool is_written) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
const u32 alignment = runtime.GetStorageBufferAlignment();
|
||||
|
||||
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
|
||||
const u32 aligned_size =
|
||||
Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
|
||||
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
|
||||
if (!cpu_addr || size == 0) {
|
||||
return NULL_BINDING;
|
||||
}
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
|
||||
|
||||
const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
|
||||
const Binding binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
|
||||
.buffer_id = BufferId{},
|
||||
};
|
||||
return binding;
|
||||
@@ -1941,4 +2006,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
|
||||
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
|
||||
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
24
src/video_core/cache_types.h
Normal file
24
src/video_core/cache_types.h
Normal file
@@ -0,0 +1,24 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
enum class CacheType : u32 {
|
||||
None = 0,
|
||||
TextureCache = 1 << 0,
|
||||
QueryCache = 1 << 1,
|
||||
BufferCache = 1 << 2,
|
||||
ShaderCache = 1 << 3,
|
||||
NoTextureCache = QueryCache | BufferCache | ShaderCache,
|
||||
NoBufferCache = TextureCache | QueryCache | ShaderCache,
|
||||
NoQueryCache = TextureCache | BufferCache | ShaderCache,
|
||||
All = TextureCache | QueryCache | BufferCache | ShaderCache,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(CacheType)
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -61,7 +61,7 @@ bool DmaPusher::Step() {
|
||||
} else {
|
||||
const CommandListHeader command_list_header{
|
||||
command_list.command_lists[dma_pushbuffer_subindex++]};
|
||||
const GPUVAddr dma_get = command_list_header.addr;
|
||||
dma_state.dma_get = command_list_header.addr;
|
||||
|
||||
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
|
||||
// We've gone through the current list, remove it from the queue
|
||||
@@ -75,12 +75,22 @@ bool DmaPusher::Step() {
|
||||
|
||||
// Push buffer non-empty, read a word
|
||||
command_headers.resize_destructive(command_list_header.size);
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
constexpr u32 MacroRegistersStart = 0xE00;
|
||||
if (dma_state.method < MacroRegistersStart) {
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
}
|
||||
} else {
|
||||
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
const size_t copy_size = command_list_header.size * sizeof(u32);
|
||||
if (subchannels[dma_state.subchannel]) {
|
||||
subchannels[dma_state.subchannel]->current_dirty =
|
||||
memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
|
||||
}
|
||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
|
||||
}
|
||||
ProcessCommands(command_headers);
|
||||
}
|
||||
@@ -94,6 +104,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||
|
||||
if (dma_state.method_count) {
|
||||
// Data word of methods command
|
||||
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
||||
if (dma_state.non_incrementing) {
|
||||
const u32 max_write = static_cast<u32>(
|
||||
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||
@@ -132,6 +143,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||
case SubmissionMode::Inline:
|
||||
dma_state.method = command_header.method;
|
||||
dma_state.subchannel = command_header.subchannel;
|
||||
dma_state.dma_word_offset = static_cast<u64>(
|
||||
-static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
|
||||
CallMethod(command_header.arg_count);
|
||||
dma_state.non_incrementing = true;
|
||||
dma_increment_once = false;
|
||||
@@ -164,8 +177,14 @@ void DmaPusher::CallMethod(u32 argument) const {
|
||||
dma_state.method_count,
|
||||
});
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
|
||||
dma_state.is_last_call);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
if (!subchannel->execution_mask[dma_state.method]) [[likely]] {
|
||||
subchannel->method_sink.emplace_back(dma_state.method, argument);
|
||||
return;
|
||||
}
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,8 +193,11 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
||||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
|
||||
num_methods, dma_state.method_count);
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -156,6 +156,8 @@ private:
|
||||
u32 subchannel; ///< Current subchannel
|
||||
u32 method_count; ///< Current method count
|
||||
u32 length_pending; ///< Large NI command length pending
|
||||
GPUVAddr dma_get; ///< Currently read segment
|
||||
u64 dma_word_offset; ///< Current word ofset from address
|
||||
bool non_incrementing; ///< Current command's NI flag
|
||||
bool is_last_call;
|
||||
};
|
||||
|
||||
@@ -91,6 +91,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
|
||||
ProcessDraw(true, num_instances);
|
||||
}
|
||||
|
||||
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
|
||||
draw_state.topology = topology;
|
||||
|
||||
ProcessDrawIndirect();
|
||||
}
|
||||
|
||||
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
|
||||
u32 index_count) {
|
||||
const auto& regs{maxwell3d->regs};
|
||||
draw_state.topology = topology;
|
||||
draw_state.index_buffer = regs.index_buffer;
|
||||
draw_state.index_buffer.first = index_first;
|
||||
draw_state.index_buffer.count = index_count;
|
||||
|
||||
ProcessDrawIndirect();
|
||||
}
|
||||
|
||||
void DrawManager::SetInlineIndexBuffer(u32 index) {
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
|
||||
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
|
||||
@@ -198,4 +215,18 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
|
||||
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
|
||||
}
|
||||
}
|
||||
|
||||
void DrawManager::ProcessDrawIndirect() {
|
||||
LOG_TRACE(
|
||||
HW_GPU,
|
||||
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
|
||||
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
|
||||
indirect_state.buffer_size, indirect_state.max_draw_counts);
|
||||
|
||||
UpdateTopology();
|
||||
|
||||
if (maxwell3d->ShouldExecute()) {
|
||||
maxwell3d->rasterizer->DrawIndirect();
|
||||
}
|
||||
}
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -32,6 +32,16 @@ public:
|
||||
std::vector<u8> inline_index_draw_indexes;
|
||||
};
|
||||
|
||||
struct IndirectParams {
|
||||
bool is_indexed;
|
||||
bool include_count;
|
||||
GPUVAddr count_start_address;
|
||||
GPUVAddr indirect_start_address;
|
||||
size_t buffer_size;
|
||||
size_t max_draw_counts;
|
||||
size_t stride;
|
||||
};
|
||||
|
||||
explicit DrawManager(Maxwell3D* maxwell_3d);
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument);
|
||||
@@ -46,10 +56,22 @@ public:
|
||||
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
|
||||
u32 base_instance, u32 num_instances);
|
||||
|
||||
void DrawArrayIndirect(PrimitiveTopology topology);
|
||||
|
||||
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
|
||||
|
||||
const State& GetDrawState() const {
|
||||
return draw_state;
|
||||
}
|
||||
|
||||
IndirectParams& GetIndirectParams() {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
const IndirectParams& GetIndirectParams() const {
|
||||
return indirect_state;
|
||||
}
|
||||
|
||||
private:
|
||||
void SetInlineIndexBuffer(u32 index);
|
||||
|
||||
@@ -63,7 +85,10 @@ private:
|
||||
|
||||
void ProcessDraw(bool draw_indexed, u32 instance_count);
|
||||
|
||||
void ProcessDrawIndirect();
|
||||
|
||||
Maxwell3D* maxwell3d{};
|
||||
State draw_state{};
|
||||
IndirectParams indirect_state{};
|
||||
};
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
@@ -17,6 +21,26 @@ public:
|
||||
/// Write multiple values to the register identified by method.
|
||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) = 0;
|
||||
|
||||
void ConsumeSink() {
|
||||
if (method_sink.empty()) {
|
||||
return;
|
||||
}
|
||||
ConsumeSinkImpl();
|
||||
}
|
||||
|
||||
std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
|
||||
std::vector<std::pair<u32, u32>> method_sink{};
|
||||
bool current_dirty{};
|
||||
GPUVAddr current_dma_segment;
|
||||
|
||||
protected:
|
||||
virtual void ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
CallMethod(method, value, true);
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
|
||||
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
|
||||
x_elements, regs.line_count, regs.dest.BlockHeight(),
|
||||
regs.dest.BlockDepth(), regs.line_length_in);
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
@@ -20,11 +21,14 @@ namespace Tegra::Engines {
|
||||
|
||||
using namespace Texture;
|
||||
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
|
||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
|
||||
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
|
||||
execution_mask.reset();
|
||||
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
@@ -49,6 +53,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void Fermi2D::Blit() {
|
||||
MICROPROFILE_SCOPE(GPU_BlitEngine);
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
@@ -94,6 +105,7 @@ void Fermi2D::Blit() {
|
||||
config.src_x0 = 0;
|
||||
}
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||
sw_blitter->Blit(src, regs.dst, config);
|
||||
}
|
||||
|
||||
@@ -305,10 +305,13 @@ public:
|
||||
private:
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void Blit();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
||||
@@ -14,7 +14,12 @@
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
|
||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
|
||||
execution_mask.reset();
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
|
||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
|
||||
}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
@@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
|
||||
upload_state.BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
void KeplerCompute::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid KeplerCompute register, increase the size of the Regs structure");
|
||||
|
||||
@@ -204,6 +204,8 @@ public:
|
||||
private:
|
||||
void ProcessLaunch();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
|
||||
@@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
|
||||
|
||||
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
upload_state.BindRasterizer(rasterizer_);
|
||||
|
||||
execution_mask.reset();
|
||||
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
|
||||
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
|
||||
}
|
||||
|
||||
void KeplerMemory::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
|
||||
@@ -73,6 +73,8 @@ public:
|
||||
} regs{};
|
||||
|
||||
private:
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
Core::System& system;
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
@@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
||||
regs.upload} {
|
||||
dirty.flags.flip();
|
||||
InitializeRegisterDefaults();
|
||||
execution_mask.reset();
|
||||
for (size_t i = 0; i < execution_mask.size(); i++) {
|
||||
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
|
||||
}
|
||||
}
|
||||
|
||||
Maxwell3D::~Maxwell3D() = default;
|
||||
@@ -121,6 +127,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
shadow_state = regs;
|
||||
}
|
||||
|
||||
bool Maxwell3D::IsMethodExecutable(u32 method) {
|
||||
if (method >= MacroRegistersStart) {
|
||||
return true;
|
||||
}
|
||||
switch (method) {
|
||||
case MAXWELL3D_REG_INDEX(draw.end):
|
||||
case MAXWELL3D_REG_INDEX(draw.begin):
|
||||
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
|
||||
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer.first):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer.count):
|
||||
case MAXWELL3D_REG_INDEX(draw_inline_index):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer32_first):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer16_first):
|
||||
case MAXWELL3D_REG_INDEX(index_buffer8_first):
|
||||
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
|
||||
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
|
||||
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
|
||||
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
|
||||
case MAXWELL3D_REG_INDEX(wait_for_idle):
|
||||
case MAXWELL3D_REG_INDEX(shadow_ram_control):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction):
|
||||
case MAXWELL3D_REG_INDEX(load_mme.start_address):
|
||||
case MAXWELL3D_REG_INDEX(falcon[4]):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer):
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14:
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
|
||||
case MAXWELL3D_REG_INDEX(topology_override):
|
||||
case MAXWELL3D_REG_INDEX(clear_surface):
|
||||
case MAXWELL3D_REG_INDEX(report_semaphore.query):
|
||||
case MAXWELL3D_REG_INDEX(render_enable.mode):
|
||||
case MAXWELL3D_REG_INDEX(clear_report_value):
|
||||
case MAXWELL3D_REG_INDEX(sync_info):
|
||||
case MAXWELL3D_REG_INDEX(launch_dma):
|
||||
case MAXWELL3D_REG_INDEX(inline_data):
|
||||
case MAXWELL3D_REG_INDEX(fragment_barrier):
|
||||
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
|
||||
if (executing_macro == 0) {
|
||||
// A macro call must begin by writing the macro method's register, not its argument.
|
||||
@@ -130,14 +201,72 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
||||
}
|
||||
|
||||
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
||||
for (size_t i = 0; i < amount; i++) {
|
||||
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
||||
}
|
||||
macro_segments.emplace_back(current_dma_segment, amount);
|
||||
current_macro_dirty |= current_dirty;
|
||||
current_dirty = false;
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (is_last_call) {
|
||||
ConsumeSink();
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
macro_addresses.clear();
|
||||
macro_segments.clear();
|
||||
current_macro_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::RefreshParametersImpl() {
|
||||
size_t current_index = 0;
|
||||
for (auto& segment : macro_segments) {
|
||||
if (segment.first == 0) {
|
||||
current_index += segment.second;
|
||||
continue;
|
||||
}
|
||||
memory_manager.ReadBlock(segment.first, ¯o_params[current_index],
|
||||
sizeof(u32) * segment.second);
|
||||
current_index += segment.second;
|
||||
}
|
||||
}
|
||||
|
||||
u32 Maxwell3D::GetMaxCurrentVertices() {
|
||||
u32 num_vertices = 0;
|
||||
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
|
||||
const auto& array = regs.vertex_streams[index];
|
||||
if (array.enable == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto& attribute = regs.vertex_attrib_format[index];
|
||||
if (attribute.constant) {
|
||||
num_vertices = std::max(num_vertices, 1U);
|
||||
continue;
|
||||
}
|
||||
const auto& limit = regs.vertex_stream_limits[index];
|
||||
const GPUVAddr gpu_addr_begin = array.Address();
|
||||
const GPUVAddr gpu_addr_end = limit.Address() + 1;
|
||||
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
num_vertices = std::max(
|
||||
num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value()));
|
||||
}
|
||||
return num_vertices;
|
||||
}
|
||||
|
||||
size_t Maxwell3D::EstimateIndexBufferSize() {
|
||||
GPUVAddr start_address = regs.index_buffer.StartAddress();
|
||||
GPUVAddr end_address = regs.index_buffer.EndAddress();
|
||||
constexpr std::array<size_t, 4> max_sizes = {
|
||||
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
|
||||
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
|
||||
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
|
||||
return std::min<size_t>(
|
||||
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
|
||||
byte_size,
|
||||
static_cast<size_t>(end_address - start_address));
|
||||
}
|
||||
|
||||
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||
// Keep track of the register value in shadow_state when requested.
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
@@ -152,6 +281,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||
return argument;
|
||||
}
|
||||
|
||||
void Maxwell3D::ConsumeSinkImpl() {
|
||||
SCOPE_EXIT({ method_sink.clear(); });
|
||||
const auto control = shadow_state.shadow_ram_control;
|
||||
if (control == Regs::ShadowRamControl::Track ||
|
||||
control == Regs::ShadowRamControl::TrackWithFilter) {
|
||||
|
||||
for (auto [method, value] : method_sink) {
|
||||
shadow_state.reg_array[method] = value;
|
||||
ProcessDirtyRegisters(method, value);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (control == Regs::ShadowRamControl::Replay) {
|
||||
for (auto [method, value] : method_sink) {
|
||||
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (auto [method, value] : method_sink) {
|
||||
ProcessDirtyRegisters(method, value);
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
|
||||
if (regs.reg_array[method] == argument) {
|
||||
return;
|
||||
@@ -263,7 +415,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
|
||||
const u32 argument = ProcessShadowRam(method, method_argument);
|
||||
ProcessDirtyRegisters(method, argument);
|
||||
|
||||
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
||||
}
|
||||
|
||||
@@ -294,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
|
||||
ProcessCBMultiData(base_start, amount);
|
||||
break;
|
||||
case MAXWELL3D_REG_INDEX(inline_data):
|
||||
case MAXWELL3D_REG_INDEX(inline_data): {
|
||||
ASSERT(methods_pending == amount);
|
||||
upload_state.ProcessData(base_start, amount);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
for (u32 i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
@@ -332,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
|
||||
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
|
||||
}
|
||||
|
||||
switch (regs.report_semaphore.query.operation) {
|
||||
case Regs::ReportSemaphore::Operation::Release:
|
||||
if (regs.report_semaphore.query.short_query != 0) {
|
||||
@@ -389,7 +537,11 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||
case Regs::RenderEnable::Override::NeverRender:
|
||||
execute_on = false;
|
||||
break;
|
||||
case Regs::RenderEnable::Override::UseRenderEnable:
|
||||
case Regs::RenderEnable::Override::UseRenderEnable: {
|
||||
if (rasterizer->AccelerateConditionalRendering()) {
|
||||
execute_on = true;
|
||||
return;
|
||||
}
|
||||
switch (regs.render_enable.mode) {
|
||||
case Regs::RenderEnable::Mode::True: {
|
||||
execute_on = true;
|
||||
@@ -427,6 +579,7 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCounterReset() {
|
||||
@@ -463,7 +616,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessCBBind(size_t stage_index) {
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
|
||||
// stage.
|
||||
const auto& bind_data = regs.bind_groups[stage_index];
|
||||
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot];
|
||||
buffer.enabled = bind_data.valid.Value() != 0;
|
||||
@@ -490,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
|
||||
|
||||
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
|
||||
const size_t copy_size = amount * sizeof(u32);
|
||||
memory_manager.WriteBlock(address, start_base, copy_size);
|
||||
memory_manager.WriteBlockCached(address, start_base, copy_size);
|
||||
|
||||
// Increment the current buffer position.
|
||||
regs.const_buffer.offset += static_cast<u32>(copy_size);
|
||||
@@ -524,4 +678,10 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
|
||||
HLEReplacementAttributeType name) {
|
||||
const u64 key = (static_cast<u64>(bank) << 32) | offset;
|
||||
replace_table.emplace(key, name);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -272,6 +272,7 @@ public:
|
||||
};
|
||||
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, Mode> mode;
|
||||
BitField<4, 8, u32> pad;
|
||||
};
|
||||
@@ -1217,10 +1218,12 @@ public:
|
||||
|
||||
struct Window {
|
||||
union {
|
||||
u32 raw_x;
|
||||
BitField<0, 16, u32> x_min;
|
||||
BitField<16, 16, u32> x_max;
|
||||
};
|
||||
union {
|
||||
u32 raw_y;
|
||||
BitField<0, 16, u32> y_min;
|
||||
BitField<16, 16, u32> y_max;
|
||||
};
|
||||
@@ -2708,7 +2711,7 @@ public:
|
||||
u32 post_z_pixel_imask; ///< 0x0F1C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x20);
|
||||
ConstantColorRendering const_color_rendering; ///< 0x0F40
|
||||
s32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_ref; ///< 0x0F54
|
||||
u32 stencil_back_mask; ///< 0x0F58
|
||||
u32 stencil_back_func_mask; ///< 0x0F5C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x14);
|
||||
@@ -2832,9 +2835,9 @@ public:
|
||||
Blend blend; ///< 0x133C
|
||||
u32 stencil_enable; ///< 0x1380
|
||||
StencilOp stencil_front_op; ///< 0x1384
|
||||
s32 stencil_front_ref; ///< 0x1394
|
||||
s32 stencil_front_func_mask; ///< 0x1398
|
||||
s32 stencil_front_mask; ///< 0x139C
|
||||
u32 stencil_front_ref; ///< 0x1394
|
||||
u32 stencil_front_func_mask; ///< 0x1398
|
||||
u32 stencil_front_mask; ///< 0x139C
|
||||
INSERT_PADDING_BYTES_NOINIT(0x4);
|
||||
u32 draw_auto_start_byte_count; ///< 0x13A4
|
||||
PsSaturate frag_color_clamp; ///< 0x13A8
|
||||
@@ -3020,6 +3023,24 @@ public:
|
||||
/// Store temporary hw register values, used by some calls to restore state after a operation
|
||||
Regs shadow_state;
|
||||
|
||||
// None Engine
|
||||
enum class EngineHint : u32 {
|
||||
None = 0x0,
|
||||
OnHLEMacro = 0x1,
|
||||
};
|
||||
|
||||
EngineHint engine_state{EngineHint::None};
|
||||
|
||||
enum class HLEReplacementAttributeType : u32 {
|
||||
BaseVertex = 0x0,
|
||||
BaseInstance = 0x1,
|
||||
DrawID = 0x2,
|
||||
};
|
||||
|
||||
void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
|
||||
|
||||
std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
||||
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
||||
|
||||
@@ -3067,6 +3088,35 @@ public:
|
||||
std::unique_ptr<DrawManager> draw_manager;
|
||||
friend class DrawManager;
|
||||
|
||||
GPUVAddr GetMacroAddress(size_t index) const {
|
||||
return macro_addresses[index];
|
||||
}
|
||||
|
||||
void RefreshParameters() {
|
||||
if (!current_macro_dirty) {
|
||||
return;
|
||||
}
|
||||
RefreshParametersImpl();
|
||||
}
|
||||
|
||||
bool AnyParametersDirty() const {
|
||||
return current_macro_dirty;
|
||||
}
|
||||
|
||||
u32 GetMaxCurrentVertices();
|
||||
|
||||
size_t EstimateIndexBufferSize();
|
||||
|
||||
/// Handles a write to the CLEAR_BUFFERS register.
|
||||
void ProcessClearBuffers(u32 layer_count);
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void ProcessCBData(u32 value);
|
||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||
|
||||
private:
|
||||
void InitializeRegisterDefaults();
|
||||
|
||||
@@ -3076,6 +3126,8 @@ private:
|
||||
|
||||
void ProcessDirtyRegisters(u32 method, u32 argument);
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
@@ -3116,16 +3168,13 @@ private:
|
||||
/// Handles writes to syncing register.
|
||||
void ProcessSyncPoint();
|
||||
|
||||
/// Handles a write to the CB_DATA[i] register.
|
||||
void ProcessCBData(u32 value);
|
||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||
|
||||
/// Handles a write to the CB_BIND register.
|
||||
void ProcessCBBind(size_t stage_index);
|
||||
|
||||
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||
std::optional<u64> GetQueryResult();
|
||||
|
||||
void RefreshParametersImpl();
|
||||
|
||||
bool IsMethodExecutable(u32 method);
|
||||
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
@@ -3145,6 +3194,10 @@ private:
|
||||
Upload::State upload_state;
|
||||
|
||||
bool execute_on{true};
|
||||
|
||||
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
||||
std::vector<GPUVAddr> macro_addresses;
|
||||
bool current_macro_dirty{};
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
|
||||
@@ -21,7 +21,10 @@ namespace Tegra::Engines {
|
||||
using namespace Texture;
|
||||
|
||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: system{system_}, memory_manager{memory_manager_} {}
|
||||
: system{system_}, memory_manager{memory_manager_} {
|
||||
execution_mask.reset();
|
||||
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
|
||||
}
|
||||
|
||||
MaxwellDMA::~MaxwellDMA() = default;
|
||||
|
||||
@@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
void MaxwellDMA::ConsumeSinkImpl() {
|
||||
for (auto [method, value] : method_sink) {
|
||||
regs.reg_array[method] = value;
|
||||
}
|
||||
method_sink.clear();
|
||||
}
|
||||
|
||||
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
|
||||
|
||||
@@ -59,7 +69,7 @@ void MaxwellDMA::Launch() {
|
||||
if (launch.multi_line_enable) {
|
||||
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
|
||||
|
||||
memory_manager.FlushCaching();
|
||||
if (!is_src_pitch && !is_dst_pitch) {
|
||||
// If both the source and the destination are in block layout, assert.
|
||||
CopyBlockLinearToBlockLinear();
|
||||
@@ -94,6 +104,7 @@ void MaxwellDMA::Launch() {
|
||||
reinterpret_cast<u8*>(tmp_buffer.data()),
|
||||
regs.line_length_in * sizeof(u32));
|
||||
} else {
|
||||
memory_manager.FlushCaching();
|
||||
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
|
||||
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
|
||||
((address & 0x180) >> 1) | ((address & 0x20) << 3);
|
||||
@@ -111,8 +122,8 @@ void MaxwellDMA::Launch() {
|
||||
memory_manager.ReadBlockUnsafe(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
}
|
||||
} else if (is_src_pitch && !is_dst_pitch) {
|
||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||
@@ -122,7 +133,7 @@ void MaxwellDMA::Launch() {
|
||||
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
|
||||
tmp_buffer.size());
|
||||
memory_manager.WriteBlock(
|
||||
memory_manager.WriteBlockCached(
|
||||
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
|
||||
tmp_buffer.data(), tmp_buffer.size());
|
||||
}
|
||||
@@ -131,8 +142,8 @@ void MaxwellDMA::Launch() {
|
||||
std::vector<u8> tmp_buffer(regs.line_length_in);
|
||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||
regs.line_length_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -194,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
@@ -246,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
|
||||
regs.pitch_in);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
@@ -277,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
|
||||
regs.src_params.block_size.height, regs.src_params.block_size.depth,
|
||||
regs.pitch_out);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||
@@ -337,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
|
||||
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
|
||||
dst.block_size.height, dst.block_size.depth, pitch);
|
||||
|
||||
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
|
||||
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
|
||||
}
|
||||
|
||||
void MaxwellDMA::ReleaseSemaphore() {
|
||||
|
||||
@@ -231,6 +231,8 @@ private:
|
||||
|
||||
void ReleaseSemaphore();
|
||||
|
||||
void ConsumeSinkImpl() override;
|
||||
|
||||
Core::System& system;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
@@ -47,6 +47,7 @@ set(SHADER_FILES
|
||||
vulkan_present_scaleforce_fp16.frag
|
||||
vulkan_present_scaleforce_fp32.frag
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_turbo_mode.comp
|
||||
vulkan_uint8.comp
|
||||
)
|
||||
|
||||
|
||||
29
src/video_core/host_shaders/vulkan_turbo_mode.comp
Normal file
29
src/video_core/host_shaders/vulkan_turbo_mode.comp
Normal file
@@ -0,0 +1,29 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
|
||||
|
||||
layout (binding = 0) buffer ThreadData {
|
||||
uint data[];
|
||||
};
|
||||
|
||||
uint xorshift32(uint x) {
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
x ^= x << 5;
|
||||
return x;
|
||||
}
|
||||
|
||||
uint getGlobalIndex() {
|
||||
return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_WorkGroupSize.y * gl_NumWorkGroups.y;
|
||||
}
|
||||
|
||||
void main() {
|
||||
uint myIndex = xorshift32(getGlobalIndex());
|
||||
uint otherIndex = xorshift32(myIndex);
|
||||
|
||||
uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1;
|
||||
atomicAdd(data[myIndex % data.length()], otherValue);
|
||||
}
|
||||
79
src/video_core/invalidation_accumulator.h
Normal file
79
src/video_core/invalidation_accumulator.h
Normal file
@@ -0,0 +1,79 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
class InvalidationAccumulator {
|
||||
public:
|
||||
InvalidationAccumulator() = default;
|
||||
~InvalidationAccumulator() = default;
|
||||
|
||||
void Add(GPUVAddr address, size_t size) {
|
||||
const auto reset_values = [&]() {
|
||||
if (has_collected) {
|
||||
buffer.emplace_back(start_address, accumulated_size);
|
||||
}
|
||||
start_address = address;
|
||||
accumulated_size = size;
|
||||
last_collection = start_address + size;
|
||||
};
|
||||
if (address >= start_address && address + size <= last_collection) [[likely]] {
|
||||
return;
|
||||
}
|
||||
size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
|
||||
address = address & atomicity_mask;
|
||||
if (!has_collected) [[unlikely]] {
|
||||
reset_values();
|
||||
has_collected = true;
|
||||
return;
|
||||
}
|
||||
if (address != last_collection) [[unlikely]] {
|
||||
reset_values();
|
||||
return;
|
||||
}
|
||||
accumulated_size += size;
|
||||
last_collection += size;
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
buffer.clear();
|
||||
start_address = 0;
|
||||
last_collection = 0;
|
||||
has_collected = false;
|
||||
}
|
||||
|
||||
bool AnyAccumulated() const {
|
||||
return has_collected;
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void Callback(Func&& func) {
|
||||
if (!has_collected) {
|
||||
return;
|
||||
}
|
||||
buffer.emplace_back(start_address, accumulated_size);
|
||||
for (auto& [address, size] : buffer) {
|
||||
func(address, size);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t atomicity_bits = 5;
|
||||
static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
|
||||
static constexpr size_t atomicity_size_mask = atomicity_size - 1;
|
||||
static constexpr size_t atomicity_mask = ~atomicity_size_mask;
|
||||
GPUVAddr start_address{};
|
||||
GPUVAddr last_collection{};
|
||||
size_t accumulated_size{};
|
||||
bool has_collected{};
|
||||
std::vector<std::pair<VAddr, size_t>> buffer;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -12,7 +12,9 @@
|
||||
#include "common/assert.h"
|
||||
#include "common/fs/fs.h"
|
||||
#include "common/fs/path_util.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/macro/macro_hle.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
@@ -21,6 +23,8 @@
|
||||
#include "video_core/macro/macro_jit_x64.h"
|
||||
#endif
|
||||
|
||||
MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro HLE", MP_RGB(128, 192, 192));
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
static void Dump(u64 hash, std::span<const u32> code) {
|
||||
@@ -40,8 +44,8 @@ static void Dump(u64 hash, std::span<const u32> code) {
|
||||
macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
|
||||
}
|
||||
|
||||
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
|
||||
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
|
||||
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_)
|
||||
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d_)}, maxwell3d{maxwell3d_} {}
|
||||
|
||||
MacroEngine::~MacroEngine() = default;
|
||||
|
||||
@@ -59,8 +63,10 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||
if (compiled_macro != macro_cache.end()) {
|
||||
const auto& cache_info = compiled_macro->second;
|
||||
if (cache_info.has_hle_program) {
|
||||
MICROPROFILE_SCOPE(MacroHLE);
|
||||
cache_info.hle_program->Execute(parameters, method);
|
||||
} else {
|
||||
maxwell3d.RefreshParameters();
|
||||
cache_info.lle_program->Execute(parameters, method);
|
||||
}
|
||||
} else {
|
||||
@@ -101,12 +107,15 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||
}
|
||||
}
|
||||
|
||||
if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
|
||||
auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
|
||||
if (!hle_program || Settings::values.disable_macro_hle) {
|
||||
maxwell3d.RefreshParameters();
|
||||
cache_info.lle_program->Execute(parameters, method);
|
||||
} else {
|
||||
cache_info.has_hle_program = true;
|
||||
cache_info.hle_program = std::move(hle_program);
|
||||
MICROPROFILE_SCOPE(MacroHLE);
|
||||
cache_info.hle_program->Execute(parameters, method);
|
||||
} else {
|
||||
cache_info.lle_program->Execute(parameters, method);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -137,6 +137,7 @@ private:
|
||||
std::unordered_map<u32, CacheInfo> macro_cache;
|
||||
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
|
||||
std::unique_ptr<HLEMacro> hle_macros;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||
|
||||
@@ -1,143 +1,551 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/draw_manager.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/macro/macro_hle.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
using Maxwell3D = Engines::Maxwell3D;
|
||||
|
||||
namespace {
|
||||
|
||||
using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
|
||||
|
||||
// HLE'd functions
|
||||
void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||
const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff),
|
||||
parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
|
||||
}
|
||||
|
||||
void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||
maxwell3d.draw_manager->DrawArray(
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||
parameters[3], parameters[1], parameters[4], instance_count);
|
||||
}
|
||||
|
||||
void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||
const u32 element_base = parameters[4];
|
||||
const u32 base_instance = parameters[5];
|
||||
maxwell3d.regs.vertex_id_base = element_base;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||
maxwell3d.CallMethod(0x8e4, element_base, true);
|
||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
||||
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||
}
|
||||
|
||||
// Multidraw Indirect
|
||||
void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||
SCOPE_EXIT({
|
||||
// Clean everything.
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
});
|
||||
const u32 start_indirect = parameters[0];
|
||||
const u32 end_indirect = parameters[1];
|
||||
if (start_indirect >= end_indirect) {
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
const u32 padding = parameters[3];
|
||||
const std::size_t max_draws = parameters[4];
|
||||
|
||||
const u32 indirect_words = 5 + padding;
|
||||
const std::size_t first_draw = start_indirect;
|
||||
const std::size_t effective_draws = end_indirect - start_indirect;
|
||||
const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
|
||||
|
||||
for (std::size_t index = first_draw; index < last_draw; index++) {
|
||||
const std::size_t base = index * indirect_words + 5;
|
||||
const u32 base_vertex = parameters[base + 3];
|
||||
const u32 base_instance = parameters[base + 4];
|
||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]),
|
||||
parameters[base + 2], parameters[base], base_vertex, base_instance,
|
||||
parameters[base + 1]);
|
||||
bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
|
||||
switch (topology) {
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Points:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Lines:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Patches:
|
||||
return true;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Quads:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-layer Clear
|
||||
void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
||||
ASSERT(parameters.size() == 1);
|
||||
|
||||
const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
||||
const u32 rt_index = clear_params.RT;
|
||||
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
|
||||
ASSERT(clear_params.layer == 0);
|
||||
|
||||
maxwell3d.regs.clear_surface.raw = clear_params.raw;
|
||||
maxwell3d.draw_manager->Clear(num_layers);
|
||||
}
|
||||
|
||||
constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{
|
||||
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
|
||||
{0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
|
||||
{0x0217920100488FF7, &HLE_0217920100488FF7},
|
||||
{0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164},
|
||||
{0xEAD26C3E2109B06B, &HLE_EAD26C3E2109B06B},
|
||||
}};
|
||||
|
||||
class HLEMacroImpl final : public CachedMacro {
|
||||
class HLEMacroImpl : public CachedMacro {
|
||||
public:
|
||||
explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
|
||||
: maxwell3d{maxwell3d_}, func{func_} {}
|
||||
explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, u32 method) override {
|
||||
func(maxwell3d, parameters);
|
||||
protected:
|
||||
Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
/*
|
||||
* @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
* also assigning the base vertex/instance.
|
||||
*/
|
||||
template <bool extended>
|
||||
class HLE_DrawArraysIndirect final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_indexed = false;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
|
||||
params.buffer_size = 4 * sizeof(u32);
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = 0;
|
||||
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
||||
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
HLEFunction func;
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
SCOPE_EXIT({
|
||||
if (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
});
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
const u32 vertex_first = parameters[3];
|
||||
const u32 vertex_count = parameters[1];
|
||||
|
||||
if (!IsTopologySafe(topology) &&
|
||||
static_cast<size_t>(maxwell3d.GetMaxCurrentVertices()) <
|
||||
static_cast<size_t>(vertex_first) + static_cast<size_t>(vertex_count)) {
|
||||
ASSERT_MSG(false, "Faulty draw!");
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 base_instance = parameters[4];
|
||||
if constexpr (extended) {
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
|
||||
instance_count);
|
||||
|
||||
if constexpr (extended) {
|
||||
maxwell3d.regs.global_base_instance_index = 0;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
* also assigning the base vertex/instance.
|
||||
*/
|
||||
template <bool extended>
|
||||
class HLE_DrawIndexedIndirect final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||
const u32 element_base = parameters[4];
|
||||
const u32 base_instance = parameters[5];
|
||||
maxwell3d.regs.vertex_id_base = element_base;
|
||||
maxwell3d.regs.global_base_vertex_index = element_base;
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_indexed = true;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
|
||||
params.buffer_size = 5 * sizeof(u32);
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = 0;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.regs.global_base_vertex_index = 0x0;
|
||||
maxwell3d.regs.global_base_instance_index = 0x0;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||
const u32 element_base = parameters[4];
|
||||
const u32 base_instance = parameters[5];
|
||||
maxwell3d.regs.vertex_id_base = element_base;
|
||||
maxwell3d.regs.global_base_vertex_index = element_base;
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), parameters[3],
|
||||
parameters[1], element_base, base_instance, instance_count);
|
||||
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.regs.global_base_vertex_index = 0x0;
|
||||
maxwell3d.regs.global_base_instance_index = 0x0;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_MultiLayerClear final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
ASSERT(parameters.size() == 1);
|
||||
|
||||
const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
||||
const u32 rt_index = clear_params.RT;
|
||||
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
|
||||
ASSERT(clear_params.layer == 0);
|
||||
|
||||
maxwell3d.regs.clear_surface.raw = clear_params.raw;
|
||||
maxwell3d.draw_manager->Clear(num_layers);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||
if (!IsTopologySafe(topology)) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 start_indirect = parameters[0];
|
||||
const u32 end_indirect = parameters[1];
|
||||
if (start_indirect >= end_indirect) {
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 padding = parameters[3]; // padding is in words
|
||||
|
||||
// size of each indirect segment
|
||||
const u32 indirect_words = 5 + padding;
|
||||
const u32 stride = indirect_words * sizeof(u32);
|
||||
const std::size_t draw_count = end_indirect - start_indirect;
|
||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_indexed = true;
|
||||
params.include_count = true;
|
||||
params.count_start_address = maxwell3d.GetMacroAddress(4);
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
|
||||
params.buffer_size = stride * draw_count;
|
||||
params.max_draw_counts = draw_count;
|
||||
params.stride = stride;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(0, 0x648,
|
||||
Maxwell3D::HLEReplacementAttributeType::DrawID);
|
||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
SCOPE_EXIT({
|
||||
// Clean everything.
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
});
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 start_indirect = parameters[0];
|
||||
const u32 end_indirect = parameters[1];
|
||||
if (start_indirect >= end_indirect) {
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||
const u32 padding = parameters[3];
|
||||
const std::size_t max_draws = parameters[4];
|
||||
|
||||
const u32 indirect_words = 5 + padding;
|
||||
const std::size_t first_draw = start_indirect;
|
||||
const std::size_t effective_draws = end_indirect - start_indirect;
|
||||
const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
|
||||
|
||||
for (std::size_t index = first_draw; index < last_draw; index++) {
|
||||
const std::size_t base = index * indirect_words + 5;
|
||||
const u32 base_vertex = parameters[base + 3];
|
||||
const u32 base_instance = parameters[base + 4];
|
||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
maxwell3d.CallMethod(0x8e3, 0x648, true);
|
||||
maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true);
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
|
||||
base_vertex, base_instance, parameters[base + 1]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
|
||||
const u32 address = maxwell3d.regs.shadow_scratch[24];
|
||||
auto& const_buffer = maxwell3d.regs.const_buffer;
|
||||
const_buffer.size = 0x7000;
|
||||
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||
const_buffer.address_low = address << 8;
|
||||
const_buffer.offset = offset;
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_D7333D26E0A93EDE final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
const size_t index = parameters[0];
|
||||
const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
|
||||
const u32 size = maxwell3d.regs.shadow_scratch[47 + index];
|
||||
auto& const_buffer = maxwell3d.regs.const_buffer;
|
||||
const_buffer.size = size;
|
||||
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||
const_buffer.address_low = address << 8;
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_BindShader final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
auto& regs = maxwell3d.regs;
|
||||
const u32 index = parameters[0];
|
||||
if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
regs.pipelines[index & 0xF].offset = parameters[2];
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true;
|
||||
regs.shadow_scratch[28 + index] = parameters[1];
|
||||
regs.shadow_scratch[34 + index] = parameters[2];
|
||||
|
||||
const u32 address = parameters[4];
|
||||
auto& const_buffer = regs.const_buffer;
|
||||
const_buffer.size = 0x10000;
|
||||
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||
const_buffer.address_low = address << 8;
|
||||
|
||||
const size_t bind_group_id = parameters[3] & 0x7F;
|
||||
auto& bind_group = regs.bind_groups[bind_group_id];
|
||||
bind_group.raw_config = 0x11;
|
||||
maxwell3d.ProcessCBBind(bind_group_id);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_SetRasterBoundingBox final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 raster_mode = parameters[0];
|
||||
auto& regs = maxwell3d.regs;
|
||||
const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable;
|
||||
const u32 scratch_data = maxwell3d.regs.shadow_scratch[52];
|
||||
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
|
||||
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t base_size>
|
||||
class HLE_ClearConstBuffer final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
static constexpr std::array<u32, base_size> zeroes{};
|
||||
auto& regs = maxwell3d.regs;
|
||||
regs.const_buffer.size = static_cast<u32>(base_size);
|
||||
regs.const_buffer.address_high = parameters[0];
|
||||
regs.const_buffer.address_low = parameters[1];
|
||||
regs.const_buffer.offset = 0;
|
||||
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_ClearMemory final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
|
||||
const u32 needed_memory = parameters[2] / sizeof(u32);
|
||||
if (needed_memory > zero_memory.size()) {
|
||||
zero_memory.resize(needed_memory, 0);
|
||||
}
|
||||
auto& regs = maxwell3d.regs;
|
||||
regs.upload.line_length_in = parameters[2];
|
||||
regs.upload.line_count = 1;
|
||||
regs.upload.dest.address_high = parameters[0];
|
||||
regs.upload.dest.address_low = parameters[1];
|
||||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
|
||||
maxwell3d.CallMultiMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
|
||||
zero_memory.data(), needed_memory, needed_memory);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<u32> zero_memory;
|
||||
};
|
||||
|
||||
class HLE_TransformFeedbackSetup final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
|
||||
auto& regs = maxwell3d.regs;
|
||||
regs.transform_feedback_enabled = 1;
|
||||
regs.transform_feedback.buffers[0].start_offset = 0;
|
||||
regs.transform_feedback.buffers[1].start_offset = 0;
|
||||
regs.transform_feedback.buffers[2].start_offset = 0;
|
||||
regs.transform_feedback.buffers[3].start_offset = 0;
|
||||
|
||||
regs.upload.line_length_in = 4;
|
||||
regs.upload.line_count = 1;
|
||||
regs.upload.dest.address_high = parameters[0];
|
||||
regs.upload.dest.address_low = parameters[1];
|
||||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
|
||||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
|
||||
regs.transform_feedback.controls[0].stride, true);
|
||||
}
|
||||
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
|
||||
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||
builders.emplace(0x0D61FC9FAAC9FCADULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawArraysIndirect<false>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x8A4D173EB99A8603ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x771BB18C62444DA0ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawIndexedIndirect<false>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x0217920100488FF7ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_DrawIndexedIndirect<true>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x3F5E74B9C9A50164ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_MultiDrawIndexedIndirectCount>(
|
||||
maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xEAD26C3E2109B06BULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_MultiLayerClear>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xC713C83D8F63CCF3ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xD7333D26E0A93EDEULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xEB29B2A09AA06D38ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_BindShader>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xDB1341DBEB4C8AF7ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0x6C97861D891EDf7EULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_ClearConstBuffer<0x5F00>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xD246FDDF3A6173D7ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_ClearConstBuffer<0x7000>>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xEE4D0004BEC8ECF4ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_ClearMemory>(maxwell3d__);
|
||||
}));
|
||||
builders.emplace(0xFC0CF27F5FFAA661ULL,
|
||||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
|
||||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
|
||||
return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__);
|
||||
}));
|
||||
}
|
||||
|
||||
HLEMacro::~HLEMacro() = default;
|
||||
|
||||
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
|
||||
const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
|
||||
[hash](const auto& pair) { return pair.first == hash; });
|
||||
if (it == hle_funcs.end()) {
|
||||
const auto it = builders.find(hash);
|
||||
if (it == builders.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
|
||||
return it->second(maxwell3d);
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
@@ -3,7 +3,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
@@ -23,6 +26,8 @@ public:
|
||||
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
std::unordered_map<u64, std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>>
|
||||
builders;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
@@ -6,11 +6,13 @@
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "core/device_memory.h"
|
||||
#include "core/hle/kernel/k_page_table.h"
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/invalidation_accumulator.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
@@ -25,7 +27,9 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||
address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
|
||||
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
|
||||
page_bits != big_page_bits ? page_bits : 0},
|
||||
unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
|
||||
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
|
||||
1, std::memory_order_acq_rel)},
|
||||
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
|
||||
address_space_size = 1ULL << address_space_bits;
|
||||
page_size = 1ULL << page_bits;
|
||||
page_mask = page_size - 1ULL;
|
||||
@@ -41,11 +45,12 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||
big_entries.resize(big_page_table_size / 32, 0);
|
||||
big_page_table_cpu.resize(big_page_table_size);
|
||||
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
||||
std::array<PTEKind, 32> kind_valus;
|
||||
kind_valus.fill(PTEKind::INVALID);
|
||||
big_kinds.resize(big_page_table_size / 32, kind_valus);
|
||||
entries.resize(page_table_size / 32, 0);
|
||||
kinds.resize(page_table_size / 32, kind_valus);
|
||||
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
|
||||
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
|
||||
} else {
|
||||
fastmem_arena = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryManager::~MemoryManager() = default;
|
||||
@@ -83,38 +88,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
|
||||
}
|
||||
|
||||
PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
|
||||
auto entry = GetEntry<true>(gpu_addr);
|
||||
if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] {
|
||||
return GetKind<true>(gpu_addr);
|
||||
} else {
|
||||
return GetKind<false>(gpu_addr);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool is_big_page>
|
||||
PTEKind MemoryManager::GetKind(size_t position) const {
|
||||
if constexpr (is_big_page) {
|
||||
position = position >> big_page_bits;
|
||||
const size_t sub_index = position % 32;
|
||||
return big_kinds[position / 32][sub_index];
|
||||
} else {
|
||||
position = position >> page_bits;
|
||||
const size_t sub_index = position % 32;
|
||||
return kinds[position / 32][sub_index];
|
||||
}
|
||||
}
|
||||
|
||||
template <bool is_big_page>
|
||||
void MemoryManager::SetKind(size_t position, PTEKind kind) {
|
||||
if constexpr (is_big_page) {
|
||||
position = position >> big_page_bits;
|
||||
const size_t sub_index = position % 32;
|
||||
big_kinds[position / 32][sub_index] = kind;
|
||||
} else {
|
||||
position = position >> page_bits;
|
||||
const size_t sub_index = position % 32;
|
||||
kinds[position / 32][sub_index] = kind;
|
||||
}
|
||||
return kind_map.GetValueAt(gpu_addr);
|
||||
}
|
||||
|
||||
inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
|
||||
@@ -141,7 +115,6 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
|
||||
const GPUVAddr current_gpu_addr = gpu_addr + offset;
|
||||
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
|
||||
SetEntry<false>(current_gpu_addr, entry_type);
|
||||
SetKind<false>(current_gpu_addr, kind);
|
||||
if (current_entry_type != entry_type) {
|
||||
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
|
||||
}
|
||||
@@ -153,6 +126,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
|
||||
}
|
||||
remaining_size -= page_size;
|
||||
}
|
||||
kind_map.Map(gpu_addr, gpu_addr + size, kind);
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
@@ -164,7 +138,6 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
|
||||
const GPUVAddr current_gpu_addr = gpu_addr + offset;
|
||||
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
|
||||
SetEntry<true>(current_gpu_addr, entry_type);
|
||||
SetKind<true>(current_gpu_addr, kind);
|
||||
if (current_entry_type != entry_type) {
|
||||
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
|
||||
}
|
||||
@@ -193,6 +166,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
|
||||
}
|
||||
remaining_size -= big_page_size;
|
||||
}
|
||||
kind_map.Map(gpu_addr, gpu_addr + size, kind);
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
@@ -219,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
||||
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
|
||||
|
||||
for (const auto& [map_addr, map_size] : submapped_ranges) {
|
||||
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
||||
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
||||
for (const auto& [map_addr, map_size] : page_stash) {
|
||||
rasterizer->UnmapMemory(map_addr, map_size);
|
||||
}
|
||||
page_stash.clear();
|
||||
|
||||
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
|
||||
@@ -325,9 +296,15 @@ template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typenam
|
||||
inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
|
||||
FuncMapped&& func_mapped, FuncReserved&& func_reserved,
|
||||
FuncUnmapped&& func_unmapped) const {
|
||||
static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v<FuncMapped, bool>;
|
||||
static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v<FuncReserved, bool>;
|
||||
static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v<FuncUnmapped, bool>;
|
||||
using FuncMappedReturn =
|
||||
typename std::invoke_result<FuncMapped, std::size_t, std::size_t, std::size_t>::type;
|
||||
using FuncReservedReturn =
|
||||
typename std::invoke_result<FuncReserved, std::size_t, std::size_t, std::size_t>::type;
|
||||
using FuncUnmappedReturn =
|
||||
typename std::invoke_result<FuncUnmapped, std::size_t, std::size_t, std::size_t>::type;
|
||||
static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v<FuncMappedReturn, bool>;
|
||||
static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v<FuncReservedReturn, bool>;
|
||||
static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v<FuncUnmappedReturn, bool>;
|
||||
u64 used_page_size;
|
||||
u64 used_page_mask;
|
||||
u64 used_page_bits;
|
||||
@@ -383,9 +360,9 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
|
||||
}
|
||||
}
|
||||
|
||||
template <bool is_safe>
|
||||
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||
std::size_t size) const {
|
||||
template <bool is_safe, bool use_fastmem>
|
||||
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
[[maybe_unused]] VideoCommon::CacheType which) const {
|
||||
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
|
||||
std::memset(dest_buffer, 0, copy_amount);
|
||||
@@ -395,23 +372,31 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
|
||||
if constexpr (is_safe) {
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
||||
}
|
||||
if constexpr (use_fastmem) {
|
||||
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
|
||||
} else {
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
}
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||
};
|
||||
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
|
||||
if constexpr (is_safe) {
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
||||
}
|
||||
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
||||
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
|
||||
if constexpr (use_fastmem) {
|
||||
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
|
||||
} else {
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
||||
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
|
||||
} else {
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(dest_buffer, physical, copy_amount);
|
||||
}
|
||||
}
|
||||
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
|
||||
};
|
||||
@@ -423,18 +408,27 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||
MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
|
||||
}
|
||||
|
||||
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
|
||||
ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
|
||||
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which) const {
|
||||
if (fastmem_arena) [[likely]] {
|
||||
ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
|
||||
return;
|
||||
}
|
||||
ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
|
||||
}
|
||||
|
||||
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||
const std::size_t size) const {
|
||||
ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
|
||||
if (fastmem_arena) [[likely]] {
|
||||
ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
||||
return;
|
||||
}
|
||||
ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
|
||||
}
|
||||
|
||||
template <bool is_safe>
|
||||
void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
||||
std::size_t size) {
|
||||
void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
|
||||
[[maybe_unused]] VideoCommon::CacheType which) {
|
||||
auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
|
||||
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
|
||||
@@ -443,7 +437,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
|
||||
if constexpr (is_safe) {
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
|
||||
}
|
||||
u8* physical = memory.GetPointer(cpu_addr_base);
|
||||
std::memcpy(physical, src_buffer, copy_amount);
|
||||
@@ -453,7 +447,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
|
||||
if constexpr (is_safe) {
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
|
||||
}
|
||||
if (!IsBigPageContinous(page_index)) [[unlikely]] {
|
||||
memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
|
||||
@@ -471,16 +465,24 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
|
||||
MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
|
||||
}
|
||||
|
||||
void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
|
||||
WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
|
||||
void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which) {
|
||||
WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size, which);
|
||||
}
|
||||
|
||||
void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
||||
std::size_t size) {
|
||||
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
|
||||
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
||||
}
|
||||
|
||||
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
|
||||
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
|
||||
std::size_t size) {
|
||||
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
|
||||
accumulator->Add(gpu_dest_addr, size);
|
||||
}
|
||||
|
||||
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which) const {
|
||||
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset,
|
||||
[[maybe_unused]] std::size_t copy_amount) {};
|
||||
@@ -488,12 +490,12 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
|
||||
auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
||||
};
|
||||
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
|
||||
};
|
||||
auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
|
||||
std::size_t copy_amount) {
|
||||
@@ -503,7 +505,8 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
|
||||
MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
|
||||
}
|
||||
|
||||
bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const {
|
||||
bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which) const {
|
||||
bool result = false;
|
||||
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset,
|
||||
@@ -512,13 +515,13 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const {
|
||||
auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
|
||||
result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
|
||||
result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which);
|
||||
return result;
|
||||
};
|
||||
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
|
||||
result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
|
||||
result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which);
|
||||
return result;
|
||||
};
|
||||
auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
|
||||
@@ -571,7 +574,12 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
|
||||
return range_so_far;
|
||||
}
|
||||
|
||||
void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
|
||||
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
|
||||
return kind_map.GetContinousSizeFrom(gpu_addr);
|
||||
}
|
||||
|
||||
void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which) const {
|
||||
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset,
|
||||
[[maybe_unused]] std::size_t copy_amount) {};
|
||||
@@ -579,12 +587,12 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
|
||||
auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
|
||||
};
|
||||
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||
const VAddr cpu_addr_base =
|
||||
(static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
|
||||
rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
|
||||
};
|
||||
auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset,
|
||||
std::size_t copy_amount) {
|
||||
@@ -594,14 +602,15 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
|
||||
MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages);
|
||||
}
|
||||
|
||||
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
|
||||
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
|
||||
VideoCommon::CacheType which) {
|
||||
std::vector<u8> tmp_buffer(size);
|
||||
ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
|
||||
ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
|
||||
|
||||
// The output block must be flushed in case it has data modified from the GPU.
|
||||
// Fixes NPC geometry in Zombie Panic in Wonderland DX
|
||||
FlushRegion(gpu_dest_addr, size);
|
||||
WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
|
||||
FlushRegion(gpu_dest_addr, size, which);
|
||||
WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which);
|
||||
}
|
||||
|
||||
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
|
||||
@@ -681,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
GPUVAddr gpu_addr, std::size_t size) const {
|
||||
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
||||
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
|
||||
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <bool is_gpu_address>
|
||||
void MemoryManager::GetSubmappedRangeImpl(
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||
result) const {
|
||||
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
|
||||
last_segment{};
|
||||
std::optional<VAddr> old_page_addr{};
|
||||
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
|
||||
[[maybe_unused]] std::size_t offset,
|
||||
@@ -703,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
}
|
||||
old_page_addr = {cpu_addr_base + copy_amount};
|
||||
if (!last_segment) {
|
||||
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
if constexpr (is_gpu_address) {
|
||||
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
} else {
|
||||
last_segment = {cpu_addr_base, copy_amount};
|
||||
}
|
||||
} else {
|
||||
last_segment->second += copy_amount;
|
||||
}
|
||||
@@ -721,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
}
|
||||
old_page_addr = {cpu_addr_base + copy_amount};
|
||||
if (!last_segment) {
|
||||
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
if constexpr (is_gpu_address) {
|
||||
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
|
||||
last_segment = {new_base_addr, copy_amount};
|
||||
} else {
|
||||
last_segment = {cpu_addr_base, copy_amount};
|
||||
}
|
||||
} else {
|
||||
last_segment->second += copy_amount;
|
||||
}
|
||||
@@ -733,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||
};
|
||||
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
|
||||
split(0, 0, 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
void MemoryManager::FlushCaching() {
|
||||
if (!accumulator->AnyAccumulated()) {
|
||||
return;
|
||||
}
|
||||
accumulator->Callback([this](GPUVAddr addr, size_t size) {
|
||||
GetSubmappedRangeImpl<false>(addr, size, page_stash);
|
||||
});
|
||||
rasterizer->InnerInvalidation(page_stash);
|
||||
page_stash.clear();
|
||||
accumulator->Clear();
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
@@ -10,13 +10,19 @@
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/multi_level_page_table.h"
|
||||
#include "common/range_map.h"
|
||||
#include "common/virtual_buffer.h"
|
||||
#include "video_core/cache_types.h"
|
||||
#include "video_core/pte_kind.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
class InvalidationAccumulator;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class DeviceMemory;
|
||||
namespace Memory {
|
||||
@@ -59,9 +65,12 @@ public:
|
||||
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
|
||||
* Flushes and Invalidations, respectively to each operation.
|
||||
*/
|
||||
void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||
void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
|
||||
void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
|
||||
void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All);
|
||||
void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All);
|
||||
|
||||
/**
|
||||
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
|
||||
@@ -75,6 +84,7 @@ public:
|
||||
*/
|
||||
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
|
||||
/**
|
||||
* Checks if a gpu region can be simply read with a pointer.
|
||||
@@ -104,11 +114,14 @@ public:
|
||||
GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
|
||||
void Unmap(GPUVAddr gpu_addr, std::size_t size);
|
||||
|
||||
void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
|
||||
void FlushRegion(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
|
||||
|
||||
void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const;
|
||||
void InvalidateRegion(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
|
||||
|
||||
bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const;
|
||||
bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
|
||||
|
||||
size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
|
||||
|
||||
@@ -118,16 +131,23 @@ public:
|
||||
|
||||
PTEKind GetPageKind(GPUVAddr gpu_addr) const;
|
||||
|
||||
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
|
||||
size_t max_size = std::numeric_limits<size_t>::max()) const;
|
||||
|
||||
void FlushCaching();
|
||||
|
||||
private:
|
||||
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
||||
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
|
||||
FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
|
||||
|
||||
template <bool is_safe>
|
||||
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||
template <bool is_safe, bool use_fastmem>
|
||||
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which) const;
|
||||
|
||||
template <bool is_safe>
|
||||
void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
|
||||
VideoCommon::CacheType which);
|
||||
|
||||
template <bool is_big_page>
|
||||
[[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
|
||||
@@ -141,6 +161,12 @@ private:
|
||||
inline bool IsBigPageContinous(size_t big_page_index) const;
|
||||
inline void SetBigPageContinous(size_t big_page_index, bool value);
|
||||
|
||||
template <bool is_gpu_address>
|
||||
void GetSubmappedRangeImpl(
|
||||
GPUVAddr gpu_addr, std::size_t size,
|
||||
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
|
||||
result) const;
|
||||
|
||||
Core::System& system;
|
||||
Core::Memory::Memory& memory;
|
||||
Core::DeviceMemory& device_memory;
|
||||
@@ -183,23 +209,18 @@ private:
|
||||
template <bool is_big_page>
|
||||
inline void SetEntry(size_t position, EntryType entry);
|
||||
|
||||
std::vector<std::array<PTEKind, 32>> kinds;
|
||||
std::vector<std::array<PTEKind, 32>> big_kinds;
|
||||
|
||||
template <bool is_big_page>
|
||||
inline PTEKind GetKind(size_t position) const;
|
||||
|
||||
template <bool is_big_page>
|
||||
inline void SetKind(size_t position, PTEKind kind);
|
||||
|
||||
Common::MultiLevelPageTable<u32> page_table;
|
||||
Common::RangeMap<GPUVAddr, PTEKind> kind_map;
|
||||
Common::VirtualBuffer<u32> big_page_table_cpu;
|
||||
|
||||
std::vector<u64> big_page_continous;
|
||||
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
|
||||
u8* fastmem_arena{};
|
||||
|
||||
constexpr static size_t continous_bits = 64;
|
||||
|
||||
const size_t unique_identifier;
|
||||
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
|
||||
|
||||
static std::atomic<size_t> unique_identifier_generator;
|
||||
};
|
||||
|
||||
@@ -6,8 +6,10 @@
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include "common/common_types.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "video_core/cache_types.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
@@ -42,6 +44,9 @@ public:
|
||||
/// Dispatches a draw invocation
|
||||
virtual void Draw(bool is_indexed, u32 instance_count) = 0;
|
||||
|
||||
/// Dispatches an indirect draw invocation
|
||||
virtual void DrawIndirect() {}
|
||||
|
||||
/// Clear the current framebuffer
|
||||
virtual void Clear(u32 layer_count) = 0;
|
||||
|
||||
@@ -80,13 +85,22 @@ public:
|
||||
virtual void FlushAll() = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||
|
||||
/// Check if the the specified memory area requires flushing to CPU Memory.
|
||||
virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
|
||||
virtual bool MustFlushRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||
|
||||
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
||||
for (const auto& [cpu_addr, size] : sequences) {
|
||||
InvalidateRegion(cpu_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region are desync with guest
|
||||
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||
@@ -102,7 +116,8 @@ public:
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
/// and invalidated
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
virtual void FlushAndInvalidateRegion(
|
||||
VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||
|
||||
/// Notify the host renderer to wait for previous primitive and compute operations.
|
||||
virtual void WaitForIdle() = 0;
|
||||
@@ -119,6 +134,10 @@ public:
|
||||
/// Notify rasterizer that a frame is about to finish
|
||||
virtual void TickFrame() = 0;
|
||||
|
||||
virtual bool AccelerateConditionalRendering() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Attempt to use a faster method to perform a surface copy
|
||||
[[nodiscard]] virtual bool AccelerateSurfaceCopy(
|
||||
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
|
||||
@@ -39,11 +39,11 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
|
||||
u32 size) {}
|
||||
void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {}
|
||||
void RasterizerNull::FlushAll() {}
|
||||
void RasterizerNull::FlushRegion(VAddr addr, u64 size) {}
|
||||
bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size) {
|
||||
void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
|
||||
bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {
|
||||
return false;
|
||||
}
|
||||
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size) {}
|
||||
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
|
||||
void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
|
||||
void RasterizerNull::InvalidateGPUCache() {}
|
||||
void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {}
|
||||
@@ -61,7 +61,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) {
|
||||
}
|
||||
void RasterizerNull::SignalReference() {}
|
||||
void RasterizerNull::ReleaseFences() {}
|
||||
void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size) {}
|
||||
void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
|
||||
void RasterizerNull::WaitForIdle() {}
|
||||
void RasterizerNull::FragmentBarrier() {}
|
||||
void RasterizerNull::TiledCacheBarrier() {}
|
||||
|
||||
@@ -38,9 +38,12 @@ public:
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
@@ -50,7 +53,8 @@ public:
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(
|
||||
VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
|
||||
@@ -160,6 +160,10 @@ public:
|
||||
return device.CanReportMemoryUsage();
|
||||
}
|
||||
|
||||
u32 GetStorageBufferAlignment() const {
|
||||
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::array PABO_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
||||
@@ -40,6 +40,7 @@ struct GraphicsPipelineKey {
|
||||
BitField<6, 2, Maxwell::Tessellation::DomainType> tessellation_primitive;
|
||||
BitField<8, 2, Maxwell::Tessellation::Spacing> tessellation_spacing;
|
||||
BitField<10, 1, u32> tessellation_clockwise;
|
||||
BitField<11, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
|
||||
};
|
||||
std::array<u32, 3> padding;
|
||||
VideoCommon::TransformFeedbackState xfb_state;
|
||||
|
||||
@@ -202,7 +202,8 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
|
||||
template <typename Func>
|
||||
void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
@@ -226,48 +227,97 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
|
||||
BeginTransformFeedback(pipeline, primitive_mode);
|
||||
|
||||
const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
|
||||
const GLsizei num_instances = static_cast<GLsizei>(instance_count);
|
||||
if (is_indexed) {
|
||||
const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
|
||||
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
|
||||
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
|
||||
const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
|
||||
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
|
||||
glDrawElements(primitive_mode, num_vertices, format, offset);
|
||||
} else if (num_instances == 1 && base_instance == 0) {
|
||||
glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
|
||||
} else if (base_vertex == 0 && base_instance == 0) {
|
||||
glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
|
||||
} else if (base_vertex == 0) {
|
||||
glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
|
||||
num_instances, base_instance);
|
||||
} else if (base_instance == 0) {
|
||||
glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
|
||||
num_instances, base_vertex);
|
||||
} else {
|
||||
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
|
||||
offset, num_instances, base_vertex,
|
||||
base_instance);
|
||||
}
|
||||
} else {
|
||||
const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
|
||||
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
|
||||
if (num_instances == 1 && base_instance == 0) {
|
||||
glDrawArrays(primitive_mode, base_vertex, num_vertices);
|
||||
} else if (base_instance == 0) {
|
||||
glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
|
||||
} else {
|
||||
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
|
||||
num_instances, base_instance);
|
||||
}
|
||||
}
|
||||
draw_func(primitive_mode);
|
||||
|
||||
EndTransformFeedback();
|
||||
|
||||
++num_queued_commands;
|
||||
has_written_global_memory |= pipeline->WritesGlobalMemory();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
|
||||
PrepareDraw(is_indexed, [this, is_indexed, instance_count](GLenum primitive_mode) {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
|
||||
const GLsizei num_instances = static_cast<GLsizei>(instance_count);
|
||||
if (is_indexed) {
|
||||
const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
|
||||
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
|
||||
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
|
||||
const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
|
||||
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
|
||||
glDrawElements(primitive_mode, num_vertices, format, offset);
|
||||
} else if (num_instances == 1 && base_instance == 0) {
|
||||
glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
|
||||
} else if (base_vertex == 0 && base_instance == 0) {
|
||||
glDrawElementsInstanced(primitive_mode, num_vertices, format, offset,
|
||||
num_instances);
|
||||
} else if (base_vertex == 0) {
|
||||
glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
|
||||
num_instances, base_instance);
|
||||
} else if (base_instance == 0) {
|
||||
glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
|
||||
num_instances, base_vertex);
|
||||
} else {
|
||||
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
|
||||
offset, num_instances, base_vertex,
|
||||
base_instance);
|
||||
}
|
||||
} else {
|
||||
const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
|
||||
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
|
||||
if (num_instances == 1 && base_instance == 0) {
|
||||
glDrawArrays(primitive_mode, base_vertex, num_vertices);
|
||||
} else if (base_instance == 0) {
|
||||
glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
|
||||
} else {
|
||||
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
|
||||
num_instances, base_instance);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DrawIndirect() {
|
||||
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
|
||||
buffer_cache.SetDrawIndirect(¶ms);
|
||||
PrepareDraw(params.is_indexed, [this, ¶ms](GLenum primitive_mode) {
|
||||
const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
|
||||
const GLvoid* const gl_offset =
|
||||
reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset));
|
||||
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer->Handle());
|
||||
if (params.include_count) {
|
||||
const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
|
||||
glBindBuffer(GL_PARAMETER_BUFFER, draw_buffer->Handle());
|
||||
|
||||
if (params.is_indexed) {
|
||||
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
|
||||
glMultiDrawElementsIndirectCount(primitive_mode, format, gl_offset,
|
||||
static_cast<GLintptr>(offset_base),
|
||||
static_cast<GLsizei>(params.max_draw_counts),
|
||||
static_cast<GLsizei>(params.stride));
|
||||
} else {
|
||||
glMultiDrawArraysIndirectCount(primitive_mode, gl_offset,
|
||||
static_cast<GLintptr>(offset_base),
|
||||
static_cast<GLsizei>(params.max_draw_counts),
|
||||
static_cast<GLsizei>(params.stride));
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (params.is_indexed) {
|
||||
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
|
||||
glMultiDrawElementsIndirect(primitive_mode, format, gl_offset,
|
||||
static_cast<GLsizei>(params.max_draw_counts),
|
||||
static_cast<GLsizei>(params.stride));
|
||||
} else {
|
||||
glMultiDrawArraysIndirect(primitive_mode, gl_offset,
|
||||
static_cast<GLsizei>(params.max_draw_counts),
|
||||
static_cast<GLsizei>(params.stride));
|
||||
}
|
||||
});
|
||||
buffer_cache.SetDrawIndirect(nullptr);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute() {
|
||||
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
|
||||
if (!pipeline) {
|
||||
@@ -302,46 +352,60 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
|
||||
|
||||
void RasterizerOpenGL::FlushAll() {}
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
{
|
||||
if ((True(which & VideoCommon::CacheType::BufferCache))) {
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
return buffer_cache.IsRegionGpuModified(addr, size);
|
||||
if ((True(which & VideoCommon::CacheType::QueryCache))) {
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||
buffer_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
|
||||
if ((True(which & VideoCommon::CacheType::BufferCache))) {
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
if (buffer_cache.IsRegionGpuModified(addr, size)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (!Settings::IsGPULevelHigh()) {
|
||||
return false;
|
||||
}
|
||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
return texture_cache.IsRegionGpuModified(addr, size);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
if (True(which & VideoCommon::CacheType::TextureCache)) {
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
{
|
||||
if (True(which & VideoCommon::CacheType::BufferCache)) {
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
if (True(which & VideoCommon::CacheType::ShaderCache)) {
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
if (True(which & VideoCommon::CacheType::QueryCache)) {
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||
@@ -408,11 +472,12 @@ void RasterizerOpenGL::ReleaseFences() {
|
||||
fence_manager.WaitPendingFences();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which) {
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
FlushRegion(addr, size);
|
||||
FlushRegion(addr, size, which);
|
||||
}
|
||||
InvalidateRegion(addr, size);
|
||||
InvalidateRegion(addr, size, which);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::WaitForIdle() {
|
||||
@@ -460,6 +525,21 @@ void RasterizerOpenGL::TickFrame() {
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateConditionalRendering() {
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// Reimplement Host conditional rendering.
|
||||
return false;
|
||||
}
|
||||
// Medium / Low Hack: stub any checks on queries writen into the buffer cache.
|
||||
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
|
||||
Maxwell::ReportSemaphore::Compare cmp;
|
||||
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
|
||||
VideoCommon::CacheType::BufferCache)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
@@ -481,7 +561,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
|
||||
}
|
||||
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
|
||||
{
|
||||
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
|
||||
std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
|
||||
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
|
||||
buffer_cache.WriteMemory(*cpu_addr, copy_size);
|
||||
}
|
||||
|
||||
@@ -69,6 +69,7 @@ public:
|
||||
~RasterizerOpenGL() override;
|
||||
|
||||
void Draw(bool is_indexed, u32 instance_count) override;
|
||||
void DrawIndirect() override;
|
||||
void Clear(u32 layer_count) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
@@ -76,9 +77,12 @@ public:
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
bool MustFlushRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
@@ -88,12 +92,14 @@ public:
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(
|
||||
VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateConditionalRendering() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
@@ -121,6 +127,9 @@ private:
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
template <typename Func>
|
||||
void PrepareDraw(bool is_indexed, Func&&);
|
||||
|
||||
/// Syncs state to match guest's
|
||||
void SyncState();
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
|
||||
using VideoCommon::SerializePipeline;
|
||||
using Context = ShaderContext::Context;
|
||||
|
||||
constexpr u32 CACHE_VERSION = 7;
|
||||
constexpr u32 CACHE_VERSION = 9;
|
||||
|
||||
template <typename Container>
|
||||
auto MakeSpan(Container& container) {
|
||||
@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
.support_snorm_render_buffer = false,
|
||||
.support_viewport_index_layer = device.HasVertexViewportLayer(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
} {
|
||||
if (use_asynchronous_shaders) {
|
||||
workers = CreateWorkers();
|
||||
@@ -350,6 +352,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
|
||||
regs.tessellation.params.output_primitives.Value() ==
|
||||
Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
|
||||
graphics_key.xfb_enabled.Assign(regs.transform_feedback_enabled != 0 ? 1 : 0);
|
||||
graphics_key.app_stage.Assign(maxwell3d->engine_state);
|
||||
if (graphics_key.xfb_enabled) {
|
||||
SetXfbState(graphics_key.xfb_state, regs);
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user