Compare commits

...

130 Commits

Author SHA1 Message Date
gxcreator
8af62c9997 Fixed packaging on some systems with different QT5 path 2020-04-23 21:45:07 +03:00
bunnei
ff0c49e1ce kernel: memory: Improve implementation of device shared memory. (#3707)
* kernel: memory: Improve implementation of device shared memory.

* fixup! kernel: memory: Improve implementation of device shared memory.

* fixup! kernel: memory: Improve implementation of device shared memory.
2020-04-23 11:37:12 -04:00
bunnei
eb26e9e711 Merge pull request #3730 from lioncash/time
service/time: Remove reliance on the global system accessor
2020-04-23 02:41:38 -04:00
bunnei
2409fedacf Merge pull request #3697 from lioncash/declarations
CMakeLists: Enable -Wmissing-declarations on Linux builds
2020-04-23 02:18:52 -04:00
bunnei
bf2ddb8fd5 Merge pull request #3677 from FernandoS27/better-sync
Introduce Predictive Flushing and Improve ASYNC GPU
2020-04-22 22:09:38 -04:00
Mat M
2c806c5fd3 Merge pull request #3767 from ReinUsesLisp/point-size-pipeline
vk_pipeline_cache: Fix unintentional memcpy into optional
2020-04-22 21:20:14 -04:00
ReinUsesLisp
d9463f4562 vk_pipeline_cache: Fix unintentional memcpy into optional
The intention behind this was to assign a float to from an uint32_t, but
it was unintentionally being copied directly into the std::optional.

Copy to a temporary and assign that temporary to std::optional. This can
be replaced with std::bit_cast<float> once we are in C++20.
2020-04-22 21:36:05 -03:00
Fernando Sahmkow
c043ac4f13 GL_Fence_Manager: use GL_TIMEOUT_IGNORED instead of a loop, 2020-04-22 20:34:32 -04:00
bunnei
bee2c64b34 Merge pull request #3725 from MerryMage/fpcr
thread: FPCR.FZ is likely not 1 (and FPCR.RMode = TieAway and FPCR.DN = 0)
2020-04-22 19:49:13 -04:00
Mat M
6ce3d174b7 Merge pull request #3759 from H27CK/opus-mingw-w64
Set _FORTIFY_SOURCE=0 if building Opus with mingw-w64
2020-04-22 17:45:44 -04:00
Fernando Sahmkow
63d2ba4f69 Merge pull request #3763 from H27CK/vk-cmd
Add missing ;
2020-04-22 17:43:42 -04:00
H27CK
4d74578d35 Add missing ; 2020-04-22 23:36:21 +02:00
bunnei
5ed13304e1 Merge pull request #3758 from H27CK/vk-cmd
Introduce dummy context for yuzu-cmd VK support
2020-04-22 12:37:01 -04:00
Fernando Sahmkow
afae40a99e Merge pull request #3653 from ReinUsesLisp/nsight-aftermath
renderer_vulkan: Integrate Nvidia Nsight Aftermath on Windows
2020-04-22 11:39:01 -04:00
Fernando Sahmkow
4e37f1b113 Address Feedback. 2020-04-22 11:36:27 -04:00
Fernando Sahmkow
39e5b72948 Async GPU: Correct flushing behavior to be similar to old async GPU behavior. 2020-04-22 11:36:26 -04:00
Fernando Sahmkow
1b3be8a8f8 MaxwellDMA: Correct copying on accuracy level. 2020-04-22 11:36:25 -04:00
Fernando Sahmkow
644588fd88 ShaderCache/PipelineCache: Cache null shaders. 2020-04-22 11:36:25 -04:00
Fernando Sahmkow
f616dc0b59 Address Feedback. 2020-04-22 11:36:24 -04:00
Fernando Sahmkow
ec2f3e48e1 Fix GCC error. 2020-04-22 11:36:23 -04:00
Fernando Sahmkow
7f44f22451 Correct Linux Compile Error. 2020-04-22 11:36:22 -04:00
Fernando Sahmkow
d2d4a6cbcf Clang format. 2020-04-22 11:36:22 -04:00
Fernando Sahmkow
b3e5f177ba QueryCache: Only do async flushes on async gpu. 2020-04-22 11:36:21 -04:00
Fernando Sahmkow
f4ab223ef0 Async GPU: Only do reactive flushing on Extreme Level. 2020-04-22 11:36:20 -04:00
ReinUsesLisp
b752faf2d3 vk_fence_manager: Initial implementation 2020-04-22 11:36:19 -04:00
Fernando Sahmkow
0649f05900 QueryCache: Implement Async Flushes. 2020-04-22 11:36:18 -04:00
Fernando Sahmkow
131b342130 OpenGL: Guarantee writes to Buffers. 2020-04-22 11:36:18 -04:00
Fernando Sahmkow
1fb516cd97 GPU: Implement Flush Requests for Async mode. 2020-04-22 11:36:17 -04:00
Fernando Sahmkow
b7bc3c2549 FenceManager: Manage syncpoints and rename fences to semaphores. 2020-04-22 11:36:16 -04:00
Fernando Sahmkow
96bb961a64 BufferCache: Refactor async managing. 2020-04-22 11:36:15 -04:00
Fernando Sahmkow
b10db7e4a5 FenceManager: Implement async buffer cache flushes on High settings 2020-04-22 11:36:15 -04:00
Fernando Sahmkow
4adfc9bb08 Rasterizer: Document SignalFence & ReleaseFences and setup skeletons on Vulkan. 2020-04-22 11:36:14 -04:00
Fernando Sahmkow
a081a7c855 GPU: Fix rebase errors. 2020-04-22 11:36:13 -04:00
Fernando Sahmkow
e84eb64e51 Rasterizer: Disable fence managing in synchronous gpu. 2020-04-22 11:36:12 -04:00
Fernando Sahmkow
165ae823f5 ThreadManager: Sync async reads on accurate gpu. 2020-04-22 11:36:12 -04:00
Fernando Sahmkow
57fdbd9b89 FenceManager: Implement should wait. 2020-04-22 11:36:11 -04:00
Fernando Sahmkow
1f345ebe3a GPU: Implement a Fence Manager. 2020-04-22 11:36:10 -04:00
Fernando Sahmkow
487379c593 OpenGL: Implement Fencing backend. 2020-04-22 11:36:10 -04:00
Fernando Sahmkow
ed7e965712 TextureCache: Flush linear textures after finishing rendering. 2020-04-22 11:36:09 -04:00
Fernando Sahmkow
339d0d9d6c GPU: Delay Fences. 2020-04-22 11:36:08 -04:00
Fernando Sahmkow
8b1eb44b3e BufferCache: Implement OnCPUWrite and SyncGuestHost 2020-04-22 11:36:07 -04:00
Fernando Sahmkow
da8f17715d GPU: Refactor synchronization on Async GPU 2020-04-22 11:36:06 -04:00
Fernando Sahmkow
a60a22d9c2 Texture Cache: Implement OnCPUWrite and SyncGuestHost 2020-04-22 11:36:05 -04:00
Fernando Sahmkow
084ceb925a UI: Replasce accurate GPU option for GPU Accuracy Level 2020-04-22 11:36:04 -04:00
H27CK
52e66779e7 Set _FORTIFY_SOURCE=0 if building Opus with mingw-w64 2020-04-22 08:20:12 +02:00
H27CK
c883cd103e Init SDL info structure and add dummy context 2020-04-22 07:47:21 +02:00
bunnei
e84f82a028 Merge pull request #3699 from FearlessTobi/port-5185
Port citra-emu/citra#5185: "gdbstub: Fix some gdbstub jankiness"
2020-04-21 22:26:10 -04:00
bunnei
d64290884a Merge pull request #3714 from lioncash/copies
gl_shader_decompiler: Avoid copies where applicable
2020-04-21 20:16:02 -04:00
bunnei
cd47ccec49 Merge pull request #3745 from bunnei/fix-homebrew-load
Fix process memory initialization for ELF and NRO
2020-04-21 18:59:16 -04:00
Rodrigo Locatti
f293b15611 Merge pull request #3718 from ReinUsesLisp/better-pipeline-state
fixed_pipeline_state: Pack structure, use memcmp and CityHash on it
2020-04-21 18:17:58 -03:00
bunnei
9bf3abcb63 Merge pull request #3698 from lioncash/warning
General: Resolve minor assorted warnings
2020-04-21 14:11:18 -04:00
bunnei
48b670d132 Merge pull request #3724 from bunnei/fix-unicorn
core: arm_unicorn: Fix interpret fallback by temporarily mapping instruction page.
2020-04-20 23:28:23 -04:00
David
11c63ca969 audio_renderer: Preliminary BehaviorInfo (#3736)
* audio_renderer: Preliminary BehaviorInfo

* clang format

* Fixed IsRevisionSupported

* fixed IsValidRevision

* Fixed logic error & spelling errors & crash

* Addressed issues
2020-04-20 22:57:30 -04:00
bunnei
d3e0cefa60 Merge pull request #3695 from ReinUsesLisp/default-attributes
maxwell_3d: Initialize format attributes constant as one
2020-04-20 21:40:18 -04:00
Mat M
cb5b8ca886 Merge pull request #3733 from ambasta/patch-2
Initialize quad_indexed_pass before uint8_pass
2020-04-20 20:36:46 -04:00
bunnei
9c12aef2f8 loader: nro: Fix process initialization using ProgramMetadata default. 2020-04-20 18:19:45 -04:00
bunnei
68039addbc loader: elf: Fix process initialization using ProgramMetadata default. 2020-04-20 18:19:45 -04:00
bunnei
f0a7f05070 file_sys: program_metadata: Add a helper function for generating reasonable default metadata.
- We need this for homebrew process initialization.
2020-04-20 18:19:45 -04:00
Mat M
200f69d2ff Merge pull request #3739 from MerryMage/disable_cpu_opt
dynarmic: Add option to disable CPU JIT optimizations
2020-04-20 14:19:18 -04:00
bunnei
fe2173429a Merge pull request #3741 from FearlessTobi/silence-warnings
Try to reduce log spam a bit by lowering log levels to Debug
2020-04-20 13:01:49 -04:00
FearlessTobi
4e99a06c70 npad: Lower log level for VibrateController to Debug 2020-04-20 18:44:57 +02:00
FearlessTobi
6ce0f3575a audren: Lower log level for RequestUpdateImpl to Debug 2020-04-20 18:44:41 +02:00
Fernando Sahmkow
ec2f8f4272 Merge pull request #3700 from ReinUsesLisp/stream-buffer-sizes
vk_stream_buffer: Fix out of memory on boot on recent Nvidia drivers
2020-04-20 09:37:42 -04:00
MerryMage
a3a12deecc dynarmic: Add option to disable CPU JIT optimizations 2020-04-20 13:36:26 +01:00
bunnei
6de36f0b61 Merge pull request #3712 from lioncash/remove
service: Remove unused RequestParser instances
2020-04-20 01:04:04 -04:00
bunnei
e3977243e2 Merge pull request #3709 from lioncash/am
am: Resolve ineffective moves
2020-04-20 00:15:00 -04:00
Amit Prakash Ambasta
5324b1d01e Initialize quad_indexed_pass before uint8_pass
Fixes Werror=reorder in gcc
2020-04-20 04:53:52 +05:30
Rodrigo Locatti
4932010c6f Merge pull request #3729 from lioncash/globals
dma_pusher: Remove reliance on the global system instance
2020-04-19 19:12:40 -03:00
bunnei
85c17a2c35 Merge pull request #3694 from ReinUsesLisp/indexed-quads
vk_compute_pass: Implement indexed quads
2020-04-19 16:52:40 -04:00
Lioncash
bfee33cce3 service/time: Remove reliance on the global system accessor
Eliminates usages of the global system accessor and instead passes the
existing system instance into the interfaces.
2020-04-19 16:31:28 -04:00
Lioncash
44e959157b dma_pusher: Remove reliance on the global system instance
With this, the video core is now has no calls to the global system
instance at all.
2020-04-19 16:12:08 -04:00
bunnei
2ea7a70da0 Merge pull request #3686 from lioncash/table
texture_cache/format_lookup_table: Fix incorrect green, blue, and alpha indices
2020-04-19 15:33:33 -04:00
bunnei
10fb26f69c Merge pull request #3696 from lioncash/cast-size
hle_ipc: Remove std::size_t casts where applicable
2020-04-19 14:24:15 -04:00
MerryMage
2bfac7b61d thread: FPCR.FZ is likely not 1 2020-04-19 08:37:20 +01:00
bunnei
779a3b222a Merge pull request #3655 from FearlessTobi/ui-retext-yuzu
yuzu/main: Add better popup texts and remove duplicated actions
2020-04-19 02:16:50 -04:00
bunnei
73db83c0ab Merge pull request #3679 from lioncash/track
track: Eliminate redundant copies
2020-04-19 01:22:47 -04:00
bunnei
9c85f385b1 Merge pull request #3720 from jbeich/freebsd
Unbreak Vulkan on FreeBSD
2020-04-19 01:20:59 -04:00
bunnei
74c27fd1b5 core: arm_unicorn: Fix interpret fallback by temporarily mapping instruction page. 2020-04-19 00:53:23 -04:00
Rodrigo Locatti
4d7d3651f3 Merge pull request #3719 from jbeich/clang
Unbreak -Werror=implicit-fallthrough with Clang
2020-04-18 21:43:12 -03:00
Jan Beich
afcc84a172 renderer_vulkan: assume X11 if not Windows/macOS after bf1d66b7c0
Render.Vulkan <Error> video_core/renderer_vulkan/renderer_vulkan.cpp:CreateInstance:131: Presentation not supported on this platform
Render.Vulkan <Error> video_core/renderer_vulkan/renderer_vulkan.cpp:CreateSurface:378: Presentation not supported on this platform
Core <Critical> core/core.cpp:Load:199: Failed to initialize system (Error 5)!
2020-04-19 00:32:23 +00:00
Jan Beich
1a2df0a5f3 cmake: Silence -Werror=implicit-fallthrough in SDL2 headers
In file included from src/input_common/sdl/sdl_impl.cpp:16:
In file included from /usr/local/include/SDL2/SDL.h:32:
In file included from /usr/local/include/SDL2/SDL_main.h:25:
/usr/local/include/SDL2/SDL_stdinc.h:445:9: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough]
        case 3:         *_p++ = _val;   /* fallthrough */
        ^
/usr/local/include/SDL2/SDL_stdinc.h:445:9: note: insert '[[fallthrough]];' to silence this warning
        case 3:         *_p++ = _val;   /* fallthrough */
        ^
        [[fallthrough]];
/usr/local/include/SDL2/SDL_stdinc.h:445:9: note: insert 'break;' to avoid fall-through
        case 3:         *_p++ = _val;   /* fallthrough */
        ^
        break;
/usr/local/include/SDL2/SDL_stdinc.h:446:9: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough]
        case 2:         *_p++ = _val;   /* fallthrough */
        ^
/usr/local/include/SDL2/SDL_stdinc.h:446:9: note: insert '[[fallthrough]];' to silence this warning
        case 2:         *_p++ = _val;   /* fallthrough */
        ^
        [[fallthrough]];
/usr/local/include/SDL2/SDL_stdinc.h:446:9: note: insert 'break;' to avoid fall-through
        case 2:         *_p++ = _val;   /* fallthrough */
        ^
        break;
/usr/local/include/SDL2/SDL_stdinc.h:447:9: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough]
        case 1:         *_p++ = _val;   /* fallthrough */
        ^
/usr/local/include/SDL2/SDL_stdinc.h:447:9: note: insert '[[fallthrough]];' to silence this warning
        case 1:         *_p++ = _val;   /* fallthrough */
        ^
        [[fallthrough]];
/usr/local/include/SDL2/SDL_stdinc.h:447:9: note: insert 'break;' to avoid fall-through
        case 1:         *_p++ = _val;   /* fallthrough */
        ^
        break;
3 errors generated.
2020-04-18 23:26:22 +00:00
ReinUsesLisp
d62f57cf5a fixed_pipeline_state: Hash and compare the whole structure
Pad FixedPipelineState's size to 384 bytes to be a multiple of 16.

Compare the whole struct with std::memcmp and hash with CityHash. Using
CityHash instead of a naive hash should reduce the number of collisions.
Improve used type traits to ensure this operation is safe.

With these changes the improvements to the hashable pipeline state are:

Optimized structure
Hash:            89 ns
Comparison:     103 ns
Construction*:  164 ns
Struct size:    384 bytes

Original structure
Hash:           148 ns
Equal:          174 ns
Construction*:  281 ns
Size:          1384 bytes

* Attribute state initialization is not measured

These measures are averages taken with std::chrono::high_accuracy_clock
on MSVC shipped on Visual Studio 16.6.0 Preview 2.1.
2020-04-18 19:57:26 -03:00
ReinUsesLisp
b571c92dfd fixed_pipeline_state: Pack blending state
Reduce FixedPipelineState's size to 364 bytes.
2020-04-18 19:23:35 -03:00
ReinUsesLisp
548dd27f45 fixed_pipeline_state: Pack rasterizer state
Reduce FixedPipelineState's size to 600 bytes.
2020-04-18 19:22:57 -03:00
ReinUsesLisp
7790144a55 fixed_pipeline_state: Pack depth stencil state
Reduce FixedPipelineState's size to 632 bytes.
2020-04-18 19:22:11 -03:00
ReinUsesLisp
ab6704f20c fixed_pipeline_state: Pack attribute state
Reduce FixedPipelineState's size from 1384 to 664 bytes
2020-04-18 19:21:19 -03:00
Mat M
5305806071 Merge pull request #3716 from bunnei/fix-another-impl-fallthrough
video_core: gl_shader_decompiler: Fix implicit fallthrough errors.
2020-04-18 15:17:52 -04:00
bunnei
03726fb7f5 video_core: gl_shader_decompiler: Fix implicit fallthrough errors. 2020-04-18 15:15:21 -04:00
bunnei
89e512ca8d Merge pull request #3710 from lioncash/nso
loader/nso: Resolve moves not occurring in DecompressSegment
2020-04-18 14:44:42 -04:00
Mat M
45964e6fec Merge pull request #3715 from bunnei/fix-impl-fallthrough
service: hid: npad: Fix implicit fallthrough errors.
2020-04-18 14:44:20 -04:00
bunnei
a8d5d08e2e service: hid: npad: Fix implicit fallthrough errors. 2020-04-18 14:41:08 -04:00
bunnei
907ba8794e Merge pull request #3713 from lioncash/time
service/time: Minor changes
2020-04-17 21:04:43 -04:00
bunnei
90ddb13372 Merge pull request #3711 from lioncash/cast
memory/slab_heap: Make use of static_cast over reinterpret_cast
2020-04-17 21:04:11 -04:00
Lioncash
bf328ed35a gl_shader_decompiler: Avoid copies where applicable
Avoids unnecessary reference count increments where applicable and also
avoids reallocating a vector.

Unlikely to make a huge difference, but given how trivial of an
amendment it is, why not?
2020-04-17 20:48:52 -04:00
Lioncash
7714b02d95 time/system_clock_core: Remove unnecessary initializer
This is already initialized within the class body.
2020-04-17 20:04:06 -04:00
Lioncash
b533f18ab9 service/time: Mark IsStandardNetworkSystemClockAccuracySufficient as const
This doesn't modify internal member state.
2020-04-17 20:02:45 -04:00
Lioncash
0cfd3b94db service/time: Add virtual destructors where applicable
Many of these implementations are used to implement a polymorphic
interface. While not directly used polymorphically, this prevents
virtual destruction from ever becoming an issue.
2020-04-17 19:59:31 -04:00
Lioncash
4d8a8a8033 service: Remove unused RequestParser instances
These aren't used, so they should be removed to reduce compilation
warnings.
2020-04-17 19:47:43 -04:00
bunnei
fd7dc7e03d Merge pull request #3704 from lioncash/fmt
externals: Update fmt to 6.2.0
2020-04-17 19:47:30 -04:00
bunnei
7438d36d0e Merge pull request #3630 from benru/open-windows-network-files
common/file_util: Allow access to files on network shares
2020-04-17 19:47:11 -04:00
Lioncash
7e585bce28 memory/slab_heap: Make use of static_cast over reinterpret_cast
Casting from void* with static_cast is permitted by the standard, so we
can just make use of that instead.
2020-04-17 19:38:59 -04:00
Lioncash
441a2812ed loader/nso: Resolve moves not occurring in DecompressSegment
Given the std::vector was const, an automatic move out of the function
could not occur.

We can allow automatic return value optimizations to occur by making the
buffer non-const.
2020-04-17 19:26:50 -04:00
Lioncash
64f226889c am: Resolve ineffective moves
Previously const objects were being std::moved, which results in no move
actually occurring. This resolves that.
2020-04-17 19:22:46 -04:00
Mat M
30b59b732c Merge pull request #3706 from degasus/fix_fallthrough_error
video_code: Fix implicit switch fallthrough.
2020-04-17 17:48:10 -04:00
Markus Wick
07fbef1776 video_code: Fix implicit switch fallthrough.
Since yesterday, this breaks the build on linux.
So let's fix it.
2020-04-17 23:43:35 +02:00
ReinUsesLisp
a7b6bd56d7 vk_stream_buffer: Fix out of memory on boot on recent Nvidia drivers
Nvidia recently introduced a new memory type for data streaming
(awesome!), but yuzu was assuming that all heaps had enough memory
for the assumed stream buffer size (256 MiB).

This worked fine on AMD but Nvidia's new memory heap was smaller than
256 MiB. This commit changes this assumption and allocates a bit less
than the size of the preferred heap, with a maximum of 256 MiB (to avoid
allocating all system memory on integrated devices).

- Fixes a crash on NVIDIA 450.82.0.0
2020-04-17 18:12:48 -03:00
Fernando Sahmkow
2133482a17 Merge pull request #3703 from yuzu-emu/revert-3656-glsl-full-decompile
Revert "gl_shader_cache: Use CompileDepth::FullDecompile on GLSL"
2020-04-17 17:08:41 -04:00
Fernando Sahmkow
775ecc7d05 Merge pull request #3672 from lioncash/null
file_util: Early-exit in WriteArray and ReadArray if specified lengths are zero
2020-04-17 17:02:35 -04:00
Rodrigo Locatti
990c0b184f Revert "gl_shader_cache: Use CompileDepth::FullDecompile on GLSL" 2020-04-17 17:41:48 -03:00
Lioncash
dcbb39cdae CMakeLists: Make missing declarations a compile-time error
Ensures that our code always has its linkage explicit.
2020-04-16 23:43:41 -04:00
Lioncash
e2d8be1ca2 General: Resolve warnings related to missing declarations 2020-04-16 23:43:34 -04:00
MerryMage
1cc9507bc5 gdbstub: Fix some gdbstub jankiness
1. Ensure that register information available to gdbstub is most up-to-date.
2. There's no reason to check for current_thread == thread when emitting a trap.
   Doing this results in random hangs whenever a step happens upon a thread switch.
2020-04-17 05:41:43 +02:00
Lioncash
8f9c599c9f key_manager: Resolve missing field initializer warning 2020-04-16 22:45:44 -04:00
Lioncash
678ac54749 decode/memory: Resolve unused variable warning
Only the first element of the returned pair is ever used.
2020-04-16 22:45:44 -04:00
Lioncash
d159643fd7 decode/texture: Resolve unused variable warnings.
Some variables aren't used, so we can remove these.

Unfortunately, diagnostics are still reported on structured bindings
even when annotated with [[maybe_unused]], so we need to unpack the
elements that we want to use manually.
2020-04-16 22:45:41 -04:00
Lioncash
f522abd8ab decode/texture: Collapse loop down into std::generate
Same behavior, less code.
2020-04-16 22:29:07 -04:00
Lioncash
7e2d60de26 decode/texture: Eliminate trivial missing field initializer warnings
We can just specify the initializers.
2020-04-16 22:27:21 -04:00
Lioncash
337f2dc11f time_zone_manager: Resolve sign conversion warnings
ttis and ats will never exceed the length of INT32_MAX in our case, so
this is safe.
2020-04-16 22:23:59 -04:00
Lioncash
fc5df84581 CMakeLists: Enable -Wmissing-declarations on Linux builds
Allows catching cases where internal linkage isn't specified for helper
functions when they should be marked as such.
2020-04-16 22:07:16 -04:00
Lioncash
77356731a9 hle_ipc: Remove std::size_t casts where applicable
These were added in the change that enabled -Wextra on linux builds so
as not to introduce interface changes in the same change as a
build-system flag addition.

Now that the flags are enabled, we can freely change the interface to
make these unnecessary.
2020-04-16 22:02:10 -04:00
ReinUsesLisp
238c6016f9 maxwell_3d: Initialize format attributes constant as one
nouveau expects this to be true but it doesn't set it.
2020-04-16 21:15:07 -03:00
ReinUsesLisp
c961770900 vk_compute_pass: Implement indexed quads
Implement indexed quads (GL_QUADS used with glDrawElements*) with a
compute pass conversion.

The compute shader converts from uint8/uint16/uint32 indices to uint32.
The format is passed through push constants to avoid having different
variants of the same shader.

- Used by Fast RMX
- Used by Xenoblade Chronicles 2 (it still has graphical due to
synchronization issues on Vulkan)
2020-04-16 21:12:32 -03:00
Lioncash
636c8ab85b texture_cache/format_lookup_table: Fix incorrect green, blue, and alpha indices
Previously these were all using the red component to derive the indices,
which is definitely not intentional.
2020-04-15 23:50:46 -04:00
Lioncash
e15ec2705c track: Eliminate redundant copies
Two variables can be references, while two others can be std::moved.
Makes for 4 less atomic reference count increments and decrements.
2020-04-15 21:50:09 -04:00
Lioncash
e77337588e file_util: Early-exit in WriteArray and ReadArray if specified lengths are zero
It's undefined behavior to pass a null pointer to std::fread and
std::fwrite, even if the length passed in is zero, so we must perform
the precondition checking ourselves.

A common case where this can occur is when passing in the data of an
empty std::vector and size, as an empty vector will typically have a
null internal buffer.

While we're at it, we can move the implementation out of line and add
debug checks against passing in nullptr to std::fread and std::fwrite.
2020-04-15 14:43:37 -04:00
ReinUsesLisp
0e232cfdc1 renderer_vulkan: Integrate Nvidia Nsight Aftermath on Windows
Adds optional support for Nsight Aftermath. It is enabled through
ENABLE_NSIGHT_AFTERMATH in cmake. A path to the SDK has to be provided
by the environment variable NSIGHT_AFTERMATH_SDK.

Nsight Aftermath allows an application to generate "minidumps" of the
GPU state when a device loss happens. By analysing these on Nsight we
can know what a game was doing and why it triggered a device loss.

The dump is generated inside %APPDATA%\yuzu\log\gpucrash and this
directory is deleted every time a new instance is initialized with
Nsight enabled.

To enable it on yuzu there has a to be a driver and device capable of
running Nsight Aftermath on Vulkan. That means only Turing based GPUs
on the latest stable driver, beta drivers won't work for now.

It is manually enabled in Configuration>Debug>Enable Graphics Debugging
because when using all debugging capabilities there is a runtime cost.
2020-04-14 00:39:21 -03:00
FearlessTobi
c2bf91156a yuzu/main: Add better popup texts and remove duplicated actions
Makes popup texts more compact and clear and also links our quickstart guide now.
Also removes OnMenuSelectEmulatedDirectory from the File dropdown, as the action already exists in the Filesystem tab and provides better visual feedback there.
2020-04-14 02:56:22 +02:00
ReinUsesLisp
6cfe2a7246 renderer_vulkan: Remove Nvidia checkpoints 2020-04-13 17:33:59 -03:00
ReinUsesLisp
16105c6a66 renderer_vulkan: Catch device losses in more places 2020-04-13 17:33:59 -03:00
Ben Russell
f98a2c42de common/file_util: Allow access to files on network shares
On Windows, network shares use paths like \\server\share\file which were
being broken by FileUtil::SanitizePath() removing double slashes.

Changed the code in SanitizePath to permit a double-backslash if it
occurs at the start of a filepath (on Windows only).
2020-04-09 18:48:28 +01:00
126 changed files with 3192 additions and 1009 deletions

8
.ci/scripts/windows/docker.sh Normal file → Executable file
View File

@@ -29,7 +29,13 @@ echo 'Prepare binaries...'
cd ..
mkdir package
QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
if [ -d "/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/" ]; then
QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
else
#fallback to qt
QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt/plugins/platforms/'
fi
find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
# copy Qt plugins

View File

@@ -164,7 +164,7 @@ if (ENABLE_SDL2)
set(SDL2_LIBRARIES "SDL2::SDL2")
endif()
include_directories(${SDL2_INCLUDE_DIRS})
include_directories(SYSTEM ${SDL2_INCLUDE_DIRS})
add_library(SDL2 INTERFACE)
target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
endif()

View File

@@ -910,14 +910,14 @@ typedef void* (*MicroProfileThreadFunc)(void*);
#ifndef _WIN32
typedef pthread_t MicroProfileThread;
void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
pthread_attr_t Attr;
int r = pthread_attr_init(&Attr);
MP_ASSERT(r == 0);
pthread_create(pThread, &Attr, Func, 0);
}
void MicroProfileThreadJoin(MicroProfileThread* pThread)
inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
int r = pthread_join(*pThread, 0);
MP_ASSERT(r == 0);
@@ -930,11 +930,11 @@ DWORD _stdcall ThreadTrampoline(void* pFunc)
return (uint32_t)F(0);
}
void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
*pThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
}
void MicroProfileThreadJoin(MicroProfileThread* pThread)
inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
WaitForSingleObject(*pThread, INFINITE);
CloseHandle(*pThread);
@@ -1131,7 +1131,7 @@ inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
pthread_setspecific(g_MicroProfileThreadLogKey, pLog);
}
#else
MicroProfileThreadLog* MicroProfileGetThreadLog()
inline MicroProfileThreadLog* MicroProfileGetThreadLog()
{
return g_MicroProfileThreadLog;
}
@@ -1247,7 +1247,7 @@ MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
return MICROPROFILE_INVALID_TOKEN;
}
uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
inline uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
{
for(uint32_t i = 0; i < S.nGroupCount; ++i)
{
@@ -1276,7 +1276,7 @@ uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
return nGroupIndex;
}
void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
inline void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
{
int nCategoryIndex = -1;
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
@@ -1442,7 +1442,7 @@ void MicroProfileGpuLeave(MicroProfileToken nToken_, uint64_t nTickStart)
}
}
void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
inline void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
{
if(S.nRunning || pContextSwitch->nTicks <= S.nPauseTicks)
{
@@ -1894,7 +1894,7 @@ void MicroProfileSetEnableAllGroups(bool bEnableAllGroups)
S.nAllGroupsWanted = bEnableAllGroups ? 1 : 0;
}
void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
inline void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
{
int nCategoryIndex = -1;
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
@@ -2004,7 +2004,7 @@ void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Typ
}
void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
inline void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
{
for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i)
{

View File

@@ -417,19 +417,19 @@ void MicroProfileToggleDisplayMode()
}
void MicroProfileStringArrayClear(MicroProfileStringArray* pArray)
inline void MicroProfileStringArrayClear(MicroProfileStringArray* pArray)
{
pArray->nNumStrings = 0;
pArray->pBufferPos = &pArray->Buffer[0];
}
void MicroProfileStringArrayAddLiteral(MicroProfileStringArray* pArray, const char* pLiteral)
inline void MicroProfileStringArrayAddLiteral(MicroProfileStringArray* pArray, const char* pLiteral)
{
MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS);
pArray->ppStrings[pArray->nNumStrings++] = pLiteral;
}
void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char* fmt, ...)
inline void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char* fmt, ...)
{
MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS);
pArray->ppStrings[pArray->nNumStrings++] = pArray->pBufferPos;
@@ -439,7 +439,7 @@ void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char*
va_end(args);
MP_ASSERT(pArray->pBufferPos < pArray->Buffer + MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE);
}
void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStringArray* pSrc)
inline void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStringArray* pSrc)
{
memcpy(&pDest->ppStrings[0], &pSrc->ppStrings[0], sizeof(pDest->ppStrings));
memcpy(&pDest->Buffer[0], &pSrc->Buffer[0], sizeof(pDest->Buffer));
@@ -456,7 +456,7 @@ void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStr
pDest->nNumStrings = pSrc->nNumStrings;
}
void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, uint32_t* pColors, uint32_t& nWidth, uint32_t& nHeight, uint32_t* pStringLengths = 0)
inline void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, uint32_t* pColors, uint32_t& nWidth, uint32_t& nHeight, uint32_t* pStringLengths = 0)
{
uint32_t* nStringLengths = pStringLengths ? pStringLengths : (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
uint32_t nTextCount = nNumStrings/2;
@@ -474,7 +474,7 @@ void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, u
nHeight = (MICROPROFILE_TEXT_HEIGHT+1) * nTextCount + 2 * MICROPROFILE_BORDER_SIZE;
}
void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
inline void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
{
uint32_t nWidth = 0, nHeight = 0;
uint32_t* nStringLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
@@ -503,7 +503,7 @@ void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppString
nY += (MICROPROFILE_TEXT_HEIGHT+1);
}
}
void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
inline void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
{
uint32_t nWidth = 0, nHeight = 0;
uint32_t* nStringLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
@@ -529,7 +529,7 @@ void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, u
void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
inline void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
{
MicroProfile& S = *MicroProfileGet();
if(UI.nRangeBeginIndex != UI.nRangeEndIndex && UI.pRangeLog)
@@ -608,7 +608,7 @@ void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
}
}
void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uint64_t nTime)
inline void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uint64_t nTime)
{
MicroProfile& S = *MicroProfileGet();
@@ -718,7 +718,7 @@ void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uin
}
void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
inline void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
{
MicroProfile& S = *MicroProfileGet();
@@ -728,7 +728,7 @@ void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
UI.fDetailedRangeTarget = MicroProfileLogTickDifference(nTickStart, nTickEnd) * fToMs;
}
void MicroProfileCenter(int64_t nTickCenter)
inline void MicroProfileCenter(int64_t nTickCenter)
{
MicroProfile& S = *MicroProfileGet();
int64_t nStart = S.Frames[S.nFrameCurrent].nFrameStartCpu;
@@ -739,7 +739,7 @@ void MicroProfileCenter(int64_t nTickCenter)
#ifdef MICROPROFILE_DEBUG
uint64_t* g_pMicroProfileDumpStart = 0;
uint64_t* g_pMicroProfileDumpEnd = 0;
void MicroProfileDebugDumpRange()
inline void MicroProfileDebugDumpRange()
{
MicroProfile& S = *MicroProfileGet();
if(g_pMicroProfileDumpStart != g_pMicroProfileDumpEnd)
@@ -777,7 +777,7 @@ void MicroProfileDebugDumpRange()
#define MICROPROFILE_HOVER_DIST 0.5f
void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId, uint32_t nContextSwitchStart, uint32_t nContextSwitchEnd, int64_t nBaseTicks, uint32_t nBaseY)
inline void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId, uint32_t nContextSwitchStart, uint32_t nContextSwitchEnd, int64_t nBaseTicks, uint32_t nBaseY)
{
MicroProfile& S = *MicroProfileGet();
int64_t nTickIn = -1;
@@ -841,7 +841,7 @@ void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId,
}
}
void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, int nSelectedFrame)
inline void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, int nSelectedFrame)
{
MicroProfile& S = *MicroProfileGet();
MP_DEBUG_DUMP_RANGE();
@@ -1325,7 +1325,7 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY,
}
void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uint32_t nBaseY, uint32_t nSelectedFrame)
inline void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uint32_t nBaseY, uint32_t nSelectedFrame)
{
MicroProfile& S = *MicroProfileGet();
@@ -1379,7 +1379,7 @@ void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uin
}
MicroProfileDrawBox(fSelectionStart, nBaseY, fSelectionEnd, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, MICROPROFILE_FRAME_HISTORY_COLOR_HIGHTLIGHT, MicroProfileBoxTypeFlat);
}
void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
inline void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
{
MicroProfile& S = *MicroProfileGet();
@@ -1416,11 +1416,11 @@ void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
MicroProfileDrawDetailedFrameHistory(nWidth, nHeight, nBaseY, nSelectedFrame);
}
void MicroProfileDrawTextRight(uint32_t nX, uint32_t nY, uint32_t nColor, const char* pStr, uint32_t nStrLen)
inline void MicroProfileDrawTextRight(uint32_t nX, uint32_t nY, uint32_t nColor, const char* pStr, uint32_t nStrLen)
{
MicroProfileDrawText(nX - nStrLen * (MICROPROFILE_TEXT_WIDTH+1), nY, nColor, pStr, nStrLen);
}
void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)
inline void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)
{
if(pName)
{
@@ -1432,7 +1432,7 @@ void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)
typedef void (*MicroProfileLoopGroupCallback)(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pData);
void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName, MicroProfileLoopGroupCallback CB, void* pData)
inline void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName, MicroProfileLoopGroupCallback CB, void* pData)
{
MicroProfile& S = *MicroProfileGet();
nY += MICROPROFILE_TEXT_HEIGHT + 2;
@@ -1465,7 +1465,7 @@ void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName,
}
void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, uint64_t nGroup, uint32_t nSize)
inline void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, uint64_t nGroup, uint32_t nSize)
{
MicroProfile& S = *MicroProfileGet();
@@ -1527,7 +1527,7 @@ void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float*
#define SBUF_MAX 32
void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
inline void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
{
const uint32_t nHeight = MICROPROFILE_TEXT_HEIGHT;
const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH);
@@ -1547,7 +1547,7 @@ void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t n
}
uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const char* pName, uint32_t nTotalHeight, float* pTimers2 = NULL)
inline uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const char* pName, uint32_t nTotalHeight, float* pTimers2 = NULL)
{
const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH);
const uint32_t nWidth = MICROPROFILE_BAR_WIDTH;
@@ -1559,7 +1559,7 @@ uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const
return nWidth + 5 + nTextWidth;
}
void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
inline void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
{
MicroProfile& S = *MicroProfileGet();
char sBuffer[SBUF_MAX];
@@ -1567,7 +1567,7 @@ void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64
MicroProfileDrawText(nX, nY, (uint32_t)-1, sBuffer, nLen);
}
uint32_t MicroProfileDrawBarCallCount(int32_t nX, int32_t nY, const char* pName)
inline uint32_t MicroProfileDrawBarCallCount(int32_t nX, int32_t nY, const char* pName)
{
MicroProfileLoopActiveGroupsDraw(nX, nY, pName, MicroProfileDrawBarCallCountCallback, 0);
const uint32_t nTextWidth = 6 * MICROPROFILE_TEXT_WIDTH;
@@ -1581,7 +1581,7 @@ struct MicroProfileMetaAverageArgs
float fRcpFrames;
};
void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
inline void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
{
MicroProfileMetaAverageArgs* pArgs = (MicroProfileMetaAverageArgs*)pExtra;
uint64_t* pCounters = pArgs->pCounters;
@@ -1591,7 +1591,7 @@ void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint
MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen);
}
uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
inline uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
{
if(!pName)
return 0;
@@ -1605,7 +1605,7 @@ uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCount
}
void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
inline void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
{
uint64_t* pCounters = (uint64_t*)pExtra;
char sBuffer[SBUF_MAX];
@@ -1613,7 +1613,7 @@ void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64
MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen);
}
uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
inline uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
{
if(!pName)
return 0;
@@ -1625,7 +1625,7 @@ uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounter
return 5 + nTextWidth;
}
void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
inline void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
{
MicroProfile& S = *MicroProfileGet();
if (S.TimerInfo[nTimer].bGraph)
@@ -1640,7 +1640,7 @@ void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t
}
}
uint32_t MicroProfileDrawBarLegend(int32_t nX, int32_t nY, uint32_t nTotalHeight, uint32_t nMaxWidth)
inline uint32_t MicroProfileDrawBarLegend(int32_t nX, int32_t nY, uint32_t nTotalHeight, uint32_t nMaxWidth)
{
MicroProfileDrawLineVertical(nX-5, nY, nTotalHeight, UI.nOpacityBackground | g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
MicroProfileLoopActiveGroupsDraw(nMaxWidth, nY, 0, MicroProfileDrawBarLegendCallback, 0);
@@ -1807,7 +1807,7 @@ void MicroProfileDumpTimers()
}
}
void MicroProfileDrawBarView(uint32_t nScreenWidth, uint32_t nScreenHeight)
inline void MicroProfileDrawBarView(uint32_t nScreenWidth, uint32_t nScreenHeight)
{
MicroProfile& S = *MicroProfileGet();
@@ -1951,7 +1951,7 @@ typedef const char* (*MicroProfileSubmenuCallback)(int, bool* bSelected);
typedef void (*MicroProfileClickCallback)(int);
const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
{
MicroProfile& S = *MicroProfileGet();
switch(nIndex)
@@ -1979,7 +1979,7 @@ const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
}
}
const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
{
MicroProfile& S = *MicroProfileGet();
*bSelected = false;
@@ -2012,7 +2012,7 @@ const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
}
}
const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)
{
MicroProfile& S = *MicroProfileGet();
if(nIndex < sizeof(g_MicroProfileAggregatePresets)/sizeof(g_MicroProfileAggregatePresets[0]))
@@ -2032,7 +2032,7 @@ const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)
}
const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
{
MicroProfile& S = *MicroProfileGet();
*bSelected = 0 != (S.nBars & (1 << nIndex));
@@ -2054,7 +2054,7 @@ const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
return 0;
}
const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
{
MicroProfile& S = *MicroProfileGet();
if(nIndex >= MICROPROFILE_OPTION_SIZE) return 0;
@@ -2094,7 +2094,7 @@ const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
return UI.Options[nIndex].Text;
}
const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
{
static char buf[128];
*bSelected = false;
@@ -2118,7 +2118,7 @@ const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
}
}
const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
{
if((uint32_t)-1 == UI.nCustomActive)
{
@@ -2145,13 +2145,13 @@ const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
}
}
const char* MicroProfileUIMenuEmpty(int nIndex, bool* bSelected)
inline const char* MicroProfileUIMenuEmpty(int nIndex, bool* bSelected)
{
return 0;
}
void MicroProfileUIClickMode(int nIndex)
inline void MicroProfileUIClickMode(int nIndex)
{
MicroProfile& S = *MicroProfileGet();
switch(nIndex)
@@ -2176,7 +2176,7 @@ void MicroProfileUIClickMode(int nIndex)
}
}
void MicroProfileUIClickGroups(int nIndex)
inline void MicroProfileUIClickGroups(int nIndex)
{
MicroProfile& S = *MicroProfileGet();
if(nIndex == 0)
@@ -2208,7 +2208,7 @@ void MicroProfileUIClickGroups(int nIndex)
}
}
void MicroProfileUIClickAggregate(int nIndex)
inline void MicroProfileUIClickAggregate(int nIndex)
{
MicroProfile& S = *MicroProfileGet();
S.nAggregateFlip = g_MicroProfileAggregatePresets[nIndex];
@@ -2218,13 +2218,13 @@ void MicroProfileUIClickAggregate(int nIndex)
}
}
void MicroProfileUIClickTimers(int nIndex)
inline void MicroProfileUIClickTimers(int nIndex)
{
MicroProfile& S = *MicroProfileGet();
S.nBars ^= (1 << nIndex);
}
void MicroProfileUIClickOptions(int nIndex)
inline void MicroProfileUIClickOptions(int nIndex)
{
MicroProfile& S = *MicroProfileGet();
switch(UI.Options[nIndex].nSubType)
@@ -2271,7 +2271,7 @@ void MicroProfileUIClickOptions(int nIndex)
}
}
void MicroProfileUIClickPreset(int nIndex)
inline void MicroProfileUIClickPreset(int nIndex)
{
int nNumPresets = sizeof(g_MicroProfilePresetNames) / sizeof(g_MicroProfilePresetNames[0]);
int nIndexSave = nIndex - nNumPresets - 1;
@@ -2285,7 +2285,7 @@ void MicroProfileUIClickPreset(int nIndex)
}
}
void MicroProfileUIClickCustom(int nIndex)
inline void MicroProfileUIClickCustom(int nIndex)
{
if(nIndex == 0)
{
@@ -2298,13 +2298,13 @@ void MicroProfileUIClickCustom(int nIndex)
}
void MicroProfileUIClickEmpty(int nIndex)
inline void MicroProfileUIClickEmpty(int nIndex)
{
}
void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
inline void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
{
MicroProfile& S = *MicroProfileGet();
@@ -2489,7 +2489,7 @@ void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
}
void MicroProfileMoveGraph()
inline void MicroProfileMoveGraph()
{
int nZoom = UI.nMouseWheelDelta;
@@ -2536,7 +2536,7 @@ void MicroProfileMoveGraph()
UI.nOffsetY = 0;
}
void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
inline void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
{
if((uint32_t)-1 != UI.nCustomActive)
{
@@ -2633,7 +2633,7 @@ void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
}
}
}
void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)
inline void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)
{
MICROPROFILE_SCOPE(g_MicroProfileDraw);
MicroProfile& S = *MicroProfileGet();
@@ -3226,7 +3226,7 @@ void MicroProfileLoadPreset(const char* pSuffix)
}
}
uint32_t MicroProfileCustomGroupFind(const char* pCustomName)
inline uint32_t MicroProfileCustomGroupFind(const char* pCustomName)
{
for(uint32_t i = 0; i < UI.nCustomCount; ++i)
{
@@ -3238,7 +3238,7 @@ uint32_t MicroProfileCustomGroupFind(const char* pCustomName)
return (uint32_t)-1;
}
uint32_t MicroProfileCustomGroup(const char* pCustomName)
inline uint32_t MicroProfileCustomGroup(const char* pCustomName)
{
for(uint32_t i = 0; i < UI.nCustomCount; ++i)
{
@@ -3271,7 +3271,7 @@ void MicroProfileCustomGroup(const char* pCustomName, uint32_t nMaxTimers, uint3
UI.Custom[nIndex].nAggregateFlip = nAggregateFlip;
}
void MicroProfileCustomGroupEnable(uint32_t nIndex)
inline void MicroProfileCustomGroupEnable(uint32_t nIndex)
{
if(nIndex < UI.nCustomCount)
{

View File

@@ -203,7 +203,11 @@ endif()
target_compile_definitions(opus PRIVATE OPUS_BUILD ENABLE_HARDENING)
if(NOT MSVC)
target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=2)
if(MINGW)
target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=0)
else()
target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=2)
endif()
endif()
# It is strongly recommended to uncomment one of these VAR_ARRAYS: Use C99

View File

@@ -54,8 +54,10 @@ else()
add_compile_options(
-Wall
-Werror=implicit-fallthrough
-Werror=missing-declarations
-Werror=reorder
-Wextra
-Wmissing-declarations
-Wno-attributes
-Wno-unused-parameter
)

View File

@@ -7,9 +7,12 @@ add_library(audio_core STATIC
audio_out.h
audio_renderer.cpp
audio_renderer.h
behavior_info.cpp
behavior_info.h
buffer.h
codec.cpp
codec.h
common.h
null_sink.h
sink.h
sink_details.cpp

View File

@@ -6,6 +6,7 @@
#include "audio_core/audio_out.h"
#include "audio_core/audio_renderer.h"
#include "audio_core/codec.h"
#include "audio_core/common.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
@@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
std::size_t instance_number)
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
effects(params.effect_count), memory{memory_} {
behavior_info.SetUserRevision(params.revision);
audio_out = std::make_unique<AudioCore::AudioOut>();
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
fmt::format("AudioRenderer-Instance{}", instance_number),
@@ -109,17 +110,17 @@ Stream::State AudioRenderer::GetStreamState() const {
return stream->GetState();
}
static constexpr u32 VersionFromRevision(u32_le rev) {
// "REV7" -> 7
return ((rev >> 24) & 0xff) - 0x30;
}
std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
// Copy UpdateDataHeader struct
UpdateDataHeader config{};
std::memcpy(&config, input_params.data(), sizeof(UpdateDataHeader));
u32 memory_pool_count = worker_params.effect_count + (worker_params.voice_count * 4);
if (!behavior_info.UpdateInput(input_params, sizeof(UpdateDataHeader))) {
LOG_ERROR(Audio, "Failed to update behavior info input parameters");
return Audren::ERR_INVALID_PARAMETERS;
}
// Copy MemoryPoolInfo structs
std::vector<MemoryPoolInfo> mem_pool_info(memory_pool_count);
std::memcpy(mem_pool_info.data(),
@@ -173,8 +174,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
// Copy output header
UpdateDataHeader response_data{worker_params};
std::vector<u8> output_params(response_data.total_size);
const auto audren_revision = VersionFromRevision(config.revision);
if (audren_revision >= 5) {
if (behavior_info.IsElapsedFrameCountSupported()) {
response_data.frame_count = 0x10;
response_data.total_size += 0x10;
}
@@ -200,7 +200,19 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
sizeof(EffectOutStatus));
effect_out_status_offset += sizeof(EffectOutStatus);
}
return output_params;
// Update behavior info output
const std::size_t behavior_out_status_offset{
sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
response_data.effects_size + response_data.sinks_size +
response_data.performance_manager_size};
if (!behavior_info.UpdateOutput(output_params, behavior_out_status_offset)) {
LOG_ERROR(Audio, "Failed to update behavior info output parameters");
return Audren::ERR_INVALID_PARAMETERS;
}
return MakeResult(output_params);
}
void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {

View File

@@ -8,11 +8,13 @@
#include <memory>
#include <vector>
#include "audio_core/behavior_info.h"
#include "audio_core/stream.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/kernel/object.h"
#include "core/hle/result.h"
namespace Core::Timing {
class CoreTiming;
@@ -226,7 +228,7 @@ public:
std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number);
~AudioRenderer();
std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
ResultVal<std::vector<u8>> UpdateAudioRenderer(const std::vector<u8>& input_params);
void QueueMixedBuffer(Buffer::Tag tag);
void ReleaseAndQueueBuffers();
u32 GetSampleRate() const;
@@ -237,6 +239,7 @@ public:
private:
class EffectState;
class VoiceState;
BehaviorInfo behavior_info{};
AudioRendererParameter worker_params;
std::shared_ptr<Kernel::WritableEvent> buffer_event;

View File

@@ -0,0 +1,100 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "audio_core/behavior_info.h"
#include "audio_core/common.h"
#include "common/logging/log.h"
namespace AudioCore {
BehaviorInfo::BehaviorInfo() : process_revision(CURRENT_PROCESS_REVISION) {}
BehaviorInfo::~BehaviorInfo() = default;
bool BehaviorInfo::UpdateInput(const std::vector<u8>& buffer, std::size_t offset) {
if (!CanConsumeBuffer(buffer.size(), offset, sizeof(InParams))) {
LOG_ERROR(Audio, "Buffer is an invalid size!");
return false;
}
InParams params{};
std::memcpy(&params, buffer.data() + offset, sizeof(InParams));
if (!IsValidRevision(params.revision)) {
LOG_ERROR(Audio, "Invalid input revision, revision=0x{:08X}", params.revision);
return false;
}
if (user_revision != params.revision) {
LOG_ERROR(Audio,
"User revision differs from input revision, expecting 0x{:08X} but got 0x{:08X}",
user_revision, params.revision);
return false;
}
ClearError();
UpdateFlags(params.flags);
// TODO(ogniK): Check input params size when InfoUpdater is used
return true;
}
bool BehaviorInfo::UpdateOutput(std::vector<u8>& buffer, std::size_t offset) {
if (!CanConsumeBuffer(buffer.size(), offset, sizeof(OutParams))) {
LOG_ERROR(Audio, "Buffer is an invalid size!");
return false;
}
OutParams params{};
std::memcpy(params.errors.data(), errors.data(), sizeof(ErrorInfo) * errors.size());
params.error_count = static_cast<u32_le>(error_count);
std::memcpy(buffer.data() + offset, &params, sizeof(OutParams));
return true;
}
void BehaviorInfo::ClearError() {
error_count = 0;
}
void BehaviorInfo::UpdateFlags(u64_le dest_flags) {
flags = dest_flags;
}
void BehaviorInfo::SetUserRevision(u32_le revision) {
user_revision = revision;
}
bool BehaviorInfo::IsAdpcmLoopContextBugFixed() const {
return IsRevisionSupported(2, user_revision);
}
bool BehaviorInfo::IsSplitterSupported() const {
return IsRevisionSupported(2, user_revision);
}
bool BehaviorInfo::IsLongSizePreDelaySupported() const {
return IsRevisionSupported(3, user_revision);
}
bool BehaviorInfo::IsAudioRenererProcessingTimeLimit80PercentSupported() const {
return IsRevisionSupported(5, user_revision);
}
bool BehaviorInfo::IsAudioRenererProcessingTimeLimit75PercentSupported() const {
return IsRevisionSupported(4, user_revision);
}
bool BehaviorInfo::IsAudioRenererProcessingTimeLimit70PercentSupported() const {
return IsRevisionSupported(1, user_revision);
}
bool BehaviorInfo::IsElapsedFrameCountSupported() const {
return IsRevisionSupported(5, user_revision);
}
bool BehaviorInfo::IsMemoryPoolForceMappingEnabled() const {
return (flags & 1) != 0;
}
} // namespace AudioCore

View File

@@ -0,0 +1,66 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
namespace AudioCore {
class BehaviorInfo {
public:
explicit BehaviorInfo();
~BehaviorInfo();
bool UpdateInput(const std::vector<u8>& buffer, std::size_t offset);
bool UpdateOutput(std::vector<u8>& buffer, std::size_t offset);
void ClearError();
void UpdateFlags(u64_le dest_flags);
void SetUserRevision(u32_le revision);
bool IsAdpcmLoopContextBugFixed() const;
bool IsSplitterSupported() const;
bool IsLongSizePreDelaySupported() const;
bool IsAudioRenererProcessingTimeLimit80PercentSupported() const;
bool IsAudioRenererProcessingTimeLimit75PercentSupported() const;
bool IsAudioRenererProcessingTimeLimit70PercentSupported() const;
bool IsElapsedFrameCountSupported() const;
bool IsMemoryPoolForceMappingEnabled() const;
private:
u32_le process_revision{};
u32_le user_revision{};
u64_le flags{};
struct ErrorInfo {
u32_le result{};
INSERT_PADDING_WORDS(1);
u64_le result_info{};
};
static_assert(sizeof(ErrorInfo) == 0x10, "ErrorInfo is an invalid size");
std::array<ErrorInfo, 10> errors{};
std::size_t error_count{};
struct InParams {
u32_le revision{};
u32_le padding{};
u64_le flags{};
};
static_assert(sizeof(InParams) == 0x10, "InParams is an invalid size");
struct OutParams {
std::array<ErrorInfo, 10> errors{};
u32_le error_count{};
INSERT_PADDING_BYTES(12);
};
static_assert(sizeof(OutParams) == 0xb0, "OutParams is an invalid size");
};
} // namespace AudioCore

47
src/audio_core/common.h Normal file
View File

@@ -0,0 +1,47 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/result.h"
namespace AudioCore {
namespace Audren {
constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
}
constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
static constexpr u32 VersionFromRevision(u32_le rev) {
// "REV7" -> 7
return ((rev >> 24) & 0xff) - 0x30;
}
static constexpr bool IsRevisionSupported(u32 required, u32_le user_revision) {
const auto base = VersionFromRevision(user_revision);
return required <= base;
}
static constexpr bool IsValidRevision(u32_le revision) {
const auto base = VersionFromRevision(revision);
constexpr auto max_rev = VersionFromRevision(CURRENT_PROCESS_REVISION);
return base <= max_rev;
}
static constexpr bool CanConsumeBuffer(std::size_t size, std::size_t offset, std::size_t required) {
if (offset > size) {
return false;
}
if (size < required) {
return false;
}
if ((size - offset) < required) {
return false;
}
return true;
}
} // namespace AudioCore

View File

@@ -888,7 +888,14 @@ std::string SanitizePath(std::string_view path_, DirectorySeparator directory_se
}
std::replace(path.begin(), path.end(), type1, type2);
path.erase(std::unique(path.begin(), path.end(),
auto start = path.begin();
#ifdef _WIN32
// allow network paths which start with a double backslash (e.g. \\server\share)
if (start != path.end())
++start;
#endif
path.erase(std::unique(start, path.end(),
[type2](char c1, char c2) { return c1 == type2 && c2 == type2; }),
path.end());
return std::string(RemoveTrailingSlash(path));
@@ -967,6 +974,34 @@ bool IOFile::Flush() {
return IsOpen() && 0 == std::fflush(m_file);
}
std::size_t IOFile::ReadImpl(void* data, std::size_t length, std::size_t data_size) const {
if (!IsOpen()) {
return std::numeric_limits<std::size_t>::max();
}
if (length == 0) {
return 0;
}
DEBUG_ASSERT(data != nullptr);
return std::fread(data, data_size, length, m_file);
}
std::size_t IOFile::WriteImpl(const void* data, std::size_t length, std::size_t data_size) {
if (!IsOpen()) {
return std::numeric_limits<std::size_t>::max();
}
if (length == 0) {
return 0;
}
DEBUG_ASSERT(data != nullptr);
return std::fwrite(data, data_size, length, m_file);
}
bool IOFile::Resize(u64 size) {
return IsOpen() && 0 ==
#ifdef _WIN32

View File

@@ -222,22 +222,15 @@ public:
static_assert(std::is_trivially_copyable_v<T>,
"Given array does not consist of trivially copyable objects");
if (!IsOpen()) {
return std::numeric_limits<std::size_t>::max();
}
return std::fread(data, sizeof(T), length, m_file);
return ReadImpl(data, length, sizeof(T));
}
template <typename T>
std::size_t WriteArray(const T* data, std::size_t length) {
static_assert(std::is_trivially_copyable_v<T>,
"Given array does not consist of trivially copyable objects");
if (!IsOpen()) {
return std::numeric_limits<std::size_t>::max();
}
return std::fwrite(data, sizeof(T), length, m_file);
return WriteImpl(data, length, sizeof(T));
}
template <typename T>
@@ -278,6 +271,9 @@ public:
}
private:
std::size_t ReadImpl(void* data, std::size_t length, std::size_t data_size) const;
std::size_t WriteImpl(const void* data, std::size_t length, std::size_t data_size);
std::FILE* m_file = nullptr;
};

View File

@@ -20,6 +20,7 @@
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/svc.h"
#include "core/memory.h"
#include "core/settings.h"
namespace Core {
@@ -144,6 +145,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
// Multi-process state
config.processor_id = core_index;
@@ -159,8 +162,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
// Optimizations
if (Settings::values.disable_cpu_opt) {
config.enable_optimizations = false;
config.enable_fast_dispatch = false;
}
return std::make_shared<Dynarmic::A64::Jit>(config);
}

View File

@@ -11,6 +11,7 @@
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/svc.h"
#include "core/memory.h"
namespace Core {
@@ -171,7 +172,17 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
// Temporarily map the code page for Unicorn
u64 map_addr{GetPC() & ~Memory::PAGE_MASK};
std::vector<u8> page_buffer(Memory::PAGE_SIZE);
system.Memory().ReadBlock(map_addr, page_buffer.data(), page_buffer.size());
CHECKED(uc_mem_map_ptr(uc, map_addr, page_buffer.size(),
UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size()));
system.CoreTiming().AddTicks(num_instructions);
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {

View File

@@ -1202,7 +1202,8 @@ const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager:
{S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyAreaKey),
static_cast<u64>(KeyAreaKeyType::System)}},
{"titlekek_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::Titlekek), 0}},
{"keyblob_mac_key_source", {S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyblobMAC)}},
{"keyblob_mac_key_source",
{S128KeyType::Source, static_cast<u64>(SourceKeyType::KeyblobMAC), 0}},
{"tsec_key", {S128KeyType::TSEC, 0, 0}},
{"secure_boot_key", {S128KeyType::SecureBoot, 0, 0}},
{"sd_seed", {S128KeyType::SDSeed, 0, 0}},

View File

@@ -202,8 +202,8 @@ static std::array<Key128, 0x20> FindEncryptedMasterKeyFromHex(const std::vector<
return out;
}
FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
const std::string& name) {
static FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
const std::string& name) {
const auto upper = Common::ToUpper(name);
for (const auto& fname : {name, name + ".bin", upper, upper + ".BIN"}) {
@@ -345,8 +345,7 @@ FileSys::VirtualFile PartitionDataManager::GetPackage2Raw(Package2Type type) con
return package2.at(static_cast<size_t>(type));
}
bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) {
static bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) {
const std::vector<u8> iv(header.header_ctr.begin(), header.header_ctr.end());
Package2Header temp = header;
AESCipher<Key128> cipher(key, Mode::CTR);

View File

@@ -51,6 +51,17 @@ Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
return Loader::ResultStatus::Success;
}
/*static*/ ProgramMetadata ProgramMetadata::GetDefault() {
ProgramMetadata result;
result.LoadManual(
true /*is_64_bit*/, FileSys::ProgramAddressSpaceType::Is39Bit /*address_space*/,
0x2c /*main_thread_prio*/, 0 /*main_thread_core*/, 0x00100000 /*main_thread_stack_size*/,
{}, 0xFFFFFFFFFFFFFFFF /*filesystem_permissions*/, {} /*capabilities*/);
return result;
}
void ProgramMetadata::LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space,
s32 main_thread_prio, u32 main_thread_core,
u32 main_thread_stack_size, u64 title_id,

View File

@@ -44,9 +44,13 @@ public:
ProgramMetadata();
~ProgramMetadata();
/// Gets a default ProgramMetadata configuration, should only be used for homebrew formats where
/// we do not have an NPDM file
static ProgramMetadata GetDefault();
Loader::ResultStatus Load(VirtualFile file);
// Load from parameters instead of NPDM file, used for KIP
/// Load from parameters instead of NPDM file, used for KIP
void LoadManual(bool is_64_bit, ProgramAddressSpaceType address_space, s32 main_thread_prio,
u32 main_thread_core, u32 main_thread_stack_size, u64 title_id,
u64 filesystem_permissions, KernelCapabilityDescriptors capabilities);

View File

@@ -1389,10 +1389,9 @@ void SendTrap(Kernel::Thread* thread, int trap) {
return;
}
if (!halt_loop || current_thread == thread) {
current_thread = thread;
SendSignal(thread, trap);
}
current_thread = thread;
SendSignal(thread, trap);
halt_loop = true;
send_trap = false;
}

View File

@@ -282,19 +282,19 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
return RESULT_SUCCESS;
}
std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
std::vector<u8> buffer;
const bool is_buffer_a{BufferDescriptorA().size() > std::size_t(buffer_index) &&
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()};
auto& memory = Core::System::GetInstance().Memory();
if (is_buffer_a) {
ASSERT_MSG(BufferDescriptorA().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
"BufferDescriptorA invalid buffer_index {}", buffer_index);
buffer.resize(BufferDescriptorA()[buffer_index].Size());
memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
} else {
ASSERT_MSG(BufferDescriptorX().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
"BufferDescriptorX invalid buffer_index {}", buffer_index);
buffer.resize(BufferDescriptorX()[buffer_index].Size());
memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
@@ -304,13 +304,13 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
}
std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
int buffer_index) const {
std::size_t buffer_index) const {
if (size == 0) {
LOG_WARNING(Core, "skip empty buffer write");
return 0;
}
const bool is_buffer_b{BufferDescriptorB().size() > std::size_t(buffer_index) &&
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
if (size > buffer_size) {
@@ -321,13 +321,13 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
auto& memory = Core::System::GetInstance().Memory();
if (is_buffer_b) {
ASSERT_MSG(BufferDescriptorB().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
"BufferDescriptorB invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size,
"BufferDescriptorB buffer_index {} is not large enough", buffer_index);
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
} else {
ASSERT_MSG(BufferDescriptorC().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
"BufferDescriptorC invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size,
"BufferDescriptorC buffer_index {} is not large enough", buffer_index);
@@ -337,17 +337,17 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
return size;
}
std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
const bool is_buffer_a{BufferDescriptorA().size() > std::size_t(buffer_index) &&
std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const {
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()};
if (is_buffer_a) {
ASSERT_MSG(BufferDescriptorA().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
"BufferDescriptorA invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0,
"BufferDescriptorA buffer_index {} is empty", buffer_index);
return BufferDescriptorA()[buffer_index].Size();
} else {
ASSERT_MSG(BufferDescriptorX().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
"BufferDescriptorX invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0,
"BufferDescriptorX buffer_index {} is empty", buffer_index);
@@ -355,15 +355,15 @@ std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
}
}
std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
const bool is_buffer_b{BufferDescriptorB().size() > std::size_t(buffer_index) &&
std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) const {
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
if (is_buffer_b) {
ASSERT_MSG(BufferDescriptorB().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
"BufferDescriptorB invalid buffer_index {}", buffer_index);
return BufferDescriptorB()[buffer_index].Size();
} else {
ASSERT_MSG(BufferDescriptorC().size() > std::size_t(buffer_index),
ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
"BufferDescriptorC invalid buffer_index {}", buffer_index);
return BufferDescriptorC()[buffer_index].Size();
}

View File

@@ -179,10 +179,11 @@ public:
}
/// Helper function to read a buffer using the appropriate buffer descriptor
std::vector<u8> ReadBuffer(int buffer_index = 0) const;
std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const;
/// Helper function to write a buffer using the appropriate buffer descriptor
std::size_t WriteBuffer(const void* buffer, std::size_t size, int buffer_index = 0) const;
std::size_t WriteBuffer(const void* buffer, std::size_t size,
std::size_t buffer_index = 0) const;
/* Helper function to write a buffer using the appropriate buffer descriptor
*
@@ -194,7 +195,8 @@ public:
*/
template <typename ContiguousContainer,
typename = std::enable_if_t<!std::is_pointer_v<ContiguousContainer>>>
std::size_t WriteBuffer(const ContiguousContainer& container, int buffer_index = 0) const {
std::size_t WriteBuffer(const ContiguousContainer& container,
std::size_t buffer_index = 0) const {
using ContiguousType = typename ContiguousContainer::value_type;
static_assert(std::is_trivially_copyable_v<ContiguousType>,
@@ -205,10 +207,10 @@ public:
}
/// Helper function to get the size of the input buffer
std::size_t GetReadBufferSize(int buffer_index = 0) const;
std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const;
/// Helper function to get the size of the output buffer
std::size_t GetWriteBufferSize(int buffer_index = 0) const;
std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const;
template <typename T>
std::shared_ptr<T> GetCopyObject(std::size_t index) {

View File

@@ -17,7 +17,7 @@ namespace Kernel::Memory {
enum class MemoryState : u32 {
None = 0,
Mask = 0xFFFFFFFF, // TODO(bunnei): This should probable be 0xFF
Mask = 0xFF,
All = ~None,
FlagCanReprotect = (1 << 8),
@@ -253,6 +253,23 @@ public:
};
}
void ShareToDevice(MemoryPermission /*new_perm*/) {
ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared ||
device_use_count == 0);
attribute |= MemoryAttribute::DeviceShared;
const u16 new_use_count{++device_use_count};
ASSERT(new_use_count > 0);
}
void UnshareToDevice(MemoryPermission /*new_perm*/) {
ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared);
const u16 prev_use_count{device_use_count--};
ASSERT(prev_use_count > 0);
if (prev_use_count == 1) {
attribute &= ~MemoryAttribute::DeviceShared;
}
}
private:
constexpr bool HasProperties(MemoryState s, MemoryPermission p, MemoryAttribute a) const {
constexpr MemoryAttribute AttributeIgnoreMask{MemoryAttribute::DontCareMask |
@@ -287,9 +304,9 @@ private:
state = new_state;
perm = new_perm;
// TODO(bunnei): Is this right?
attribute = static_cast<MemoryAttribute>(
new_attribute /*| (attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared))*/);
new_attribute |
(attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared)));
}
constexpr MemoryBlock Split(VAddr split_addr) {

View File

@@ -143,6 +143,42 @@ void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState s
}
}
void MemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func,
MemoryPermission perm) {
const std::size_t prev_count{memory_block_tree.size()};
const VAddr end_addr{addr + num_pages * PageSize};
iterator node{memory_block_tree.begin()};
while (node != memory_block_tree.end()) {
MemoryBlock* block{&(*node)};
iterator next_node{std::next(node)};
const VAddr cur_addr{block->GetAddress()};
const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr};
if (addr < cur_end_addr && cur_addr < end_addr) {
iterator new_node{node};
if (addr > cur_addr) {
memory_block_tree.insert(node, block->Split(addr));
}
if (end_addr < cur_end_addr) {
new_node = memory_block_tree.insert(node, block->Split(end_addr));
}
lock_func(new_node, perm);
MergeAdjacent(new_node, next_node);
}
if (cur_end_addr - 1 >= end_addr - 1) {
break;
}
node = next_node;
}
}
void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) {
const_iterator it{FindIterator(start)};
MemoryInfo info{};

View File

@@ -45,6 +45,9 @@ public:
MemoryPermission perm = MemoryPermission::None,
MemoryAttribute attribute = MemoryAttribute::None);
using LockFunc = std::function<void(iterator, MemoryPermission)>;
void UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, MemoryPermission perm);
using IterateFunc = std::function<void(const MemoryInfo&)>;
void IterateForRange(VAddr start, VAddr end, IterateFunc&& func);

View File

@@ -840,6 +840,50 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
return MakeResult<VAddr>(addr);
}
ResultCode PageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
MemoryPermission perm{};
if (const ResultCode result{CheckMemoryState(
nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute,
MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None,
MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None,
MemoryAttribute::DeviceSharedAndUncached)};
result.IsError()) {
return result;
}
block_manager->UpdateLock(addr, size / PageSize,
[perm](MemoryBlockManager::iterator block, MemoryPermission perm) {
block->ShareToDevice(perm);
},
perm);
return RESULT_SUCCESS;
}
ResultCode PageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
MemoryPermission perm{};
if (const ResultCode result{CheckMemoryState(
nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute,
MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None,
MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None,
MemoryAttribute::DeviceSharedAndUncached)};
result.IsError()) {
return result;
}
block_manager->UpdateLock(addr, size / PageSize,
[perm](MemoryBlockManager::iterator block, MemoryPermission perm) {
block->UnshareToDevice(perm);
},
perm);
return RESULT_SUCCESS;
}
ResultCode PageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
block_manager = std::make_unique<MemoryBlockManager>(start, end);

View File

@@ -53,6 +53,8 @@ public:
bool is_map_only, VAddr region_start,
std::size_t region_num_pages, MemoryState state,
MemoryPermission perm, PAddr map_addr = 0);
ResultCode LockForDeviceAddressSpace(VAddr addr, std::size_t size);
ResultCode UnlockForDeviceAddressSpace(VAddr addr, std::size_t size);
Common::PageTable& PageTableImpl() {
return page_table_impl;

View File

@@ -51,7 +51,7 @@ public:
}
void Free(void* obj) {
Node* node = reinterpret_cast<Node*>(obj);
Node* node = static_cast<Node*>(obj);
Node* cur_head = head.load();
do {
@@ -145,7 +145,7 @@ public:
}
T* Allocate() {
T* obj = reinterpret_cast<T*>(AllocateImpl());
T* obj = static_cast<T*>(AllocateImpl());
if (obj != nullptr) {
new (obj) T();
}

View File

@@ -150,8 +150,7 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
context.pc = entry_point;
context.sp = stack_top;
// TODO(merry): Perform a hardware test to determine the below value.
// AHP = 0, DN = 1, FTZ = 1, RMode = Round towards zero
context.fpcr = 0x03C00000;
context.fpcr = 0;
}
ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name,

View File

@@ -903,7 +903,7 @@ private:
void PopOutData(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
const auto storage = applet->GetBroker().PopNormalDataToGame();
auto storage = applet->GetBroker().PopNormalDataToGame();
if (storage == nullptr) {
LOG_ERROR(Service_AM,
"storage is a nullptr. There is no data in the current normal channel");
@@ -934,7 +934,7 @@ private:
void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
const auto storage = applet->GetBroker().PopInteractiveDataToGame();
auto storage = applet->GetBroker().PopInteractiveDataToGame();
if (storage == nullptr) {
LOG_ERROR(Service_AM,
"storage is a nullptr. There is no data in the current interactive channel");

View File

@@ -92,11 +92,16 @@ private:
}
void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
LOG_DEBUG(Service_Audio, "(STUBBED) called");
auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer());
if (result.Succeeded()) {
ctx.WriteBuffer(result.Unwrap());
}
ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer()));
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
rb.Push(result.Code());
}
void Start(Kernel::HLERequestContext& ctx) {
@@ -252,8 +257,6 @@ private:
}
void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto device_name_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(device_name_buffer);

View File

@@ -18,6 +18,7 @@
#include "core/hle/service/bcat/backend/boxcat.h"
#include "core/settings.h"
namespace Service::BCAT {
namespace {
// Prevents conflicts with windows macro called CreateFile
@@ -30,10 +31,6 @@ bool VfsDeleteFileWrap(FileSys::VirtualDir dir, std::string_view name) {
return dir->DeleteFile(name);
}
} // Anonymous namespace
namespace Service::BCAT {
constexpr ResultCode ERROR_GENERAL_BCAT_FAILURE{ErrorModule::BCAT, 1};
constexpr char BOXCAT_HOSTNAME[] = "api.yuzu-emu.org";
@@ -90,8 +87,6 @@ constexpr u32 PORT = 443;
constexpr u32 TIMEOUT_SECONDS = 30;
[[maybe_unused]] constexpr u64 VFS_COPY_BLOCK_SIZE = 1ULL << 24; // 4MB
namespace {
std::string GetBINFilePath(u64 title_id) {
return fmt::format("{}bcat/{:016X}/launchparam.bin",
FileUtil::GetUserPath(FileUtil::UserPath::CacheDir), title_id);

View File

@@ -4,6 +4,7 @@
#include "core/crypto/key_manager.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/es/es.h"
#include "core/hle/service/service.h"
namespace Service::ES {
@@ -76,7 +77,6 @@ private:
}
void ImportTicket(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto ticket = ctx.ReadBuffer();
const auto cert = ctx.ReadBuffer(1);

View File

@@ -107,6 +107,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) {
switch (controller_type) {
case NPadControllerType::None:
UNREACHABLE();
break;
case NPadControllerType::Handheld:
controller.joy_styles.handheld.Assign(1);
controller.device_type.handheld.Assign(1);
@@ -363,6 +364,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
switch (controller_type) {
case NPadControllerType::None:
UNREACHABLE();
break;
case NPadControllerType::Handheld:
handheld_entry.connection_status.raw = 0;
handheld_entry.connection_status.IsWired.Assign(1);
@@ -500,7 +502,7 @@ void Controller_NPad::SetNpadMode(u32 npad_id, NPadAssignments assignment_mode)
void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids,
const std::vector<Vibration>& vibrations) {
LOG_WARNING(Service_HID, "(STUBBED) called");
LOG_DEBUG(Service_HID, "(STUBBED) called");
if (!can_controllers_vibrate) {
return;

View File

@@ -23,7 +23,7 @@ public:
standard_network_clock_sufficient_accuracy = value;
}
bool IsStandardNetworkSystemClockAccuracySufficient(Core::System& system) {
bool IsStandardNetworkSystemClockAccuracySufficient(Core::System& system) const {
SystemClockContext context{};
if (GetClockContext(system, context) != RESULT_SUCCESS) {
return {};

View File

@@ -16,6 +16,7 @@ namespace Service::Time::Clock {
class SteadyClockCore {
public:
SteadyClockCore() = default;
virtual ~SteadyClockCore() = default;
const Common::UUID& GetClockSourceId() const {
return clock_source_id;

View File

@@ -20,7 +20,7 @@ namespace Service::Time::Clock {
class SystemClockContextUpdateCallback {
public:
SystemClockContextUpdateCallback();
~SystemClockContextUpdateCallback();
virtual ~SystemClockContextUpdateCallback();
bool NeedUpdate(const SystemClockContext& value) const;

View File

@@ -9,7 +9,7 @@
namespace Service::Time::Clock {
SystemClockCore::SystemClockCore(SteadyClockCore& steady_clock_core)
: steady_clock_core{steady_clock_core}, is_initialized{} {
: steady_clock_core{steady_clock_core} {
context.steady_time_point.clock_source_id = steady_clock_core.GetClockSourceId();
}

View File

@@ -22,7 +22,7 @@ class SystemClockContextUpdateCallback;
class SystemClockCore {
public:
explicit SystemClockCore(SteadyClockCore& steady_clock_core);
~SystemClockCore();
virtual ~SystemClockCore();
SteadyClockCore& GetSteadyClockCore() const {
return steady_clock_core;

View File

@@ -20,8 +20,8 @@ namespace Service::Time {
class ISystemClock final : public ServiceFramework<ISystemClock> {
public:
ISystemClock(Clock::SystemClockCore& clock_core)
: ServiceFramework("ISystemClock"), clock_core{clock_core} {
explicit ISystemClock(Clock::SystemClockCore& clock_core, Core::System& system)
: ServiceFramework("ISystemClock"), clock_core{clock_core}, system{system} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &ISystemClock::GetCurrentTime, "GetCurrentTime"},
@@ -46,9 +46,8 @@ private:
}
s64 posix_time{};
if (const ResultCode result{
clock_core.GetCurrentTime(Core::System::GetInstance(), posix_time)};
result != RESULT_SUCCESS) {
if (const ResultCode result{clock_core.GetCurrentTime(system, posix_time)};
result.IsError()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
@@ -69,9 +68,8 @@ private:
}
Clock::SystemClockContext system_clock_context{};
if (const ResultCode result{
clock_core.GetClockContext(Core::System::GetInstance(), system_clock_context)};
result != RESULT_SUCCESS) {
if (const ResultCode result{clock_core.GetClockContext(system, system_clock_context)};
result.IsError()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
@@ -83,12 +81,13 @@ private:
}
Clock::SystemClockCore& clock_core;
Core::System& system;
};
class ISteadyClock final : public ServiceFramework<ISteadyClock> {
public:
ISteadyClock(Clock::SteadyClockCore& clock_core)
: ServiceFramework("ISteadyClock"), clock_core{clock_core} {
explicit ISteadyClock(Clock::SteadyClockCore& clock_core, Core::System& system)
: ServiceFramework("ISteadyClock"), clock_core{clock_core}, system{system} {
static const FunctionInfo functions[] = {
{0, &ISteadyClock::GetCurrentTimePoint, "GetCurrentTimePoint"},
};
@@ -105,14 +104,14 @@ private:
return;
}
const Clock::SteadyClockTimePoint time_point{
clock_core.GetCurrentTimePoint(Core::System::GetInstance())};
const Clock::SteadyClockTimePoint time_point{clock_core.GetCurrentTimePoint(system)};
IPC::ResponseBuilder rb{ctx, (sizeof(Clock::SteadyClockTimePoint) / 4) + 2};
rb.Push(RESULT_SUCCESS);
rb.PushRaw(time_point);
}
Clock::SteadyClockCore& clock_core;
Core::System& system;
};
ResultCode Module::Interface::GetClockSnapshotFromSystemClockContextInternal(
@@ -134,7 +133,7 @@ ResultCode Module::Interface::GetClockSnapshotFromSystemClockContextInternal(
}
const auto current_time_point{
time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(Core::System::GetInstance())};
time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)};
if (const ResultCode result{Clock::ClockSnapshot::GetCurrentTime(
clock_snapshot.user_time, current_time_point, clock_snapshot.user_context)};
result != RESULT_SUCCESS) {
@@ -176,21 +175,24 @@ void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ct
LOG_DEBUG(Service_Time, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardUserSystemClockCore());
rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardUserSystemClockCore(),
system);
}
void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Time, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardNetworkSystemClockCore());
rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardNetworkSystemClockCore(),
system);
}
void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Time, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<ISteadyClock>(module->GetTimeManager().GetStandardSteadyClockCore());
rb.PushIpcInterface<ISteadyClock>(module->GetTimeManager().GetStandardSteadyClockCore(),
system);
}
void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) {
@@ -204,7 +206,8 @@ void Module::Interface::GetStandardLocalSystemClock(Kernel::HLERequestContext& c
LOG_DEBUG(Service_Time, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardLocalSystemClockCore());
rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardLocalSystemClockCore(),
system);
}
void Module::Interface::IsStandardNetworkSystemClockAccuracySufficient(
@@ -228,8 +231,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
IPC::RequestParser rp{ctx};
const auto context{rp.PopRaw<Clock::SystemClockContext>()};
const auto current_time_point{
steady_clock_core.GetCurrentTimePoint(Core::System::GetInstance())};
const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};
if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
const auto ticks{Clock::TimeSpanType::FromTicks(
@@ -255,8 +257,8 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
Clock::SystemClockContext user_context{};
if (const ResultCode result{
module->GetTimeManager().GetStandardUserSystemClockCore().GetClockContext(
Core::System::GetInstance(), user_context)};
result != RESULT_SUCCESS) {
system, user_context)};
result.IsError()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
@@ -264,8 +266,8 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
Clock::SystemClockContext network_context{};
if (const ResultCode result{
module->GetTimeManager().GetStandardNetworkSystemClockCore().GetClockContext(
Core::System::GetInstance(), network_context)};
result != RESULT_SUCCESS) {
system, network_context)};
result.IsError()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
@@ -274,7 +276,7 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
Clock::ClockSnapshot clock_snapshot{};
if (const ResultCode result{GetClockSnapshotFromSystemClockContextInternal(
&ctx.GetThread(), user_context, network_context, type, clock_snapshot)};
result != RESULT_SUCCESS) {
result.IsError()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;

View File

@@ -518,8 +518,8 @@ static bool ParseTimeZoneBinary(TimeZoneRule& time_zone_rule, FileSys::VirtualFi
constexpr s32 time_zone_max_leaps{50};
constexpr s32 time_zone_max_chars{50};
if (!(0 <= header.leap_count && header.leap_count < time_zone_max_leaps &&
0 < header.type_count && header.type_count < time_zone_rule.ttis.size() &&
0 <= header.time_count && header.time_count < time_zone_rule.ats.size() &&
0 < header.type_count && header.type_count < s32(time_zone_rule.ttis.size()) &&
0 <= header.time_count && header.time_count < s32(time_zone_rule.ats.size()) &&
0 <= header.char_count && header.char_count < time_zone_max_chars &&
(header.ttis_std_count == header.type_count || header.ttis_std_count == 0) &&
(header.ttis_gmt_count == header.type_count || header.ttis_gmt_count == 0))) {

View File

@@ -398,6 +398,11 @@ AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) {
Kernel::CodeSet codeset = elf_reader.LoadInto(base_address);
const VAddr entry_point = codeset.entrypoint;
// Setup the process code layout
if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), buffer.size()).IsError()) {
return {ResultStatus::ErrorNotInitialized, {}};
}
process.LoadModule(std::move(codeset), entry_point);
is_loaded = true;

View File

@@ -131,7 +131,7 @@ static constexpr u32 PageAlignSize(u32 size) {
}
static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
const std::string& name, VAddr load_base) {
const std::string& name) {
if (data.size() < sizeof(NroHeader)) {
return {};
}
@@ -187,19 +187,25 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
codeset.DataSegment().size += bss_size;
program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
// Setup the process code layout
if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size())
.IsError()) {
return false;
}
// Load codeset for current process
codeset.memory = std::move(program_image);
process.LoadModule(std::move(codeset), load_base);
process.LoadModule(std::move(codeset), process.PageTable().GetCodeRegionStart());
// Register module with GDBStub
GDBStub::RegisterModule(name, load_base, load_base);
GDBStub::RegisterModule(name, process.PageTable().GetCodeRegionStart(),
process.PageTable().GetCodeRegionEnd());
return true;
}
bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& file,
VAddr load_base) {
return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base);
bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& file) {
return LoadNroImpl(process, file.ReadAllBytes(), file.GetName());
}
AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) {
@@ -207,10 +213,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) {
return {ResultStatus::ErrorAlreadyLoaded, {}};
}
// Load NRO
const VAddr base_address = process.PageTable().GetCodeRegionStart();
if (!LoadNro(process, *file, base_address)) {
if (!LoadNro(process, *file)) {
return {ResultStatus::ErrorLoadingNRO, {}};
}

View File

@@ -47,7 +47,7 @@ public:
bool IsRomFSUpdatable() const override;
private:
bool LoadNro(Kernel::Process& process, const FileSys::VfsFile& file, VAddr load_base);
bool LoadNro(Kernel::Process& process, const FileSys::VfsFile& file);
std::vector<u8> icon_data;
std::unique_ptr<FileSys::NACP> nacp;

View File

@@ -37,7 +37,7 @@ static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
const NSOSegmentHeader& header) {
const std::vector<u8> uncompressed_data =
std::vector<u8> uncompressed_data =
Common::Compression::DecompressDataLZ4(compressed_data, header.size);
ASSERT_MSG(uncompressed_data.size() == header.size, "{} != {}", header.size,

View File

@@ -92,7 +92,7 @@ void LogSettings() {
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
LogSetting("Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
@@ -109,4 +109,12 @@ void LogSettings() {
LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
}
bool IsGPULevelExtreme() {
return values.gpu_accuracy == GPUAccuracy::Extreme;
}
bool IsGPULevelHigh() {
return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
}
} // namespace Settings

View File

@@ -376,6 +376,12 @@ enum class RendererBackend {
Vulkan = 1,
};
enum class GPUAccuracy : u32 {
Normal = 0,
High = 1,
Extreme = 2,
};
struct Values {
// System
bool use_docked_mode;
@@ -436,7 +442,7 @@ struct Values {
bool use_frame_limit;
u16 frame_limit;
bool use_disk_shader_cache;
bool use_accurate_gpu_emulation;
GPUAccuracy gpu_accuracy;
bool use_asynchronous_gpu_emulation;
bool use_vsync;
bool force_30fps_mode;
@@ -464,6 +470,7 @@ struct Values {
bool dump_nso;
bool reporting_services;
bool quest_flag;
bool disable_cpu_opt;
// BCAT
std::string bcat_backend;
@@ -479,6 +486,9 @@ struct Values {
std::map<u64, std::vector<std::string>> disabled_addons;
} extern values;
bool IsGPULevelExtreme();
bool IsGPULevelHigh();
void Apply();
void LogSettings();
} // namespace Settings

View File

@@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
return "Unknown";
}
static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
switch (backend) {
case Settings::GPUAccuracy::Normal:
return "Normal";
case Settings::GPUAccuracy::High:
return "High";
case Settings::GPUAccuracy::Extreme:
return "Extreme";
}
return "Unknown";
}
u64 GetTelemetryId() {
u64 telemetry_id{};
const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
AddField(field_type, "Renderer_UseAccurateGpuEmulation",
Settings::values.use_accurate_gpu_emulation);
AddField(field_type, "Renderer_GPUAccuracyLevel",
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);

View File

@@ -14,13 +14,14 @@
#include "core/core.h"
#include "core/core_timing.h"
namespace {
// Numbers are chosen randomly to make sure the correct one is given.
static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
static std::bitset<CB_IDS.size()> callbacks_ran_flags;
static u64 expected_callback = 0;
static s64 lateness = 0;
std::bitset<CB_IDS.size()> callbacks_ran_flags;
u64 expected_callback = 0;
s64 lateness = 0;
template <unsigned int IDX>
void CallbackTemplate(u64 userdata, s64 cycles_late) {
@@ -31,7 +32,7 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
REQUIRE(lateness == cycles_late);
}
static u64 callbacks_done = 0;
u64 callbacks_done = 0;
void EmptyCallback(u64 userdata, s64 cycles_late) {
++callbacks_done;
@@ -48,8 +49,8 @@ struct ScopeInit final {
Core::Timing::CoreTiming core_timing;
};
static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
int expected_lateness = 0, int cpu_downcount = 0) {
void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
int expected_lateness = 0, int cpu_downcount = 0) {
callbacks_ran_flags = 0;
expected_callback = CB_IDS[idx];
lateness = expected_lateness;
@@ -62,6 +63,7 @@ static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32
REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
}
} // Anonymous namespace
TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
ScopeInit guard;

View File

@@ -23,6 +23,7 @@ add_library(video_core STATIC
engines/shader_bytecode.h
engines/shader_header.h
engines/shader_type.h
fence_manager.h
gpu.cpp
gpu.h
gpu_asynch.cpp
@@ -51,6 +52,8 @@ add_library(video_core STATIC
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
renderer_opengl/gl_framebuffer_cache.cpp
renderer_opengl/gl_framebuffer_cache.h
renderer_opengl/gl_rasterizer.cpp
@@ -160,6 +163,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/nsight_aftermath_tracker.cpp
renderer_vulkan/nsight_aftermath_tracker.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
@@ -174,6 +179,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_fence_manager.cpp
renderer_vulkan/vk_fence_manager.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
@@ -213,19 +220,30 @@ if (ENABLE_VULKAN)
renderer_vulkan/wrapper.cpp
renderer_vulkan/wrapper.h
)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad)
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
target_link_libraries(video_core PRIVATE sirit)
endif()
if (ENABLE_NSIGHT_AFTERMATH)
if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided")
endif()
if (NOT WIN32)
message(ERROR "Nsight Aftermath doesn't support non-Windows platforms")
endif()
target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH)
target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include")
endif()
if (MSVC)
target_compile_options(video_core PRIVATE /we4267)
else()

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
@@ -18,8 +19,10 @@
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/buffer_cache/buffer_block.h"
#include "video_core/buffer_cache/map_interval.h"
#include "video_core/memory_manager.h"
@@ -79,6 +82,9 @@ public:
auto map = MapAddress(block, gpu_addr, cpu_addr, size);
if (is_written) {
map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
MarkForAsyncFlush(map);
}
if (!map->IsWritten()) {
map->MarkAsWritten(true);
MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -137,11 +143,22 @@ public:
});
for (auto& object : objects) {
if (object->IsModified() && object->IsRegistered()) {
mutex.unlock();
FlushMap(object);
mutex.lock();
}
}
}
bool MustFlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
return map->IsModified() && map->IsRegistered();
});
}
/// Mark the specified region as being invalidated
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
@@ -154,6 +171,77 @@ public:
}
}
void OnCPUWrite(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
for (const auto& object : GetMapsInRange(addr, size)) {
if (object->IsMemoryMarked() && object->IsRegistered()) {
UnmarkMemory(object);
object->SetSyncPending(true);
marked_for_unregister.emplace_back(object);
}
}
}
void SyncGuestHost() {
std::lock_guard lock{mutex};
for (const auto& object : marked_for_unregister) {
if (object->IsRegistered()) {
object->SetSyncPending(false);
Unregister(object);
}
}
marked_for_unregister.clear();
}
void CommitAsyncFlushes() {
if (uncommitted_flushes) {
auto commit_list = std::make_shared<std::list<MapInterval>>();
for (auto& map : *uncommitted_flushes) {
if (map->IsRegistered() && map->IsModified()) {
// TODO(Blinkhawk): Implement backend asynchronous flushing
// AsyncFlushMap(map)
commit_list->push_back(map);
}
}
if (!commit_list->empty()) {
committed_flushes.push_back(commit_list);
} else {
committed_flushes.emplace_back();
}
} else {
committed_flushes.emplace_back();
}
uncommitted_flushes.reset();
}
bool ShouldWaitAsyncFlushes() const {
return !committed_flushes.empty() && committed_flushes.front() != nullptr;
}
bool HasUncommittedFlushes() const {
return uncommitted_flushes != nullptr;
}
void PopAsyncFlushes() {
if (committed_flushes.empty()) {
return;
}
auto& flush_list = committed_flushes.front();
if (!flush_list) {
committed_flushes.pop_front();
return;
}
for (MapInterval& map : *flush_list) {
if (map->IsRegistered()) {
// TODO(Blinkhawk): Replace this for reading the asynchronous flush
FlushMap(map);
}
}
committed_flushes.pop_front();
}
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
protected:
@@ -196,17 +284,30 @@ protected:
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
mapped_addresses.insert({interval, new_map});
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
new_map->SetMemoryMarked(true);
if (inherit_written) {
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
new_map->MarkAsWritten(true);
}
}
/// Unregisters an object from the cache
void Unregister(MapInterval& map) {
void UnmarkMemory(const MapInterval& map) {
if (!map->IsMemoryMarked()) {
return;
}
const std::size_t size = map->GetEnd() - map->GetStart();
rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
map->SetMemoryMarked(false);
}
/// Unregisters an object from the cache
void Unregister(const MapInterval& map) {
UnmarkMemory(map);
map->MarkAsRegistered(false);
if (map->IsSyncPending()) {
marked_for_unregister.remove(map);
map->SetSyncPending(false);
}
if (map->IsWritten()) {
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
}
@@ -264,6 +365,9 @@ private:
MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
if (modified_inheritance) {
new_map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
MarkForAsyncFlush(new_map);
}
}
Register(new_map, write_inheritance);
return new_map;
@@ -450,6 +554,13 @@ private:
return false;
}
void MarkForAsyncFlush(MapInterval& map) {
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
}
uncommitted_flushes->insert(map);
}
VideoCore::RasterizerInterface& rasterizer;
Core::System& system;
@@ -479,6 +590,10 @@ private:
u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
std::list<MapInterval> marked_for_unregister;
std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
std::recursive_mutex mutex;
};

View File

@@ -46,6 +46,22 @@ public:
return is_registered;
}
void SetMemoryMarked(bool is_memory_marked_) {
is_memory_marked = is_memory_marked_;
}
bool IsMemoryMarked() const {
return is_memory_marked;
}
void SetSyncPending(bool is_sync_pending_) {
is_sync_pending = is_sync_pending_;
}
bool IsSyncPending() const {
return is_sync_pending;
}
VAddr GetStart() const {
return start;
}
@@ -83,6 +99,8 @@ private:
bool is_written{};
bool is_modified{};
bool is_registered{};
bool is_memory_marked{};
bool is_sync_pending{};
u64 ticks{};
};

View File

@@ -12,7 +12,7 @@
namespace Tegra {
DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {}
DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
DmaPusher::~DmaPusher() = default;
@@ -21,17 +21,20 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
// On entering GPU code, assume all memory may be touched by the ARM core.
gpu.Maxwell3D().OnMemoryWrite();
dma_pushbuffer_subindex = 0;
while (Core::System::GetInstance().IsPoweredOn()) {
while (system.IsPoweredOn()) {
if (!Step()) {
break;
}
}
gpu.FlushCommands();
gpu.SyncGuestHost();
gpu.OnCommandListEnd();
}
bool DmaPusher::Step() {

View File

@@ -10,6 +10,10 @@
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Core {
class System;
}
namespace Tegra {
enum class SubmissionMode : u32 {
@@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
*/
class DmaPusher {
public:
explicit DmaPusher(GPU& gpu);
explicit DmaPusher(Core::System& system, GPU& gpu);
~DmaPusher();
void Push(CommandList&& entries) {
@@ -72,8 +76,6 @@ private:
void CallMethod(u32 argument) const;
GPU& gpu;
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
@@ -92,6 +94,9 @@ private:
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
GPU& gpu;
Core::System& system;
};
} // namespace Tegra

View File

@@ -28,7 +28,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
}
}
std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
const u32 line_a = src_2 - src_1;
const u32 line_b = dst_2 - dst_1;
const u32 excess = std::max<s32>(0, line_a - src_line + src_1);

View File

@@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
color_mask.A.Assign(1);
}
for (auto& format : regs.vertex_attrib_format) {
format.constant.Assign(1);
}
// NVN games expect these values to be enabled at boot
regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1;
@@ -400,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
if (regs.query.query_get.fence == 1) {
rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
} else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
}
break;
case Regs::QueryOperation::Acquire:
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@@ -479,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
const u32 increment = regs.sync_info.increment.Value();
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) {
system.GPU().IncrementSyncPoint(sync_point);
rasterizer.SignalSyncPoint(sync_point);
}
}

View File

@@ -1149,7 +1149,7 @@ public:
/// Returns whether the vertex array specified by index is supposed to be
/// accessed per instance or not.
bool IsInstancingEnabled(u32 index) const {
bool IsInstancingEnabled(std::size_t index) const {
return is_instanced[index];
}
} instanced_arrays;

View File

@@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
}
Texture::UnswizzleSubrect(
regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
@@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() {
write_buffer.resize(dst_size);
}
if (Settings::values.use_accurate_gpu_emulation) {
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
} else {

View File

@@ -0,0 +1,170 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <memory>
#include <queue>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
class FenceBase {
public:
FenceBase(u32 payload, bool is_stubbed)
: address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
: address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
GPUVAddr GetAddress() const {
return address;
}
u32 GetPayload() const {
return payload;
}
bool IsSemaphore() const {
return is_semaphore;
}
private:
GPUVAddr address;
u32 payload;
bool is_semaphore;
protected:
bool is_stubbed;
};
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
class FenceManager {
public:
void SignalSemaphore(GPUVAddr addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
TFence new_fence = CreateFence(addr, value, !should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
rasterizer.SyncGuestHost();
}
void SignalSyncPoint(u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
TFence new_fence = CreateFence(value, !should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
rasterizer.SyncGuestHost();
}
void WaitPendingFences() {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
fences.pop();
}
}
protected:
FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
TTextureCache& texture_cache, TTBufferCache& buffer_cache,
TQueryCache& query_cache)
: system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
buffer_cache{buffer_cache}, query_cache{query_cache} {}
virtual ~FenceManager() {}
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU.
virtual bool IsFenceSignaled(TFence& fence) const = 0;
/// Waits until a fence has been signalled by the host GPU.
virtual void WaitFence(TFence& fence) = 0;
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
void TryReleasePendingFences() {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
return;
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
fences.pop();
}
}
bool ShouldWait() const {
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
query_cache.ShouldWaitAsyncFlushes();
}
bool ShouldFlush() const {
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes();
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes();
query_cache.CommitAsyncFlushes();
}
std::queue<TFence> fences;
};
} // namespace VideoCommon

View File

@@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render
: system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
auto& rasterizer{renderer->Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
std::unique_lock lck{flush_request_mutex};
const u64 fence = ++last_flush_fence;
flush_requests.emplace_back(fence, addr, size);
return fence;
}
void GPU::TickWork() {
std::unique_lock lck{flush_request_mutex};
while (!flush_requests.empty()) {
auto& request = flush_requests.front();
const u64 fence = request.fence;
const VAddr addr = request.addr;
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
renderer->Rasterizer().FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
}
u64 GPU::GetTicks() const {
// This values were reversed engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds
@@ -142,6 +164,13 @@ void GPU::FlushCommands() {
renderer->Rasterizer().FlushCommands();
}
void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
}
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.

View File

@@ -155,7 +155,23 @@ public:
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
/// Flush all current written commands into the host GPU for execution.
void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost();
/// Signal the ending of command list.
virtual void OnCommandListEnd();
/// Request a host GPU memory flush from the CPU.
u64 RequestFlush(VAddr addr, std::size_t size);
/// Obtains current flush request fence id.
u64 CurrentFlushRequestFence() const {
return current_flush_fence.load(std::memory_order_relaxed);
}
/// Tick pending requests within the GPU.
void TickWork();
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
@@ -325,6 +341,19 @@ private:
std::condition_variable sync_cv;
struct FlushRequest {
FlushRequest(u64 fence, VAddr addr, std::size_t size)
: fence{fence}, addr{addr}, size{size} {}
u64 fence;
VAddr addr;
std::size_t size;
};
std::list<FlushRequest> flush_requests;
std::atomic<u64> current_flush_fence{};
u64 last_flush_fence{};
std::mutex flush_request_mutex;
const bool is_async;
};

View File

@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle();
}
void GPUAsynch::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}
} // namespace VideoCommon

View File

@@ -32,6 +32,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override;
void OnCommandListEnd() override;
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;

View File

@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -14,8 +15,9 @@
namespace VideoCommon::GPUThread {
/// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher, SynchState& state) {
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
MicroProfileOnThreadCreate("GpuThread");
// Wait for first GPU command before acquiring the window context
@@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
renderer.Rasterizer().ReleaseFences();
} else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
system.GPU().TickWork();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
@@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
std::ref(state)};
thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
std::ref(context), std::ref(dma_pusher), std::ref(state)};
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
PushCommand(FlushRegionCommand(addr, size));
if (!Settings::IsGPULevelHigh()) {
PushCommand(FlushRegionCommand(addr, size));
return;
}
if (!Settings::IsGPULevelExtreme()) {
return;
}
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand());
while (fence > gpu.CurrentFlushRequestFence()) {
}
}
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().InvalidateRegion(addr, size);
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size);
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
}
void ThreadManager::WaitIdle() const {
@@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
}
}
void ThreadManager::OnCommandListEnd() {
PushCommand(OnCommandListEndCommand());
}
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));

View File

@@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
u64 size;
};
/// Command called within the gpu, to schedule actions after a command list end
struct OnCommandListEndCommand final {};
/// Command to make the gpu look into pending requests
struct GPUTickCommand final {};
using CommandData =
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
GPUTickCommand>;
struct CommandDataContainer {
CommandDataContainer() = default;
@@ -122,6 +129,8 @@ public:
// Wait until the gpu thread is idle.
void WaitIdle() const;
void OnCommandListEnd();
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);

View File

@@ -51,11 +51,8 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
ASSERT(system.CurrentProcess()
->PageTable()
.SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared,
Kernel::Memory::MemoryAttribute::DeviceShared)
.IsSuccess());
ASSERT(
system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
return gpu_addr;
}
@@ -66,11 +63,8 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
const u64 aligned_size{Common::AlignUp(size, page_size)};
MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
ASSERT(system.CurrentProcess()
->PageTable()
.SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared,
Kernel::Memory::MemoryAttribute::DeviceShared)
.IsSuccess());
ASSERT(
system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
return gpu_addr;
}
@@ -87,9 +81,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
->PageTable()
.SetMemoryAttribute(cpu_addr.value(), size,
Kernel::Memory::MemoryAttribute::DeviceShared,
Kernel::Memory::MemoryAttribute::None)
.UnlockForDeviceAddressSpace(cpu_addr.value(), size)
.IsSuccess());
return gpu_addr;

View File

@@ -12,10 +12,12 @@
#include <mutex>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "common/assert.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -130,6 +132,9 @@ public:
}
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation) {
AsyncFlushQuery(cpu_addr);
}
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -170,6 +175,37 @@ public:
return streams[static_cast<std::size_t>(type)];
}
void CommitAsyncFlushes() {
committed_flushes.push_back(uncommitted_flushes);
uncommitted_flushes.reset();
}
bool HasUncommittedFlushes() const {
return uncommitted_flushes != nullptr;
}
bool ShouldWaitAsyncFlushes() const {
if (committed_flushes.empty()) {
return false;
}
return committed_flushes.front() != nullptr;
}
void PopAsyncFlushes() {
if (committed_flushes.empty()) {
return;
}
auto& flush_list = committed_flushes.front();
if (!flush_list) {
committed_flushes.pop_front();
return;
}
for (VAddr query_address : *flush_list) {
FlushAndRemoveRegion(query_address, 4);
}
committed_flushes.pop_front();
}
protected:
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
@@ -224,6 +260,13 @@ private:
return found != std::end(contents) ? &*found : nullptr;
}
void AsyncFlushQuery(VAddr addr) {
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
}
uncommitted_flushes->insert(addr);
}
static constexpr std::uintptr_t PAGE_SIZE = 4096;
static constexpr unsigned PAGE_SHIFT = 12;
@@ -235,6 +278,9 @@ private:
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
};
template <class QueryCache, class HostCounter>

View File

@@ -49,15 +49,33 @@ public:
/// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
/// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
/// Release all pending fences.
virtual void ReleaseFences() = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Check if the the specified memory area requires flushing to CPU Memory.
virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region are desync with guest
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
virtual void SyncGuestHost() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;

View File

@@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
}
void OGLBufferCache::WriteBarrier() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
@@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}

View File

@@ -0,0 +1,72 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
: VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
: VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
GLInnerFence::~GLInnerFence() = default;
void GLInnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle == 0);
sync_object.Create();
}
bool GLInnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(sync_object.handle != 0);
GLsizei length;
GLint sync_status;
glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
return sync_status == GL_SIGNALED;
}
void GLInnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(sync_object.handle != 0);
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
TextureCacheOpenGL& texture_cache,
OGLBufferCache& buffer_cache, QueryCache& query_cache)
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
}
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {
fence->Queue();
}
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void FenceManagerOpenGL::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace OpenGL

View File

@@ -0,0 +1,53 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
GLInnerFence(u32 payload, bool is_stubbed);
GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
~GLInnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
OGLSync sync_object;
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
};
} // namespace OpenGL

View File

@@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
ScreenInfo& info, GLShader::ProgramManager& program_manager,
StateTracker& state_tracker)
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
CheckExtensions();
}
@@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
EndTransformFeedback();
++num_queued_commands;
system.GPU().TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
}
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
@@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
texture_cache.OnCPUWrite(addr, size);
shader_cache.InvalidateRegion(addr, size);
buffer_cache.OnCPUWrite(addr, size);
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
auto& memory_manager{gpu.MemoryManager()};
memory_manager.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
}
fence_manager.SignalSyncPoint(value);
}
void RasterizerOpenGL::ReleaseFences() {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
fence_manager.WaitPendingFences();
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.use_accurate_gpu_emulation) {
if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size);
}
InvalidateRegion(addr, size);

View File

@@ -23,6 +23,7 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -66,7 +67,13 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
@@ -222,6 +229,8 @@ private:
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;
QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
Core::System& system;
ScreenInfo& screen_info;
@@ -229,7 +238,6 @@ private:
StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
GLint vertex_binding = 0;

View File

@@ -34,8 +34,6 @@
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::CompileDepth;
using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
@@ -45,7 +43,7 @@ namespace {
constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};
constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -450,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// Look up shader in the cache based on address
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
@@ -479,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const std::size_t size_in_bytes = code.size() * sizeof(u64);
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(shader);
if (cpu_addr) {
Register(shader);
} else {
null_shader = shader;
}
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
@@ -488,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
if (kernel) {
return kernel;
}
@@ -509,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(kernel);
if (cpu_addr) {
Register(kernel);
} else {
null_kernel = kernel;
}
return kernel;
}

View File

@@ -125,6 +125,9 @@ private:
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
Shader null_shader{};
Shader null_kernel{};
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};

View File

@@ -484,7 +484,7 @@ private:
code.AddLine("switch (jmp_to) {{");
for (const auto& pair : ir.GetBasicBlocks()) {
const auto [address, bb] = pair;
const auto& [address, bb] = pair;
code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
@@ -1145,6 +1145,7 @@ private:
return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
default:
UNREACHABLE();
return {"0", Type::Int};
}
case Attribute::Index::FrontColor:
return {"gl_Color"s + GetSwizzle(element), Type::Float};
@@ -1483,8 +1484,8 @@ private:
dy += '(';
for (std::size_t index = 0; index < components; ++index) {
const auto operand_x{derivates.at(index * 2)};
const auto operand_y{derivates.at(index * 2 + 1)};
const auto& operand_x{derivates.at(index * 2)};
const auto& operand_y{derivates.at(index * 2 + 1)};
dx += Visit(operand_x).AsFloat();
dy += Visit(operand_y).AsFloat();

View File

@@ -191,6 +191,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
case Tegra::Texture::TextureMipmapFilter::Linear:
return GL_LINEAR_MIPMAP_LINEAR;
}
break;
}
case Tegra::Texture::TextureFilter::Nearest: {
switch (mip_filter_mode) {
@@ -201,6 +202,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
case Tegra::Texture::TextureMipmapFilter::Linear:
return GL_NEAREST_MIPMAP_LINEAR;
}
break;
}
}
LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));

View File

@@ -2,10 +2,12 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <tuple>
#include <boost/functional/hash.hpp>
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -13,289 +15,352 @@ namespace Vulkan {
namespace {
constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) {
const FixedPipelineState::StencilFace front_stencil(
regs.stencil_front_op_fail, regs.stencil_front_op_zfail, regs.stencil_front_op_zpass,
regs.stencil_front_func_func);
const FixedPipelineState::StencilFace back_stencil =
regs.stencil_two_side_enable
? FixedPipelineState::StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail,
regs.stencil_back_op_zpass,
regs.stencil_back_func_func)
: front_stencil;
return FixedPipelineState::DepthStencil(
regs.depth_test_enable == 1, regs.depth_write_enabled == 1, regs.depth_bounds_enable == 1,
regs.stencil_enable == 1, regs.depth_test_func, front_stencil, back_stencil);
}
constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) {
return FixedPipelineState::InputAssembly(
regs.draw.topology, regs.primitive_restart.enabled,
regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f);
}
constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState(
const Maxwell& regs, std::size_t render_target) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target];
const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0};
const FixedPipelineState::BlendingAttachment default_blending(
false, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One,
Maxwell::Blend::Factor::Zero, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One,
Maxwell::Blend::Factor::Zero, components);
if (render_target >= regs.rt_control.count) {
return default_blending;
}
if (!regs.independent_blend_enable) {
const auto& src = regs.blend;
if (!src.enable[render_target]) {
return default_blending;
}
return FixedPipelineState::BlendingAttachment(
true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a,
src.factor_source_a, src.factor_dest_a, components);
}
if (!regs.blend.enable[render_target]) {
return default_blending;
}
const auto& src = regs.independent_blend[render_target];
return FixedPipelineState::BlendingAttachment(
true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a,
src.factor_source_a, src.factor_dest_a, components);
}
constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) {
return FixedPipelineState::ColorBlending(
{regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a},
regs.rt_control.count,
{GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1),
GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3),
GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5),
GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)});
}
constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) {
return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim,
regs.tess_mode.spacing, regs.tess_mode.cw != 0);
}
constexpr std::size_t Point = 0;
constexpr std::size_t Line = 1;
constexpr std::size_t Polygon = 2;
constexpr std::array PolygonOffsetEnableLUT = {
Point, // Points
Line, // Lines
Line, // LineLoop
Line, // LineStrip
Polygon, // Triangles
Polygon, // TriangleStrip
Polygon, // TriangleFan
Polygon, // Quads
Polygon, // QuadStrip
Polygon, // Polygon
Line, // LinesAdjacency
Line, // LineStripAdjacency
Polygon, // TrianglesAdjacency
Polygon, // TriangleStripAdjacency
Polygon, // Patches
constexpr std::size_t POINT = 0;
constexpr std::size_t LINE = 1;
constexpr std::size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
LINE, // LineLoop
LINE, // LineStrip
POLYGON, // Triangles
POLYGON, // TriangleStrip
POLYGON, // TriangleFan
POLYGON, // Quads
POLYGON, // QuadStrip
POLYGON, // Polygon
LINE, // LinesAdjacency
LINE, // LineStripAdjacency
POLYGON, // TrianglesAdjacency
POLYGON, // TriangleStripAdjacency
POLYGON, // Patches
};
constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) {
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
Maxwell::FrontFace front_face = regs.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
if (front_face == Maxwell::FrontFace::CounterClockWise)
front_face = Maxwell::FrontFace::ClockWise;
else if (front_face == Maxwell::FrontFace::ClockWise)
front_face = Maxwell::FrontFace::CounterClockWise;
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled,
depth_clamp_enabled, gl_ndc, regs.cull_face, front_face);
}
} // Anonymous namespace
std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept {
return (index << stride) ^ divisor;
}
bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept {
return std::tie(index, stride, divisor) == std::tie(rhs.index, rhs.stride, rhs.divisor);
}
std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept {
return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(buffer) << 13) ^
(static_cast<std::size_t>(type) << 22) ^ (static_cast<std::size_t>(size) << 31) ^
(static_cast<std::size_t>(offset) << 36);
}
bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept {
return std::tie(index, buffer, type, size, offset) ==
std::tie(rhs.index, rhs.buffer, rhs.type, rhs.size, rhs.offset);
}
std::size_t FixedPipelineState::StencilFace::Hash() const noexcept {
return static_cast<std::size_t>(action_stencil_fail) ^
(static_cast<std::size_t>(action_depth_fail) << 4) ^
(static_cast<std::size_t>(action_depth_fail) << 20) ^
(static_cast<std::size_t>(action_depth_pass) << 36);
}
bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept {
return std::tie(action_stencil_fail, action_depth_fail, action_depth_pass, test_func) ==
std::tie(rhs.action_stencil_fail, rhs.action_depth_fail, rhs.action_depth_pass,
rhs.test_func);
}
std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept {
return static_cast<std::size_t>(enable) ^ (static_cast<std::size_t>(rgb_equation) << 5) ^
(static_cast<std::size_t>(src_rgb_func) << 10) ^
(static_cast<std::size_t>(dst_rgb_func) << 15) ^
(static_cast<std::size_t>(a_equation) << 20) ^
(static_cast<std::size_t>(src_a_func) << 25) ^
(static_cast<std::size_t>(dst_a_func) << 30) ^
(static_cast<std::size_t>(components[0]) << 35) ^
(static_cast<std::size_t>(components[1]) << 36) ^
(static_cast<std::size_t>(components[2]) << 37) ^
(static_cast<std::size_t>(components[3]) << 38);
}
bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const
noexcept {
return std::tie(enable, rgb_equation, src_rgb_func, dst_rgb_func, a_equation, src_a_func,
dst_a_func, components) ==
std::tie(rhs.enable, rhs.rgb_equation, rhs.src_rgb_func, rhs.dst_rgb_func,
rhs.a_equation, rhs.src_a_func, rhs.dst_a_func, rhs.components);
}
std::size_t FixedPipelineState::VertexInput::Hash() const noexcept {
std::size_t hash = num_bindings ^ (num_attributes << 32);
for (std::size_t i = 0; i < num_bindings; ++i) {
boost::hash_combine(hash, bindings[i].Hash());
void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept {
raw = 0;
front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail));
front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail));
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass));
front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func));
if (regs.stencil_two_side_enable) {
back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail));
back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail));
back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass));
back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func));
} else {
back.action_stencil_fail.Assign(front.action_stencil_fail);
back.action_depth_fail.Assign(front.action_depth_fail);
back.action_depth_pass.Assign(front.action_depth_pass);
back.test_func.Assign(front.test_func);
}
for (std::size_t i = 0; i < num_attributes; ++i) {
boost::hash_combine(hash, attributes[i].Hash());
depth_test_enable.Assign(regs.depth_test_enable);
depth_write_enable.Assign(regs.depth_write_enabled);
depth_bounds_enable.Assign(regs.depth_bounds_enable);
stencil_enable.Assign(regs.stencil_enable);
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
}
void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
const auto& clip = regs.view_volume_clip_control;
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
// Flip front face
packed_front_face = 1 - packed_front_face;
}
return hash;
raw = 0;
topology.Assign(topology_index);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
cull_face.Assign(PackCullFace(regs.cull_face));
front_face.Assign(packed_front_face);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tess_mode.spacing.Value()));
tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
}
bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept {
return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(),
rhs.bindings.begin() + rhs.num_bindings) &&
std::equal(attributes.begin(), attributes.begin() + num_attributes,
rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes);
}
std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept {
std::size_t point_size_int = 0;
std::memcpy(&point_size_int, &point_size, sizeof(point_size));
return (static_cast<std::size_t>(topology) << 24) ^ (point_size_int << 32) ^
static_cast<std::size_t>(primitive_restart_enable);
}
bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept {
return std::tie(topology, primitive_restart_enable, point_size) ==
std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size);
}
std::size_t FixedPipelineState::Tessellation::Hash() const noexcept {
return static_cast<std::size_t>(patch_control_points) ^
(static_cast<std::size_t>(primitive) << 6) ^ (static_cast<std::size_t>(spacing) << 8) ^
(static_cast<std::size_t>(clockwise) << 10);
}
bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept {
return std::tie(patch_control_points, primitive, spacing, clockwise) ==
std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise);
}
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
return static_cast<std::size_t>(cull_enable) ^
(static_cast<std::size_t>(depth_bias_enable) << 1) ^
(static_cast<std::size_t>(depth_clamp_enable) << 2) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
(static_cast<std::size_t>(cull_face) << 24) ^
(static_cast<std::size_t>(front_face) << 48);
}
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
std::size_t hash = static_cast<std::size_t>(depth_test_enable) ^
(static_cast<std::size_t>(depth_write_enable) << 1) ^
(static_cast<std::size_t>(depth_bounds_enable) << 2) ^
(static_cast<std::size_t>(stencil_enable) << 3) ^
(static_cast<std::size_t>(depth_test_function) << 4);
boost::hash_combine(hash, front_stencil.Hash());
boost::hash_combine(hash, back_stencil.Hash());
return hash;
}
bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept {
return std::tie(depth_test_enable, depth_write_enable, depth_bounds_enable, depth_test_function,
stencil_enable, front_stencil, back_stencil) ==
std::tie(rhs.depth_test_enable, rhs.depth_write_enable, rhs.depth_bounds_enable,
rhs.depth_test_function, rhs.stencil_enable, rhs.front_stencil,
rhs.back_stencil);
}
std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept {
std::size_t hash = attachments_count << 13;
for (std::size_t rt = 0; rt < static_cast<std::size_t>(attachments_count); ++rt) {
boost::hash_combine(hash, attachments[rt].Hash());
void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept {
for (std::size_t index = 0; index < std::size(attachments); ++index) {
attachments[index].Fill(regs, index);
}
return hash;
}
bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept {
return std::equal(attachments.begin(), attachments.begin() + attachments_count,
rhs.attachments.begin(), rhs.attachments.begin() + rhs.attachments_count);
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
raw = 0;
mask_r.Assign(mask.R);
mask_g.Assign(mask.G);
mask_b.Assign(mask.B);
mask_a.Assign(mask.A);
// TODO: C++20 Use templated lambda to deduplicate code
if (!regs.independent_blend_enable) {
const auto& src = regs.blend;
if (!src.enable[index]) {
return;
}
equation_rgb.Assign(PackBlendEquation(src.equation_rgb));
equation_a.Assign(PackBlendEquation(src.equation_a));
factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb));
factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb));
factor_source_a.Assign(PackBlendFactor(src.factor_source_a));
factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a));
enable.Assign(1);
return;
}
if (!regs.blend.enable[index]) {
return;
}
const auto& src = regs.independent_blend[index];
equation_rgb.Assign(PackBlendEquation(src.equation_rgb));
equation_a.Assign(PackBlendEquation(src.equation_a));
factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb));
factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb));
factor_source_a.Assign(PackBlendFactor(src.factor_source_a));
factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a));
enable.Assign(1);
}
std::size_t FixedPipelineState::Hash() const noexcept {
std::size_t hash = 0;
boost::hash_combine(hash, vertex_input.Hash());
boost::hash_combine(hash, input_assembly.Hash());
boost::hash_combine(hash, tessellation.Hash());
boost::hash_combine(hash, rasterizer.Hash());
boost::hash_combine(hash, depth_stencil.Hash());
boost::hash_combine(hash, color_blending.Hash());
return hash;
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
return static_cast<std::size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil,
color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly,
rhs.tessellation, rhs.rasterizer, rhs.depth_stencil,
rhs.color_blending);
return std::memcmp(this, &rhs, sizeof *this) == 0;
}
FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
FixedPipelineState fixed_state;
fixed_state.input_assembly = GetInputAssemblyState(regs);
fixed_state.tessellation = GetTessellationState(regs);
fixed_state.rasterizer = GetRasterizerState(regs);
fixed_state.depth_stencil = GetDepthStencilState(regs);
fixed_state.color_blending = GetColorBlendingState(regs);
fixed_state.rasterizer.Fill(regs);
fixed_state.depth_stencil.Fill(regs);
fixed_state.color_blending.Fill(regs);
fixed_state.padding = {};
return fixed_state;
}
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
// If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
// Perfect for a hash.
const u32 value = static_cast<u32>(op);
return value - (value >= 0x200 ? 0x200 : 1);
}
Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept {
// Read PackComparisonOp for the logic behind this.
return static_cast<Maxwell::ComparisonOp>(packed + 1);
}
u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp op) noexcept {
switch (op) {
case Maxwell::StencilOp::Keep:
case Maxwell::StencilOp::KeepOGL:
return 0;
case Maxwell::StencilOp::Zero:
case Maxwell::StencilOp::ZeroOGL:
return 1;
case Maxwell::StencilOp::Replace:
case Maxwell::StencilOp::ReplaceOGL:
return 2;
case Maxwell::StencilOp::Incr:
case Maxwell::StencilOp::IncrOGL:
return 3;
case Maxwell::StencilOp::Decr:
case Maxwell::StencilOp::DecrOGL:
return 4;
case Maxwell::StencilOp::Invert:
case Maxwell::StencilOp::InvertOGL:
return 5;
case Maxwell::StencilOp::IncrWrap:
case Maxwell::StencilOp::IncrWrapOGL:
return 6;
case Maxwell::StencilOp::DecrWrap:
case Maxwell::StencilOp::DecrWrapOGL:
return 7;
}
return 0;
}
Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept {
static constexpr std::array LUT = {Maxwell::StencilOp::Keep, Maxwell::StencilOp::Zero,
Maxwell::StencilOp::Replace, Maxwell::StencilOp::Incr,
Maxwell::StencilOp::Decr, Maxwell::StencilOp::Invert,
Maxwell::StencilOp::IncrWrap, Maxwell::StencilOp::DecrWrap};
return LUT[packed];
}
u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept {
// FrontAndBack is 0x408, by substracting 0x406 in it we get 2.
// Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1.
const u32 value = static_cast<u32>(cull);
return value - (value == 0x408 ? 0x406 : 0x404);
}
Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept {
static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back,
Maxwell::CullFace::FrontAndBack};
return LUT[packed];
}
u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept {
return static_cast<u32>(face) - 0x900;
}
Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept {
return static_cast<Maxwell::FrontFace>(packed + 0x900);
}
u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept {
return static_cast<u32>(mode) - 0x1B00;
}
Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept {
return static_cast<Maxwell::PolygonMode>(packed + 0x1B00);
}
u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept {
return static_cast<u32>(op) - 0x1500;
}
Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept {
return static_cast<Maxwell::LogicOperation>(packed + 0x1500);
}
u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept {
switch (equation) {
case Maxwell::Blend::Equation::Add:
case Maxwell::Blend::Equation::AddGL:
return 0;
case Maxwell::Blend::Equation::Subtract:
case Maxwell::Blend::Equation::SubtractGL:
return 1;
case Maxwell::Blend::Equation::ReverseSubtract:
case Maxwell::Blend::Equation::ReverseSubtractGL:
return 2;
case Maxwell::Blend::Equation::Min:
case Maxwell::Blend::Equation::MinGL:
return 3;
case Maxwell::Blend::Equation::Max:
case Maxwell::Blend::Equation::MaxGL:
return 4;
}
return 0;
}
Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::Blend::Equation::Add, Maxwell::Blend::Equation::Subtract,
Maxwell::Blend::Equation::ReverseSubtract, Maxwell::Blend::Equation::Min,
Maxwell::Blend::Equation::Max};
return LUT[packed];
}
u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept {
switch (factor) {
case Maxwell::Blend::Factor::Zero:
case Maxwell::Blend::Factor::ZeroGL:
return 0;
case Maxwell::Blend::Factor::One:
case Maxwell::Blend::Factor::OneGL:
return 1;
case Maxwell::Blend::Factor::SourceColor:
case Maxwell::Blend::Factor::SourceColorGL:
return 2;
case Maxwell::Blend::Factor::OneMinusSourceColor:
case Maxwell::Blend::Factor::OneMinusSourceColorGL:
return 3;
case Maxwell::Blend::Factor::SourceAlpha:
case Maxwell::Blend::Factor::SourceAlphaGL:
return 4;
case Maxwell::Blend::Factor::OneMinusSourceAlpha:
case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
return 5;
case Maxwell::Blend::Factor::DestAlpha:
case Maxwell::Blend::Factor::DestAlphaGL:
return 6;
case Maxwell::Blend::Factor::OneMinusDestAlpha:
case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
return 7;
case Maxwell::Blend::Factor::DestColor:
case Maxwell::Blend::Factor::DestColorGL:
return 8;
case Maxwell::Blend::Factor::OneMinusDestColor:
case Maxwell::Blend::Factor::OneMinusDestColorGL:
return 9;
case Maxwell::Blend::Factor::SourceAlphaSaturate:
case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
return 10;
case Maxwell::Blend::Factor::Source1Color:
case Maxwell::Blend::Factor::Source1ColorGL:
return 11;
case Maxwell::Blend::Factor::OneMinusSource1Color:
case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
return 12;
case Maxwell::Blend::Factor::Source1Alpha:
case Maxwell::Blend::Factor::Source1AlphaGL:
return 13;
case Maxwell::Blend::Factor::OneMinusSource1Alpha:
case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
return 14;
case Maxwell::Blend::Factor::ConstantColor:
case Maxwell::Blend::Factor::ConstantColorGL:
return 15;
case Maxwell::Blend::Factor::OneMinusConstantColor:
case Maxwell::Blend::Factor::OneMinusConstantColorGL:
return 16;
case Maxwell::Blend::Factor::ConstantAlpha:
case Maxwell::Blend::Factor::ConstantAlphaGL:
return 17;
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return 18;
}
return 0;
}
Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept {
static constexpr std::array LUT = {
Maxwell::Blend::Factor::Zero,
Maxwell::Blend::Factor::One,
Maxwell::Blend::Factor::SourceColor,
Maxwell::Blend::Factor::OneMinusSourceColor,
Maxwell::Blend::Factor::SourceAlpha,
Maxwell::Blend::Factor::OneMinusSourceAlpha,
Maxwell::Blend::Factor::DestAlpha,
Maxwell::Blend::Factor::OneMinusDestAlpha,
Maxwell::Blend::Factor::DestColor,
Maxwell::Blend::Factor::OneMinusDestColor,
Maxwell::Blend::Factor::SourceAlphaSaturate,
Maxwell::Blend::Factor::Source1Color,
Maxwell::Blend::Factor::OneMinusSource1Color,
Maxwell::Blend::Factor::Source1Alpha,
Maxwell::Blend::Factor::OneMinusSource1Alpha,
Maxwell::Blend::Factor::ConstantColor,
Maxwell::Blend::Factor::OneMinusConstantColor,
Maxwell::Blend::Factor::ConstantAlpha,
Maxwell::Blend::Factor::OneMinusConstantAlpha,
};
return LUT[packed];
}
} // namespace Vulkan

View File

@@ -7,6 +7,7 @@
#include <array>
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
@@ -16,93 +17,48 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
// TODO(Rodrigo): Optimize this structure.
struct alignas(32) FixedPipelineState {
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
struct FixedPipelineState {
using PixelFormat = VideoCore::Surface::PixelFormat;
static u32 PackStencilOp(Maxwell::StencilOp op) noexcept;
static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept;
struct VertexBinding {
constexpr VertexBinding(u32 index, u32 stride, u32 divisor)
: index{index}, stride{stride}, divisor{divisor} {}
VertexBinding() = default;
static u32 PackCullFace(Maxwell::CullFace cull) noexcept;
static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept;
u32 index;
u32 stride;
u32 divisor;
static u32 PackFrontFace(Maxwell::FrontFace face) noexcept;
static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept;
std::size_t Hash() const noexcept;
static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept;
static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept;
bool operator==(const VertexBinding& rhs) const noexcept;
static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept;
static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept;
bool operator!=(const VertexBinding& rhs) const noexcept {
return !operator==(rhs);
}
};
static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept;
static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept;
struct VertexAttribute {
constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size, u32 offset)
: index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {}
VertexAttribute() = default;
u32 index;
u32 buffer;
Maxwell::VertexAttribute::Type type;
Maxwell::VertexAttribute::Size size;
u32 offset;
std::size_t Hash() const noexcept;
bool operator==(const VertexAttribute& rhs) const noexcept;
bool operator!=(const VertexAttribute& rhs) const noexcept {
return !operator==(rhs);
}
};
struct StencilFace {
constexpr StencilFace(Maxwell::StencilOp action_stencil_fail,
Maxwell::StencilOp action_depth_fail,
Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func)
: action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail},
action_depth_pass{action_depth_pass}, test_func{test_func} {}
StencilFace() = default;
Maxwell::StencilOp action_stencil_fail;
Maxwell::StencilOp action_depth_fail;
Maxwell::StencilOp action_depth_pass;
Maxwell::ComparisonOp test_func;
std::size_t Hash() const noexcept;
bool operator==(const StencilFace& rhs) const noexcept;
bool operator!=(const StencilFace& rhs) const noexcept {
return !operator==(rhs);
}
};
static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept;
static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept;
struct BlendingAttachment {
constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation,
Maxwell::Blend::Factor src_rgb_func,
Maxwell::Blend::Factor dst_rgb_func,
Maxwell::Blend::Equation a_equation,
Maxwell::Blend::Factor src_a_func,
Maxwell::Blend::Factor dst_a_func,
std::array<bool, 4> components)
: enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func},
dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func},
dst_a_func{dst_a_func}, components{components} {}
BlendingAttachment() = default;
union {
u32 raw;
BitField<0, 1, u32> mask_r;
BitField<1, 1, u32> mask_g;
BitField<2, 1, u32> mask_b;
BitField<3, 1, u32> mask_a;
BitField<4, 3, u32> equation_rgb;
BitField<7, 3, u32> equation_a;
BitField<10, 5, u32> factor_source_rgb;
BitField<15, 5, u32> factor_dest_rgb;
BitField<20, 5, u32> factor_source_a;
BitField<25, 5, u32> factor_dest_a;
BitField<30, 1, u32> enable;
};
bool enable;
Maxwell::Blend::Equation rgb_equation;
Maxwell::Blend::Factor src_rgb_func;
Maxwell::Blend::Factor dst_rgb_func;
Maxwell::Blend::Equation a_equation;
Maxwell::Blend::Factor src_a_func;
Maxwell::Blend::Factor dst_a_func;
std::array<bool, 4> components;
void Fill(const Maxwell& regs, std::size_t index);
std::size_t Hash() const noexcept;
@@ -111,136 +67,178 @@ struct FixedPipelineState {
bool operator!=(const BlendingAttachment& rhs) const noexcept {
return !operator==(rhs);
}
constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
Maxwell::Blend::Equation EquationRGB() const noexcept {
return UnpackBlendEquation(equation_rgb.Value());
}
Maxwell::Blend::Equation EquationAlpha() const noexcept {
return UnpackBlendEquation(equation_a.Value());
}
Maxwell::Blend::Factor SourceRGBFactor() const noexcept {
return UnpackBlendFactor(factor_source_rgb.Value());
}
Maxwell::Blend::Factor DestRGBFactor() const noexcept {
return UnpackBlendFactor(factor_dest_rgb.Value());
}
Maxwell::Blend::Factor SourceAlphaFactor() const noexcept {
return UnpackBlendFactor(factor_source_a.Value());
}
Maxwell::Blend::Factor DestAlphaFactor() const noexcept {
return UnpackBlendFactor(factor_dest_a.Value());
}
};
struct VertexInput {
std::size_t num_bindings = 0;
std::size_t num_attributes = 0;
std::array<VertexBinding, Maxwell::NumVertexArrays> bindings;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
union Binding {
u16 raw;
BitField<0, 1, u16> enabled;
BitField<1, 12, u16> stride;
};
std::size_t Hash() const noexcept;
union Attribute {
u32 raw;
BitField<0, 1, u32> enabled;
BitField<1, 5, u32> buffer;
BitField<6, 14, u32> offset;
BitField<20, 3, u32> type;
BitField<23, 6, u32> size;
bool operator==(const VertexInput& rhs) const noexcept;
constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
bool operator!=(const VertexInput& rhs) const noexcept {
return !operator==(rhs);
constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
std::array<Binding, Maxwell::NumVertexArrays> bindings;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
std::array<Attribute, Maxwell::NumVertexAttributes> attributes;
void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept {
auto& binding = bindings[index];
binding.raw = 0;
binding.enabled.Assign(enabled ? 1 : 0);
binding.stride.Assign(stride);
binding_divisors[index] = divisor;
}
};
struct InputAssembly {
constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable,
float point_size)
: topology{topology}, primitive_restart_enable{primitive_restart_enable},
point_size{point_size} {}
InputAssembly() = default;
Maxwell::PrimitiveTopology topology;
bool primitive_restart_enable;
float point_size;
std::size_t Hash() const noexcept;
bool operator==(const InputAssembly& rhs) const noexcept;
bool operator!=(const InputAssembly& rhs) const noexcept {
return !operator==(rhs);
}
};
struct Tessellation {
constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive,
Maxwell::TessellationSpacing spacing, bool clockwise)
: patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing},
clockwise{clockwise} {}
Tessellation() = default;
u32 patch_control_points;
Maxwell::TessellationPrimitive primitive;
Maxwell::TessellationSpacing spacing;
bool clockwise;
std::size_t Hash() const noexcept;
bool operator==(const Tessellation& rhs) const noexcept;
bool operator!=(const Tessellation& rhs) const noexcept {
return !operator==(rhs);
void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset,
Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size) noexcept {
auto& attribute = attributes[index];
attribute.raw = 0;
attribute.enabled.Assign(enabled ? 1 : 0);
attribute.buffer.Assign(buffer);
attribute.offset.Assign(offset);
attribute.type.Assign(static_cast<u32>(type));
attribute.size.Assign(static_cast<u32>(size));
}
};
struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
bool ndc_minus_one_to_one, Maxwell::CullFace cull_face,
Maxwell::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {}
Rasterizer() = default;
union {
u32 raw;
BitField<0, 4, u32> topology;
BitField<4, 1, u32> primitive_restart_enable;
BitField<5, 1, u32> cull_enable;
BitField<6, 1, u32> depth_bias_enable;
BitField<7, 1, u32> depth_clamp_enable;
BitField<8, 1, u32> ndc_minus_one_to_one;
BitField<9, 2, u32> cull_face;
BitField<11, 1, u32> front_face;
BitField<12, 2, u32> polygon_mode;
BitField<14, 5, u32> patch_control_points_minus_one;
BitField<19, 2, u32> tessellation_primitive;
BitField<21, 2, u32> tessellation_spacing;
BitField<23, 1, u32> tessellation_clockwise;
BitField<24, 1, u32> logic_op_enable;
BitField<25, 4, u32> logic_op;
};
bool cull_enable;
bool depth_bias_enable;
bool depth_clamp_enable;
bool ndc_minus_one_to_one;
Maxwell::CullFace cull_face;
Maxwell::FrontFace front_face;
// TODO(Rodrigo): Move this to push constants
u32 point_size;
std::size_t Hash() const noexcept;
void Fill(const Maxwell& regs) noexcept;
bool operator==(const Rasterizer& rhs) const noexcept;
constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
}
bool operator!=(const Rasterizer& rhs) const noexcept {
return !operator==(rhs);
Maxwell::CullFace CullFace() const noexcept {
return UnpackCullFace(cull_face.Value());
}
Maxwell::FrontFace FrontFace() const noexcept {
return UnpackFrontFace(front_face.Value());
}
};
struct DepthStencil {
constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable,
bool depth_bounds_enable, bool stencil_enable,
Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil,
StencilFace back_stencil)
: depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable},
depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable},
depth_test_function{depth_test_function}, front_stencil{front_stencil},
back_stencil{back_stencil} {}
DepthStencil() = default;
template <std::size_t Position>
union StencilFace {
BitField<Position + 0, 3, u32> action_stencil_fail;
BitField<Position + 3, 3, u32> action_depth_fail;
BitField<Position + 6, 3, u32> action_depth_pass;
BitField<Position + 9, 3, u32> test_func;
bool depth_test_enable;
bool depth_write_enable;
bool depth_bounds_enable;
bool stencil_enable;
Maxwell::ComparisonOp depth_test_function;
StencilFace front_stencil;
StencilFace back_stencil;
Maxwell::StencilOp ActionStencilFail() const noexcept {
return UnpackStencilOp(action_stencil_fail);
}
std::size_t Hash() const noexcept;
Maxwell::StencilOp ActionDepthFail() const noexcept {
return UnpackStencilOp(action_depth_fail);
}
bool operator==(const DepthStencil& rhs) const noexcept;
Maxwell::StencilOp ActionDepthPass() const noexcept {
return UnpackStencilOp(action_depth_pass);
}
bool operator!=(const DepthStencil& rhs) const noexcept {
return !operator==(rhs);
Maxwell::ComparisonOp TestFunc() const noexcept {
return UnpackComparisonOp(test_func);
}
};
union {
u32 raw;
StencilFace<0> front;
StencilFace<12> back;
BitField<24, 1, u32> depth_test_enable;
BitField<25, 1, u32> depth_write_enable;
BitField<26, 1, u32> depth_bounds_enable;
BitField<27, 1, u32> stencil_enable;
BitField<28, 3, u32> depth_test_func;
};
void Fill(const Maxwell& regs) noexcept;
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
}
};
struct ColorBlending {
constexpr ColorBlending(
std::array<float, 4> blend_constants, std::size_t attachments_count,
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments)
: attachments_count{attachments_count}, attachments{attachments} {}
ColorBlending() = default;
std::size_t attachments_count;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::size_t Hash() const noexcept;
bool operator==(const ColorBlending& rhs) const noexcept;
bool operator!=(const ColorBlending& rhs) const noexcept {
return !operator==(rhs);
}
void Fill(const Maxwell& regs) noexcept;
};
VertexInput vertex_input;
Rasterizer rasterizer;
DepthStencil depth_stencil;
ColorBlending color_blending;
std::array<u8, 20> padding;
std::size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept;
@@ -248,25 +246,11 @@ struct FixedPipelineState {
bool operator!=(const FixedPipelineState& rhs) const noexcept {
return !operator==(rhs);
}
VertexInput vertex_input;
InputAssembly input_assembly;
Tessellation tessellation;
Rasterizer rasterizer;
DepthStencil depth_stencil;
ColorBlending color_blending;
};
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>);
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
FixedPipelineState GetFixedPipelineState(const Maxwell& regs);

View File

@@ -0,0 +1,220 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#ifdef HAS_NSIGHT_AFTERMATH
#include <mutex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include <fmt/format.h>
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
#include <GFSDK_Aftermath.h>
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
namespace Vulkan {
static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
NsightAftermathTracker::NsightAftermathTracker() = default;
NsightAftermathTracker::~NsightAftermathTracker() {
if (initialized) {
(void)GFSDK_Aftermath_DisableGpuCrashDumps();
}
}
bool NsightAftermathTracker::Initialize() {
if (!dl.Open(AFTERMATH_LIB_NAME)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
return false;
}
if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
&GFSDK_Aftermath_DisableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
&GFSDK_Aftermath_EnableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier",
&GFSDK_Aftermath_GetShaderDebugInfoIdentifier) ||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder",
&GFSDK_Aftermath_GpuCrashDump_CreateDecoder) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder",
&GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON",
&GFSDK_Aftermath_GpuCrashDump_GenerateJSON) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
&GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
return false;
}
dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
(void)FileUtil::DeleteDirRecursively(dump_dir);
if (!FileUtil::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return false;
}
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
return false;
}
LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
initialized = true;
return true;
}
void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
if (!initialized) {
return;
}
std::vector<u32> spirv_copy = spirv;
GFSDK_Aftermath_SpirvCode shader;
shader.pData = spirv_copy.data();
shader.size = static_cast<u32>(spirv_copy.size() * 4);
std::scoped_lock lock{mutex};
GFSDK_Aftermath_ShaderHash hash;
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
return;
}
FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
}
if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
return;
}
}
void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
u32 gpu_crash_dump_size) {
std::scoped_lock lock{mutex};
LOG_CRITICAL(Render_Vulkan, "called");
GFSDK_Aftermath_GpuCrashDump_Decoder decoder;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(
GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) {
LOG_ERROR(Render_Vulkan, "Failed to create decoder");
return;
}
SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); });
u32 json_size = 0;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
this, &json_size))) {
LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
return;
}
std::vector<char> json(json_size);
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) {
LOG_ERROR(Render_Vulkan, "Failed to query JSON");
return;
}
const std::string base_name = [this] {
const int id = dump_id++;
if (id == 0) {
return fmt::format("{}/crash.nv-gpudmp", dump_dir);
} else {
return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id);
}
}();
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
return;
}
const std::string_view json_view(json.data(), json.size());
if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
return;
}
}
void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info,
u32 shader_debug_info_size) {
std::scoped_lock lock{mutex};
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(
GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed");
return;
}
const std::string path =
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
FileUtil::IOFile file(path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
return;
}
if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) !=
shader_debug_info_size) {
LOG_ERROR(Render_Vulkan, "Failed to write file {}", path);
return;
}
}
void NsightAftermathTracker::OnCrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) {
add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu");
}
void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump,
u32 gpu_crash_dump_size, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump,
gpu_crash_dump_size);
}
void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info,
u32 shader_debug_info_size, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback(
shader_debug_info, shader_debug_info_size);
}
void NsightAftermathTracker::CrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback(
add_description);
}
} // namespace Vulkan
#endif // HAS_NSIGHT_AFTERMATH

View File

@@ -0,0 +1,87 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <string>
#include <vector>
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
#ifdef HAS_NSIGHT_AFTERMATH
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#endif
#include "common/common_types.h"
#include "common/dynamic_library.h"
namespace Vulkan {
class NsightAftermathTracker {
public:
NsightAftermathTracker();
~NsightAftermathTracker();
NsightAftermathTracker(const NsightAftermathTracker&) = delete;
NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete;
// Delete move semantics because Aftermath initialization uses a pointer to this.
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
bool Initialize();
void SaveShader(const std::vector<u32>& spirv) const;
private:
#ifdef HAS_NSIGHT_AFTERMATH
static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size,
void* user_data);
static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size,
void* user_data);
static void CrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data);
void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size);
void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size);
void OnCrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description);
mutable std::mutex mutex;
std::string dump_dir;
int dump_id = 0;
bool initialized = false;
Common::DynamicLibrary dl;
PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
#endif
};
#ifndef HAS_NSIGHT_AFTERMATH
inline NsightAftermathTracker::NsightAftermathTracker() = default;
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
inline bool NsightAftermathTracker::Initialize() {
return false;
}
inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
#endif
} // namespace Vulkan

View File

@@ -42,7 +42,7 @@
#include <vulkan/vulkan_win32.h>
#endif
#ifdef __linux__
#if !defined(_WIN32) && !defined(__APPLE__)
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
@@ -119,7 +119,7 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#endif
#ifdef __linux__
#if !defined(_WIN32) && !defined(__APPLE__)
case Core::Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
@@ -345,7 +345,7 @@ bool RendererVulkan::CreateSurface() {
}
}
#endif
#ifdef __linux__
#if !defined(_WIN32) && !defined(__APPLE__)
if (window_info.type == Core::Frontend::WindowSystemType::X11) {
const VkXlibSurfaceCreateInfoKHR xlib_ci{
VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,

View File

@@ -0,0 +1,50 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V quad_indexed.comp -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (local_size_x = 1024) in;
layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
uint input_indexes[];
};
layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint output_indexes[];
};
layout (push_constant) uniform PushConstants {
uint base_vertex;
int index_shift; // 0: uint8, 1: uint16, 2: uint32
};
void main() {
int primitive = int(gl_GlobalInvocationID.x);
if (primitive * 6 >= output_indexes.length()) {
return;
}
int index_size = 8 << index_shift;
int flipped_shift = 2 - index_shift;
int mask = (1 << flipped_shift) - 1;
const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
for (uint vertex = 0; vertex < 6; ++vertex) {
int offset = primitive * 4 + quad_swizzle[vertex];
int int_offset = offset >> flipped_shift;
int bit_offset = (offset & mask) * index_size;
uint packed_input = input_indexes[int_offset];
uint index = bitfieldExtract(packed_input, bit_offset, index_size);
output_indexes[primitive * 6 + vertex] = index + base_vertex;
}
}

View File

@@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt
return entry;
}
VkPushConstantRange BuildQuadArrayPassPushConstantRange() {
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
VkPushConstantRange range;
range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
range.offset = 0;
range.size = sizeof(u32);
range.size = static_cast<u32>(size);
return range;
}
@@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = {
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() {
// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 QUAD_INDEXED_SPV[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
std::array<VkDescriptorSetLayoutBinding, 2> bindings;
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings(
return bindings;
}
VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() {
VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
VkDescriptorUpdateTemplateEntryKHR entry;
entry.dstBinding = 0;
entry.dstArrayElement = 0;
@@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array),
BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
QuadArrayPass::~QuadArrayPass() = default;
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = num_vertices * 6 / 4;
const u32 num_triangle_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
@@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(),
BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass),
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
uint8_pass),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
@@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
return {*buffer.handle, 0};
}
QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
BuildInputOutputDescriptorUpdateTemplate(),
BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV),
QUAD_INDEXED_SPV),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
QuadIndexedPass::~QuadIndexedPass() = default;
std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
VkBuffer src_buffer, u64 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
return 0;
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
return 1;
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
return 2;
}
UNREACHABLE();
return 2;
}();
const u32 input_size = num_vertices << index_shift;
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 dispatch_size = 1024;
const std::array push_constants = {base_vertex, index_shift};
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer;
barrier.offset = 0;
barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
return {*buffer.handle, 0};
}
} // namespace Vulkan

View File

@@ -8,6 +8,7 @@
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -73,4 +74,22 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class QuadIndexedPass final : public VKComputePass {
public:
explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~QuadIndexedPass();
std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
u64 src_offset);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
} // namespace Vulkan

View File

@@ -105,6 +105,8 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
}
vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
device.SaveShader(code);
VkShaderModuleCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
ci.pNext = nullptr;

View File

@@ -9,6 +9,7 @@
#include <string_view>
#include <thread>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/assert.h"
@@ -167,6 +168,7 @@ bool VKDevice::Create() {
VkPhysicalDeviceFeatures2 features2;
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = nullptr;
const void* first_next = &features2;
void** next = &features2.pNext;
auto& features = features2.features;
@@ -296,7 +298,19 @@ bool VKDevice::Create() {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld);
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
if (nv_device_diagnostics_config) {
nsight_aftermath_tracker.Initialize();
diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV;
diagnostics_nv.pNext = &features2;
diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV;
first_next = &diagnostics_nv;
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
if (!logical) {
LOG_ERROR(Render_Vulkan, "Failed to create logical device");
return false;
@@ -344,17 +358,12 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
void VKDevice::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
// Wait some time to let the log flush
std::this_thread::sleep_for(std::chrono::seconds{1});
// Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{3});
}
if (!nv_device_diagnostic_checkpoints) {
return;
}
[[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld);
// Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be
// executed. It can be done on a debugger by evaluating the expression:
// *(VKGraphicsPipeline*)data[0]
void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
nsight_aftermath_tracker.SaveShader(spirv);
}
bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
@@ -527,8 +536,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
Test(extension, nv_device_diagnostics_config,
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
}
}

View File

@@ -10,6 +10,7 @@
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
@@ -43,6 +44,9 @@ public:
/// Reports a device loss.
void ReportLoss() const;
/// Reports a shader to Nsight Aftermath.
void SaveShader(const std::vector<u32>& spirv) const;
/// Returns the dispatch loader with direct function pointers of the device.
const vk::DeviceDispatch& GetDispatchLoader() const {
return dld;
@@ -173,11 +177,6 @@ public:
return ext_transform_feedback;
}
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -233,7 +232,7 @@ private:
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
@@ -241,6 +240,9 @@ private:
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
/// Nsight Aftermath GPU crash tracker
NsightAftermathTracker nsight_aftermath_tracker;
};
} // namespace Vulkan

View File

@@ -0,0 +1,101 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <thread>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
: VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
u32 payload, bool is_stubbed)
: VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
InnerFence::~InnerFence() = default;
void InnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(!event);
event = device.GetLogical().CreateEvent();
ticks = scheduler.Ticks();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
});
}
bool InnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(event);
return IsEventSignalled();
}
void InnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(event);
if (ticks >= scheduler.Ticks()) {
scheduler.Flush();
}
while (!IsEventSignalled()) {
std::this_thread::yield();
}
}
bool InnerFence::IsEventSignalled() const {
switch (const VkResult result = event.GetStatus()) {
case VK_EVENT_SET:
return true;
case VK_EVENT_RESET:
return false;
default:
throw vk::Exception(result);
}
}
VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
VKQueryCache& query_cache)
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
device{device}, scheduler{scheduler} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
}
Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
}
void VKFenceManager::QueueFence(Fence& fence) {
fence->Queue();
}
bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void VKFenceManager::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace Vulkan

View File

@@ -0,0 +1,74 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class VKBufferCache;
class VKDevice;
class VKQueryCache;
class VKScheduler;
class VKTextureCache;
class InnerFence : public VideoCommon::FenceBase {
public:
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
bool is_stubbed);
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
u32 payload, bool is_stubbed);
~InnerFence();
void Queue();
bool IsSignaled() const;
void Wait();
private:
bool IsEventSignalled() const;
const VKDevice& device;
VKScheduler& scheduler;
vk::Event event;
u64 ticks = 0;
};
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
VKQueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
private:
const VKDevice& device;
VKScheduler& scheduler;
};
} // namespace Vulkan

View File

@@ -26,12 +26,13 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
template <class StencilFace>
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
VkStencilOpState state;
state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail);
state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass);
state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail);
state.compareOp = MaxwellToVK::ComparisonOp(face.test_func);
state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail());
state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass());
state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail());
state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc());
state.compareMask = 0;
state.writeMask = 0;
state.reference = 0;
@@ -147,6 +148,8 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
continue;
}
device.SaveShader(stage->code);
ci.codeSize = stage->code.size() * sizeof(u32);
ci.pCode = stage->code.data();
modules.push_back(device.GetLogical().CreateShaderModule(ci));
@@ -157,43 +160,47 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
const auto& vi = fixed_state.vertex_input;
const auto& ia = fixed_state.input_assembly;
const auto& ds = fixed_state.depth_stencil;
const auto& cd = fixed_state.color_blending;
const auto& ts = fixed_state.tessellation;
const auto& rs = fixed_state.rasterizer;
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t i = 0; i < vi.num_bindings; ++i) {
const auto& binding = vi.bindings[i];
const bool instanced = binding.divisor != 0;
for (std::size_t index = 0; index < std::size(vi.bindings); ++index) {
const auto& binding = vi.bindings[index];
if (!binding.enabled) {
continue;
}
const bool instanced = vi.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
auto& vertex_binding = vertex_bindings.emplace_back();
vertex_binding.binding = binding.index;
vertex_binding.binding = static_cast<u32>(index);
vertex_binding.stride = binding.stride;
vertex_binding.inputRate = rate;
if (instanced) {
auto& binding_divisor = vertex_binding_divisors.emplace_back();
binding_divisor.binding = binding.index;
binding_divisor.divisor = binding.divisor;
binding_divisor.binding = static_cast<u32>(index);
binding_divisor.divisor = vi.binding_divisors[index];
}
}
std::vector<VkVertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t i = 0; i < vi.num_attributes; ++i) {
const auto& attribute = vi.attributes[i];
if (input_attributes.find(attribute.index) == input_attributes.end()) {
for (std::size_t index = 0; index < std::size(vi.attributes); ++index) {
const auto& attribute = vi.attributes[index];
if (!attribute.enabled) {
continue;
}
if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) {
// Skip attributes not used by the vertex shaders.
continue;
}
auto& vertex_attribute = vertex_attributes.emplace_back();
vertex_attribute.location = attribute.index;
vertex_attribute.location = static_cast<u32>(index);
vertex_attribute.binding = attribute.buffer;
vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size);
vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size());
vertex_attribute.offset = attribute.offset;
}
@@ -219,15 +226,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
input_assembly_ci.pNext = nullptr;
input_assembly_ci.flags = 0;
input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology());
input_assembly_ci.primitiveRestartEnable =
ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology);
rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);
VkPipelineTessellationStateCreateInfo tessellation_ci;
tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
tessellation_ci.pNext = nullptr;
tessellation_ci.flags = 0;
tessellation_ci.patchControlPoints = ts.patch_control_points;
tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1;
VkPipelineViewportStateCreateInfo viewport_ci;
viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
@@ -246,8 +253,8 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
rasterization_ci.cullMode =
rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE;
rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face);
rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE;
rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace());
rasterization_ci.depthBiasEnable = rs.depth_bias_enable;
rasterization_ci.depthBiasConstantFactor = 0.0f;
rasterization_ci.depthBiasClamp = 0.0f;
@@ -271,40 +278,38 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
depth_stencil_ci.flags = 0;
depth_stencil_ci.depthTestEnable = ds.depth_test_enable;
depth_stencil_ci.depthWriteEnable = ds.depth_write_enable;
depth_stencil_ci.depthCompareOp = ds.depth_test_enable
? MaxwellToVK::ComparisonOp(ds.depth_test_function)
: VK_COMPARE_OP_ALWAYS;
depth_stencil_ci.depthCompareOp =
ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS;
depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable;
depth_stencil_ci.stencilTestEnable = ds.stencil_enable;
depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil);
depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil);
depth_stencil_ci.front = GetStencilFaceState(ds.front);
depth_stencil_ci.back = GetStencilFaceState(ds.back);
depth_stencil_ci.minDepthBounds = 0.0f;
depth_stencil_ci.maxDepthBounds = 0.0f;
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const std::size_t num_attachments =
std::min(cd.attachments_count, renderpass_params.color_attachments.size());
for (std::size_t i = 0; i < num_attachments; ++i) {
static constexpr std::array component_table = {
const std::size_t num_attachments = renderpass_params.color_attachments.size();
for (std::size_t index = 0; index < num_attachments; ++index) {
static constexpr std::array COMPONENT_TABLE = {
VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT};
const auto& blend = cd.attachments[i];
const auto& blend = cd.attachments[index];
VkColorComponentFlags color_components = 0;
for (std::size_t j = 0; j < component_table.size(); ++j) {
if (blend.components[j]) {
color_components |= component_table[j];
for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
if (blend.Mask()[i]) {
color_components |= COMPONENT_TABLE[i];
}
}
VkPipelineColorBlendAttachmentState& attachment = cb_attachments[i];
attachment.blendEnable = blend.enable;
attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func);
attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func);
attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation);
attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func);
attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func);
attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation);
VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index];
attachment.blendEnable = blend.enable != 0;
attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor());
attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor());
attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB());
attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor());
attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor());
attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha());
attachment.colorWriteMask = color_components;
}

View File

@@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
@@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
std::move(code), stage_offset);
Register(shader);
if (cpu_addr) {
Register(shader);
} else {
null_shader = shader;
}
}
shaders[index] = std::move(shader);
}
@@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
@@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, std::move(code),
kernel_main_offset);
Register(shader);
if (cpu_addr) {
Register(shader);
} else {
null_kernel = shader;
}
}
Specialization specialization;
@@ -329,12 +337,14 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) {
float point_size;
std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float));
specialization.point_size = point_size;
ASSERT(point_size != 0.0f);
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;

View File

@@ -182,6 +182,9 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
Shader null_shader{};
Shader null_kernel{};
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key;

View File

@@ -113,8 +113,19 @@ u64 HostCounter::BlockingQuery() const {
if (ticks >= cache.Scheduler().Ticks()) {
cache.Scheduler().Flush();
}
return cache.Device().GetLogical().GetQueryResult<u64>(
query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
u64 data;
const VkResult result = cache.Device().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
switch (result) {
case VK_SUCCESS:
return data;
case VK_ERROR_DEVICE_LOST:
cache.Device().ReportLoss();
[[fallthrough]];
default:
throw vk::Exception(result);
}
}
} // namespace Vulkan

View File

@@ -17,6 +17,7 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -292,13 +293,16 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
update_descriptor_queue(device, scheduler), renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
staging_pool),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), query_cache(system, *this, device, scheduler) {
sampler_cache(device),
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
}
@@ -346,11 +350,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_bindings.Bind(scheduler);
if (device.IsNvDeviceDiagnosticCheckpoints()) {
scheduler.Record(
[&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); });
}
BeginTransformFeedback();
const auto pipeline_layout = pipeline.GetLayout();
@@ -364,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
});
EndTransformFeedback();
system.GPU().TickWork();
}
void RasterizerVulkan::Clear() {
@@ -477,11 +478,6 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
if (device.IsNvDeviceDiagnosticCheckpoints()) {
scheduler.Record(
[&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); });
}
scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
layout = pipeline.GetLayout(),
@@ -513,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
return buffer_cache.MustFlushRegion(addr, size);
}
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
@@ -523,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.OnCPUWrite(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.OnCPUWrite(addr, size);
query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::SyncGuestHost() {
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.MemoryManager().Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
}
fence_manager.SignalSyncPoint(value);
}
void RasterizerVulkan::ReleaseFences() {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
fence_manager.WaitPendingFences();
}
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
@@ -806,25 +850,29 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs;
for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) {
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
const auto& attrib = regs.vertex_attrib_format[index];
if (!attrib.IsValid()) {
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
continue;
}
const auto& buffer = regs.vertex_array[attrib.buffer];
[[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
ASSERT(buffer.IsEnabled());
vertex_input.attributes[vertex_input.num_attributes++] =
FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size,
attrib.offset);
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
attrib.size.Value());
}
for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) {
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) {
vertex_input.SetBinding(index, false, 0, 0);
continue;
}
vertex_input.SetBinding(
index, true, vertex_array.stride,
regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
const GPUVAddr start{vertex_array.StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
@@ -832,10 +880,6 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
ASSERT(end > start);
const std::size_t size{end - start + 1};
const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
index, vertex_array.stride,
regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
buffer_bindings.AddVertexBinding(buffer, offset);
}
}
@@ -844,18 +888,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
bool is_indexed) {
const auto& regs = system.GPU().Maxwell3D().regs;
switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads:
if (params.is_indexed) {
UNIMPLEMENTED();
} else {
case Maxwell::PrimitiveTopology::Quads: {
if (!params.is_indexed) {
const auto [buffer, offset] =
quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
params.base_vertex = 0;
params.num_vertices = params.num_vertices * 6 / 4;
params.is_indexed = true;
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
params.num_vertices = (params.num_vertices / 4) * 6;
params.base_vertex = 0;
break;
}
default: {
if (!is_indexed) {
break;

View File

@@ -21,6 +21,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
@@ -118,7 +119,13 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
@@ -254,12 +261,14 @@ private:
VKUpdateDescriptorQueue update_descriptor_queue;
VKRenderPassCache renderpass_cache;
QuadArrayPass quad_array_pass;
QuadIndexedPass quad_indexed_pass;
Uint8Pass uint8_pass;
VKTextureCache texture_cache;
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
VKFenceManager fence_manager;
VKQueryCache query_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments;

View File

@@ -166,7 +166,15 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
submit_info.pCommandBuffers = current_cmdbuf.address();
submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
submit_info.pSignalSemaphores = &semaphore;
device.GetGraphicsQueue().Submit(submit_info, *current_fence);
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
}
}
void VKScheduler::AllocateNewContext() {

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include <optional>
#include <tuple>
#include <vector>
@@ -22,22 +23,38 @@ namespace {
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
VkMemoryPropertyFlags wanted) {
const auto properties = device.GetPhysical().GetMemoryProperties();
for (u32 i = 0; i < properties.memoryTypeCount; i++) {
if (!(filter & (1 << i))) {
continue;
}
if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
/// Find a memory type with the passed requirements
std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
VkMemoryPropertyFlags wanted,
u32 filter = std::numeric_limits<u32>::max()) {
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
const auto flags = properties.memoryTypes[i].propertyFlags;
if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
return i;
}
}
return std::nullopt;
}
/// Get the preferred host visible memory type.
u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
u32 filter = std::numeric_limits<u32>::max()) {
// Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
// Otherwise search for a host visible allocation.
static constexpr auto HOST_MEMORY =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
if (!preferred_type) {
preferred_type = FindMemoryType(properties, HOST_MEMORY);
ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
}
return preferred_type.value_or(0);
}
} // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
@@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
VKStreamBuffer::~VKStreamBuffer() = default;
std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= STREAM_BUFFER_SIZE);
ASSERT(size <= stream_buffer_size);
mapped_size = size;
if (alignment > 0) {
@@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
WaitPendingOperations(offset);
bool invalidated = false;
if (offset + size > STREAM_BUFFER_SIZE) {
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
@@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) {
}
void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties);
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
// Substract from the preferred heap size some bytes to avoid getting out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024;
VkBufferCreateInfo buffer_ci;
buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
buffer_ci.pNext = nullptr;
buffer_ci.flags = 0;
buffer_ci.size = STREAM_BUFFER_SIZE;
buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
buffer_ci.usage = usage;
buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
buffer_ci.queueFamilyIndexCount = 0;
buffer_ci.pQueueFamilyIndices = nullptr;
const auto& dev = device.GetLogical();
buffer = dev.CreateBuffer(buffer_ci);
buffer = device.GetLogical().CreateBuffer(buffer_ci);
const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
const u32 required_flags = requirements.memoryTypeBits;
stream_buffer_size = static_cast<u64>(requirements.size);
const auto& dld = device.GetDispatchLoader();
const auto requirements = dev.GetBufferMemoryRequirements(*buffer);
// Prefer device local host visible allocations (this should hit AMD's pinned memory).
auto type =
FindMemoryType(device, requirements.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (!type) {
// Otherwise search for a host visible allocation.
type = FindMemoryType(device, requirements.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
ASSERT_MSG(type, "No host visible and coherent memory type found");
}
VkMemoryAllocateInfo memory_ai;
memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memory_ai.pNext = nullptr;
memory_ai.allocationSize = requirements.size;
memory_ai.memoryTypeIndex = *type;
memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
memory = dev.AllocateMemory(memory_ai);
memory = device.GetLogical().AllocateMemory(memory_ai);
buffer.BindMemory(*memory, 0);
}

Some files were not shown because too many files have changed in this diff Show More