Compare commits

...

142 Commits

Author SHA1 Message Date
Guo Yunhe
dd236c6c1d Replace non-commercial icons with free icons from icons8 (#5093) 2020-03-27 22:31:54 +01:00
Fernando Sahmkow
7a2f60df26 Merge pull request #3565 from ReinUsesLisp/image-format
engines/const_buffer_engine_interface: Store image format and types
2020-03-27 14:08:54 -04:00
ReinUsesLisp
cedbe925cd engines/const_buffer_engine_interface: Store image format type
This information is required to properly implement SULD.B. It might also
be handy for all image operations, since it would allow us to implement
them on devices that require the image format to be specified (on
desktop, this would be AMD on OpenGL and Intel on OpenGL and Vulkan).
2020-03-27 00:36:22 -03:00
Mat M
e84b760016 Merge pull request #3564 from makigumo/maxwell_to_vk_sscaled
maxwell_to_vk: implement signedscaled vertex formats
2020-03-26 21:05:52 -04:00
Dan
744b207d92 maxwell_to_vk: implement signedscaled vertex formats 2020-03-27 00:14:19 +01:00
bunnei
950b6dbc80 Merge pull request #3453 from FearlessTobi/remove-pause-lock
yuzu: Remove exit lock for game pausing
2020-03-26 16:42:57 -04:00
bunnei
3194f14aca Merge pull request #3550 from hughesjs/hughesjs-readme-changes-1
Minor Readme Changes
2020-03-26 00:50:56 -04:00
bunnei
8244536f7a Merge pull request #3548 from jroweboy/use-plugins-dir
Use the correct directory for Qt Plugins
2020-03-26 00:50:06 -04:00
bunnei
23c7dda710 Merge pull request #3544 from makigumo/myfork/patch-2
xmad: fix clang build error
2020-03-25 19:29:16 -04:00
bunnei
e6aff11057 Merge pull request #3520 from ReinUsesLisp/legacy-varyings
gl_shader_decompiler: Implement legacy varyings
2020-03-25 19:27:51 -04:00
James Hughes
aa41fcc04e Update README.md 2020-03-24 22:30:37 +00:00
James Hughes
ac4154bfde Minor ReadMe Changes
Added Discord shield and direct link to contribution guide
2020-03-24 22:26:07 +00:00
bunnei
f8382c9d9d Merge pull request #3524 from FearlessTobi/port-5106
Port citra-emu/citra#5106: "gdbstub: Ensure gdbstub doesn't drop packets crucial to initialization"
2020-03-24 16:50:58 -04:00
James Rowe
6ca8637d4c Use the correct directory for Qt Plugins 2020-03-23 18:51:46 -06:00
Fernando Sahmkow
497f593525 Merge pull request #3543 from ReinUsesLisp/gl-depth-range
gl_rasterizer: Use transformed viewport for depth ranges
2020-03-23 12:00:21 -04:00
Fernando Sahmkow
7981910746 Merge pull request #3542 from namkazt/patch-10
Implement MME shadow RAM
2020-03-23 12:00:01 -04:00
bunnei
dc4415811c Merge pull request #3546 from FearlessTobi/pointer-buffer-size
sm/controller: Increase PointerBufferSize
2020-03-22 23:31:08 -04:00
FearlessTobi
4afebf26b6 sm/controller: Increase PointerBufferSize
This increases the PointerBufferSize as a lager one is required by some services.
This change is still not hw-accurate, but it is proven to work in Ryujinx.

Instead of using a hardcoded size, we should figure out the specific values for each service in the future. Some of them can be taken from Atmosphere: https://github.com/Atmosphere-NX/Atmosphere/search?q=PointerBufferSize.
2020-03-23 03:19:30 +01:00
makigumo
5a5c6d4ed8 xmad: fix clang build error 2020-03-23 00:09:31 +01:00
bunnei
e731c4b991 Merge pull request #3477 from FearlessTobi/webapplet-shit
core/web_browser: Allow WebApplet to exit gracefully when an error occurs
2020-03-22 13:11:02 -04:00
namkazy
fc37672f26 apply replay logic to all writes. remove replay from MacroInterpreter::Send (@fincs) 2020-03-22 22:25:44 +07:00
FearlessTobi
977418c65b core/web_browser: Allow WebApplet to exit gracefully when an error occurs
Currently, yuzu just freezes when an error occurs while Initializing the WebApplet.
From a user perspective, this obviously isn't great as the game just softlocks.
With this change, yuzu will call the Finalize method, so to the game it seems like as the user just exited the WebApplet normally.

This works around https://github.com/yuzu-emu/yuzu/issues/2852.
2020-03-22 16:01:13 +01:00
namkazy
f66743cd0c maxwell_3d: change declaration order 2020-03-22 13:41:16 +07:00
namkazy
d4e93cf38c maxwell_3d: init shadow_state 2020-03-22 13:35:11 +07:00
ReinUsesLisp
bdcedc8506 gl_rasterizer: Use transformed viewport for depth ranges
Implement depth ranges using the transformed viewport instead of the
generic one. This matches the current Vulkan implementation but doesn't
support negative depth ranges. An update to glad is required for this.
2020-03-22 03:26:07 -03:00
namkazy
22f4268c2f maxwell_3d: this seem more correct. 2020-03-22 12:02:54 +07:00
namkazy
7051dc1902 maxwell_3d: update comments for shadow ram usage 2020-03-22 11:35:26 +07:00
Nguyen Dac Nam
01af036c1f marco_interpreter: write hw value when shadow ram requested 2020-03-22 10:53:41 +07:00
Nguyen Dac Nam
63c2635e6f maxwell_3d: track shadow ram ctrl and hw reg value 2020-03-22 10:53:41 +07:00
Nguyen Dac Nam
dbfbe352e0 maxwell_3d: implement MME shadow RAM 2020-03-22 10:53:35 +07:00
bunnei
e5bb5d13c4 Merge pull request #3531 from makigumo/yuzu_master
set: implement GetRegionCode
2020-03-21 22:49:28 -04:00
bunnei
e70451d967 Merge pull request #3525 from FearlessTobi/linux-compile-error
input_common/udp: Fix Linux build by using a backwards compatible way of error checking
2020-03-21 16:04:45 -04:00
bunnei
81fa492825 Merge pull request #3526 from FearlessTobi/bcat-disable
bcat: Disable Boxcat backend by default
2020-03-20 13:01:28 -04:00
bunnei
bdddbe2daa Merge pull request #3505 from namkazt/patch-8
shader_decode: implement XMAD mode CSfu
2020-03-19 17:41:01 -04:00
Dan
06dea163fa set: implement GetRegionCode 2020-03-19 10:37:42 +01:00
bunnei
bc681dc555 Merge pull request #3527 from FearlessTobi/output-mode
yuzu: Save sound output mode and set it to Stereo by default
2020-03-18 23:19:22 -04:00
Mat M
9418b983bd Merge pull request #3535 from ReinUsesLisp/gcc-warnings
video_core: Silence misc warnings
2020-03-18 20:09:32 -04:00
bunnei
76d6178e4a Merge pull request #3534 from ReinUsesLisp/oob-time-zone
time_zone_content_manager: Fix out of bounds read
2020-03-18 19:12:05 -04:00
ReinUsesLisp
38c1e77f01 vk_texture_cache: Silence misc warnings 2020-03-18 20:03:19 -03:00
ReinUsesLisp
b6b2e31e5e vk_staging_buffer_pool: Silence unused constant warning 2020-03-18 20:03:19 -03:00
ReinUsesLisp
fc51ece7bf vk_rasterizer: Remove unused variable 2020-03-18 20:03:19 -03:00
ReinUsesLisp
98d85cdc20 vk_pipeline_cache: Remove unused variable 2020-03-18 20:03:19 -03:00
ReinUsesLisp
dab450ec46 maxwell_to_vk: Sielence -Wswitch warning 2020-03-18 20:03:19 -03:00
ReinUsesLisp
351816ac38 gl_shader_decompiler: Remove deprecated function and its usages 2020-03-18 20:03:19 -03:00
ReinUsesLisp
acf328a71f gl_rasterizer: Silence misc warnings 2020-03-18 20:03:19 -03:00
ReinUsesLisp
9f46066bda kepler_compute: Remove unused variables 2020-03-18 20:03:19 -03:00
ReinUsesLisp
ba9674862d microprofile: Silence sign comparison warning 2020-03-18 20:03:19 -03:00
ReinUsesLisp
ac7ee21331 time_zone_content_manager: Fix out of bounds read
There were cases where raw_data didn't contain enough
space to hold the zero terminator.

This was caught with -fsanitize=address.
2020-03-18 19:06:16 -03:00
Mat M
56ea0f8acb Merge pull request #3530 from ReinUsesLisp/fix-clang
astc/input_common: Fix clang build issues
2020-03-18 04:28:55 -04:00
ReinUsesLisp
716d6aee30 input_common/udp: Fix clang build issues 2020-03-18 04:30:26 -03:00
ReinUsesLisp
664fa4ea06 astc: Fix clang build issues 2020-03-18 04:30:25 -03:00
ReinUsesLisp
f5658a9fda gl_shader_decompiler: Don't redeclare gl_VertexID and gl_InstanceID 2020-03-18 01:28:41 -03:00
Mat M
edb9cccb36 Merge pull request #3510 from FernandoS27/dirty-write
DirtyFlags: relax need to set render_targets as dirty
2020-03-17 17:29:22 -04:00
Mat M
f54d2d3114 Merge pull request #3509 from ReinUsesLisp/astc-opts
astc: General changes and optimizations
2020-03-17 17:28:49 -04:00
Mat M
d787856621 Merge pull request #3518 from ReinUsesLisp/scissor-clears
vk_rasterizer: Implement scissor clears and layered clears
2020-03-17 17:27:15 -04:00
Mat M
9fdfd58f9f Merge pull request #3519 from ReinUsesLisp/int-formats
maxwell_to_vk: Implement RG32 and RGB32 integer vertex formats
2020-03-17 17:26:16 -04:00
FearlessTobi
cdeadd448b yuzu: Save sound output mode and set it to Stereo by default 2020-03-17 19:03:00 +01:00
bunnei
1c45c8086e Merge pull request #3498 from ReinUsesLisp/texel-fetch-glsl
gl_shader_decompiler: Add layer component to texelFetch
2020-03-17 10:53:38 -04:00
FearlessTobi
2fd3b328ae bcat: Disable Boxcat backend by default
This commit disables the Boxcat backend by default for new users of yuzu.

There's several reasons as to why this is done:
1. Boxcat currently only actually has an impact on 3 games and doesn't influence any core mechanics of them
2. It causes a plethora of issues when enabled such as games like Crash Team Racing, Diablo 3 and Tales of Vesperia not booting at all or hanging
3. It causes https://github.com/yuzu-emu/yuzu/issues/2957 to happen. This makes the configuration menu totally unusable for many Linux users of yuzu

I think those points show that currently the negative impact of Boxcat outweighs its benefits and should therefore be disabled by default.
For users who are eager to use the extra features provided by it, they can still just turn it on in the settings.
2020-03-17 15:24:26 +01:00
FearlessTobi
230ac6a4e8 input_common/udp: Fix Linux build by using a backwards compatible way of error checking
Should fix https://github.com/yuzu-emu/yuzu/issues/3487.

error_code::failed is a function which has been introduced in Boost 1.69.
This version of boost hasn't landed in most major distros yet.
2020-03-17 12:29:25 +01:00
Gauvain "GovanifY" Roussel-Tarbouriech
eae2ed6b07 gdbstub: small logic bug fix with defer_start 2020-03-17 11:18:39 +01:00
Gauvain "GovanifY" Roussel-Tarbouriech
38036eb1c8 gdbstub: Ensure gdbstub doesn't drop packets crucial to initialization 2020-03-17 11:18:13 +01:00
bunnei
e8ded20d24 Merge pull request #3521 from ReinUsesLisp/nsight-debug
renderer_opengl: Detect Nvidia Nsight as a debugging tool
2020-03-16 22:52:42 -04:00
ReinUsesLisp
53d673a7d3 renderer_opengl: Move some logic to an anonymous namespace 2020-03-16 04:03:34 -03:00
ReinUsesLisp
311d2fc768 renderer_opengl: Detect Nvidia Nsight as a debugging tool
Use getenv to detect Nsight.
2020-03-16 03:59:08 -03:00
Rodrigo Locatti
b16c8e0e8d Merge pull request #3515 from ReinUsesLisp/vertex-vk-assert
vk_rasterizer: Fix vertex range assert
2020-03-15 21:26:54 -03:00
Rodrigo Locatti
7cc46a6faa Merge pull request #3501 from ReinUsesLisp/rgba16-snorm
video_core: Implement RGBA16_SNORM
2020-03-15 21:24:53 -03:00
Rodrigo Locatti
ddafc99776 Merge pull request #3502 from namkazt/patch-3
shader_decode: Reimplement BFE instructions
2020-03-15 21:23:04 -03:00
Rodrigo Locatti
d64edf21bb Merge pull request #3503 from makigumo/patch-2
maxwell_to_vk: add vertex format eA2B10G10R10UnormPack32
2020-03-15 21:21:38 -03:00
ReinUsesLisp
5afc397d52 gl_shader_decompiler: Implement legacy varyings
Legacy varyings are special attributes carried over in hardware from
the OpenGL 1 and OpenGL 2 days. These were generally used instead of the
generic attributes we use today. They are deprecated or removed from
most APIs, but Nvidia still ships them in hardware.

To implement these, this commit maps them 1:1 to OpenGL compatibility.
2020-03-15 21:03:59 -03:00
ReinUsesLisp
6442e02c5d shader/shader_ir: Track usage in input attribute and of legacy varyings 2020-03-15 21:01:52 -03:00
ReinUsesLisp
8e6e55d6f8 shader/shader_ir: Fix clip distance usage stores 2020-03-15 20:53:14 -03:00
ReinUsesLisp
464bd5fad7 shader/shader_ir: Change declare output attribute to a switch 2020-03-15 20:49:35 -03:00
Rodrigo Locatti
86b1f15d9a Merge pull request #3512 from bunnei/fix-renderdoc
renderer_opengl: Keep frames synchronized when using a GPU debugger.
2020-03-15 19:28:43 -03:00
ReinUsesLisp
52acb7f9a0 maxwell_to_vk: Implement RG32 and RGB32 integer vertex formats 2020-03-15 18:51:49 -03:00
Rodrigo Locatti
d91a880f11 Merge pull request #3516 from makigumo/patch-3
vk_shader_decompiler: fix linux build
2020-03-15 18:43:40 -03:00
ReinUsesLisp
71cc772988 vk_rasterizer: Implement layered clears 2020-03-15 18:37:19 -03:00
makigumo
f91046bf8d vk_shader_decompiler: fix linux build 2020-03-15 18:00:14 +01:00
ReinUsesLisp
a7131af7d6 vk_rasterizer: Fix vertex range assert
End can be equal to start in CalculateVertexArraysSize. This is quite
common when the vertex size is zero.
2020-03-15 04:04:17 -03:00
ReinUsesLisp
8baf98e439 vk_rasterizer: Reimplement clears with vkCmdClearAttachments 2020-03-15 03:40:41 -03:00
bunnei
c5afe93dcc renderer_opengl: Keep presentation frames in lock-step when GPU debugging.
- Fixes renderdoc with OpenGL renderer.
2020-03-14 17:45:01 -04:00
bunnei
4373fa8042 gl_device: Add option to check GL_EXT_debug_tool. 2020-03-14 17:39:29 -04:00
bunnei
4dfd5c84ea Merge pull request #3508 from FernandoS27/page-table
PageTable: move backing addresses to a children class as the CPU page table does not need them.
2020-03-14 16:50:27 -04:00
Fernando Sahmkow
380fc8d2e1 DirtyFlags: relax need to set render_targets as dirty
The texture cache already takes care of setting a render target to dirty 
when invalidated.
2020-03-14 11:47:33 -04:00
Fernando Sahmkow
c51dbf8038 Merge pull request #3500 from ReinUsesLisp/incompatible-types
texture_cache: Report incompatible textures as black
2020-03-14 09:49:05 -04:00
Fernando Sahmkow
41905ee467 Merge pull request #3499 from ReinUsesLisp/depth-2d-array
texture_cache/surface_params: Force depth=1 on 2D textures
2020-03-14 09:48:39 -04:00
Fernando Sahmkow
35145bd529 Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
2020-03-14 09:48:15 -04:00
Fernando Sahmkow
27cbb75e7c PageTable: move backing addresses to a children class as the CPU page table does not need them.
This PR aims to reduce the memory usage in the CPU page table by moving
GPU specific parameters into a child class. This saves 1Gb of Memory for
most games.
2020-03-14 09:43:57 -04:00
ReinUsesLisp
42cb8f1124 astc: Fix typos from search and replace 2020-03-14 01:05:20 -03:00
ReinUsesLisp
9b8fb3c756 astc: Minor changes to InputBitStream 2020-03-14 00:45:54 -03:00
ReinUsesLisp
d71d7d917e astc: Pass val in Replicate by copy 2020-03-14 00:13:58 -03:00
ReinUsesLisp
134f3ff9b4 astc: Call std::vector:reserve on decodedClolorValues to avoid reallocating 2020-03-14 00:09:56 -03:00
ReinUsesLisp
3377b78ea7 astc: Call std::vector::reserve on texelWeightValues to avoid reallocating 2020-03-13 23:52:51 -03:00
ReinUsesLisp
801fd04f75 astc: Create a LUT at compile time for encoding values 2020-03-13 23:40:02 -03:00
ReinUsesLisp
e183820956 astc: Make IntegerEncodedValue a trivial structure 2020-03-13 22:49:28 -03:00
ReinUsesLisp
70a31eda62 astc: Make IntegerEncodedValue constructor constexpr 2020-03-13 22:36:45 -03:00
ReinUsesLisp
5ed377b989 astc: Make IntegerEncodedValue trivially copyable 2020-03-13 22:30:31 -03:00
ReinUsesLisp
e7d97605e8 astc: Rename C types to common_types 2020-03-13 22:28:51 -03:00
ReinUsesLisp
835a3d09c6 astc: Move Popcnt to an anonymous namespace and make it constexpr 2020-03-13 22:26:48 -03:00
ReinUsesLisp
731a9a322e astc: Use common types instead of stdint.h integer types 2020-03-13 22:22:27 -03:00
ReinUsesLisp
d3dc4e399c astc: Use 'enum class' instead of 'enum' for EIntegerEncoding 2020-03-13 22:20:12 -03:00
ReinUsesLisp
69c7a01f88 vk/gl_shader_decompiler: Silence assertion on compute 2020-03-13 18:33:05 -03:00
ReinUsesLisp
62560f1e63 vk_shader_decompiler: Fix default varying regression 2020-03-13 18:33:05 -03:00
ReinUsesLisp
afebdda203 maxwell_3d: Add padding words to XFB entries
Use INSERT_UNION_PADDING_WORDS instead of alignas to ensure a size
requirement.
2020-03-13 18:33:05 -03:00
ReinUsesLisp
4bc4851d45 gl_shader_decompiler: Fix implicit conversion errors 2020-03-13 18:33:05 -03:00
Rodrigo Locatti
47459f6a36 vk_shader_decompiler: Fix implicit type conversion
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2020-03-13 18:33:05 -03:00
ReinUsesLisp
2fae1e6205 vk_rasterizer: Implement transform feedback binding zero 2020-03-13 18:33:05 -03:00
ReinUsesLisp
b67360c0f8 vk_shader_decompiler: Add XFB decorations to generic varyings 2020-03-13 18:33:05 -03:00
ReinUsesLisp
8d5bdcb17b vk_device: Enable VK_EXT_transform_feedback when available 2020-03-13 18:33:05 -03:00
ReinUsesLisp
c320702092 vk_device: Shrink formatless capability name size 2020-03-13 18:33:05 -03:00
ReinUsesLisp
ae6189d7c2 shader/transform_feedback: Expose buffer stride 2020-03-13 18:33:05 -03:00
ReinUsesLisp
7acebd7eb6 vk_shader_decompiler: Use registry for specialization 2020-03-13 18:33:05 -03:00
ReinUsesLisp
8e9f23f393 gl_rasterizer: Implement transform feedback bindings 2020-03-13 18:33:04 -03:00
ReinUsesLisp
4d711dface gl_shader_decompiler: Decorate output attributes with XFB layout
We sometimes have to slice attributes in different parts. This is needed
for example in instances where the game feedbacks 3 components but
writes 4 from the shader (something that is possible with
GL_NV_transform_feedback).
2020-03-13 18:33:04 -03:00
ReinUsesLisp
3dcaa84ba4 shader/transform_feedback: Add host API friendly TFB builder 2020-03-13 18:33:04 -03:00
Fernando Sahmkow
666d431ad8 Merge pull request #3473 from ReinUsesLisp/shader-purge
gl_shader_cache: Rework shader cache and store texture arrays
2020-03-13 16:26:24 -04:00
Rodrigo Locatti
244fe13219 Merge branch 'master' into shader-purge 2020-03-13 16:44:06 -03:00
bunnei
b30b1f741d Merge pull request #3491 from ReinUsesLisp/polygon-modes
gl_rasterizer: Implement polygon modes and fill rectangles
2020-03-13 10:08:57 -04:00
Nguyen Dac Nam
829f424618 nit & remove some optional param 2020-03-13 20:47:38 +07:00
Nguyen Dac Nam
a166217480 shader_decode: implement XMAD mode CSfu 2020-03-13 19:01:49 +07:00
makigumo
753bc2026f fix formatting 2020-03-13 11:37:24 +01:00
makigumo
54681909be maxwell_to_vk: add vertex format eA2B10G10R10UnormPack32 2020-03-13 11:26:13 +01:00
ReinUsesLisp
e24197bb3f gl_shader_decompiler: Initialize gl_Position on vertex shaders 2020-03-12 23:31:06 -03:00
ReinUsesLisp
3a10016e38 gl_shader_decompiler: Add missing {} on smem GLSL emission 2020-03-12 21:50:37 -03:00
ReinUsesLisp
4dcca90ef4 video_core: Implement RGBA16_SNORM
Implement RGBA16_SNORM with the current API. Nothing special here.
2020-03-12 21:42:33 -03:00
ReinUsesLisp
e22816a5bb texture_cache: Report incompatible textures as black
Some games bind incompatible texture types to certain types.
For example Astral Chain binds a 2D texture with 1 layer (non-array) to
a cubemap slot (that's how it's used in the shader). After testing this
in hardware, the expected "undefined behavior" is to report all pixels
as black.

We already have a path for reporting black textures in the texture
cache. When textures types are incompatible, this commit binds these
kind of textures. This is done on the API agnostic texture cache so no
extra code has to be inserted on OpenGL or Vulkan.

As a side effect, this fixes invalidations of ASTC textures on Astral
Chain. This happened because yuzu detected a cube texture and forced
6 faces, generating a texture larger than what the TIC reported.
2020-03-12 18:22:05 -03:00
ReinUsesLisp
daae6a323b texture_cache/surface_params: Force depth=1 on 2D textures
Sometimes games will sample a 2D array TIC with a 2D access in the
shader. This causes bad interactions with the rest of the texture cache.
To emulate what the game wants to do, force a depth=1 on 2D textures
(not 2D arrays) and let the texture cache handle the rest.
2020-03-12 18:11:42 -03:00
ReinUsesLisp
38fe070d78 gl_shader_decompiler: Add layer component to texelFetch
TexelFetch was not emitting the array component generating invalid GLSL.
2020-03-12 18:10:29 -03:00
ReinUsesLisp
825d629565 gl_shader_decompiler: Fix regression in render target declarations
A previous commit introduced a way to declare as few render targets as
possible. Turns out this introduced a regression in some games.
2020-03-12 05:01:20 -03:00
ReinUsesLisp
e4bc3c3342 gl_rasterizer: Implement polygon modes and fill rectangles 2020-03-09 20:39:58 -03:00
ReinUsesLisp
eb5861e0a2 engines/maxwell_3d: Add TFB registers and store them in shader registry 2020-03-09 18:40:53 -03:00
ReinUsesLisp
b1acb4f73f shader/registry: Address feedback 2020-03-09 18:40:53 -03:00
ReinUsesLisp
b1061afed9 gl_shader_decompiler: Add identifier to decompiled code 2020-03-09 18:40:53 -03:00
ReinUsesLisp
e612242977 gl_shader_decompiler: Roll back to GLSL core 430
RenderDoc won't build shaders if we use GLSL compatibility.
2020-03-09 18:40:53 -03:00
ReinUsesLisp
978172530e const_buffer_engine_interface: Store component types
This is required for Vulkan. Sampling integer textures with float
handles is illegal.
2020-03-09 18:40:53 -03:00
ReinUsesLisp
120f688272 yuzu/loading_screen: Remove unused shader progress mode 2020-03-09 18:40:53 -03:00
ReinUsesLisp
e1932351a9 gl_shader_cache: Reduce registry consistency to debug assert
Registry consistency is something that practically can't happen and it
has a measurable runtime cost. Reduce it to a DEBUG_ASSERT.
2020-03-09 18:40:07 -03:00
ReinUsesLisp
66a8a3e887 shader/registry: Cache tessellation state 2020-03-09 18:40:07 -03:00
ReinUsesLisp
0528be5c92 shader/registry: Store graphics and compute metadata
Store information GLSL forces us to provide but it's dynamic state in
hardware (workgroup sizes, primitive topology, shared memory size).
2020-03-09 18:40:07 -03:00
ReinUsesLisp
e8efd5a901 video_core: Rename "const buffer locker" to "registry" 2020-03-09 18:40:06 -03:00
ReinUsesLisp
bd8b9bbcee gl_shader_cache: Rework shader cache and remove post-specializations
Instead of pre-specializing shaders and then post-specializing them,
drop the later and only "specialize" the shader while decoding it.
2020-03-09 18:40:06 -03:00
FearlessTobi
7cbe6748c3 yuzu: Remove exit lock for game pausing
This removes the "exit lock" popup from yuzu when pausing a game.

Motivation
The exit lock feature is broken in many ways and doesn't work properly in a lot of games, causing it to appear every time you want to pause the game or stop it, even in places where it wouldn't on Switch.

Additionally, the feature of pausing a game doesn't exist like this on Switch and yuzu should be guaranteed to be deterministic anyway, so pausing the emulation shouldn't be able to interrupt any critical processes in any way.
2020-02-24 17:51:17 +01:00
91 changed files with 2989 additions and 2505 deletions

View File

@@ -6,9 +6,9 @@ function(copy_yuzu_Qt5_deps target_dir)
set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/")
set(Qt5_IMAGEFORMATS_DIR "${Qt5_DIR}/../../../plugins/imageformats/")
set(Qt5_RESOURCES_DIR "${Qt5_DIR}/../../../resources/")
set(PLATFORMS ${DLL_DEST}platforms/)
set(STYLES ${DLL_DEST}styles/)
set(IMAGEFORMATS ${DLL_DEST}imageformats/)
set(PLATFORMS ${DLL_DEST}plugins/platforms/)
set(STYLES ${DLL_DEST}plugins/styles/)
set(IMAGEFORMATS ${DLL_DEST}plugins/imageformats/)
windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST}
icudt*.dll
icuin*.dll
@@ -42,11 +42,15 @@ function(copy_yuzu_Qt5_deps target_dir)
icudtl.dat
)
endif ()
windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*)
windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*)
windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS}
qjpeg$<$<CONFIG:Debug>:d>.*
qgif$<$<CONFIG:Debug>:d>.*
)
# Create an empty qt.conf file. Qt will detect that this file exists, and use the folder that its in as the root folder.
# This way it'll look for plugins in the root/plugins/ folder
add_custom_command(TARGET yuzu POST_BUILD
COMMAND ${CMAKE_COMMAND} -E touch ${DLL_DEST}qt.conf
)
endfunction(copy_yuzu_Qt5_deps)

View File

@@ -57,8 +57,6 @@ set(HASH_FILES
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/shader/decode/arithmetic.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -91,8 +89,6 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/ast.h"
"${VIDEO_CORE}/shader/compiler_settings.cpp"
"${VIDEO_CORE}/shader/compiler_settings.h"
"${VIDEO_CORE}/shader/const_buffer_locker.cpp"
"${VIDEO_CORE}/shader/const_buffer_locker.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
@@ -101,9 +97,13 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/node.h"
"${VIDEO_CORE}/shader/node_helper.cpp"
"${VIDEO_CORE}/shader/node_helper.h"
"${VIDEO_CORE}/shader/registry.cpp"
"${VIDEO_CORE}/shader/registry.h"
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
"${VIDEO_CORE}/shader/transform_feedback.cpp"
"${VIDEO_CORE}/shader/transform_feedback.h"
)
set(COMBINED "")
foreach (F IN LISTS HASH_FILES)

View File

@@ -2,6 +2,7 @@ yuzu emulator
=============
[![Travis CI Build Status](https://travis-ci.com/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.com/yuzu-emu/yuzu)
[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
[![Discord](https://img.shields.io/discord/398318088170242053?color=%237289DA&label=yuzu&logo=discord&logoColor=white)](https://discord.gg/XQV6dn9)
yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
@@ -21,7 +22,7 @@ For development discussion, please join us on [Discord](https://discord.gg/XQV6d
Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted.
If you want to contribute please take a look at the [Contributor's Guide](CONTRIBUTING.md) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should also contact any of the developers on Discord in order to know about the current state of the emulator.
If you want to contribute please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should also contact any of the developers on Discord in order to know about the current state of the emulator.
### Building

6
dist/license.md vendored
View File

@@ -2,8 +2,8 @@ The icons in this folder and its subfolders have the following licenses:
Icon Name | License | Origin/Author
--- | --- | ---
qt_themes/default/icons/16x16/checked.png | Free for non-commercial use
qt_themes/default/icons/16x16/failed.png | Free for non-commercial use
qt_themes/default/icons/16x16/checked.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/default/icons/16x16/failed.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/default/icons/16x16/lock.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/default/icons/256x256/plus_folder.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/default/icons/48x48/bad_folder.png | CC BY-ND 3.0 | https://icons8.com
@@ -11,8 +11,6 @@ qt_themes/default/icons/48x48/chip.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/default/icons/48x48/folder.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/default/icons/48x48/plus.png | CC0 1.0 | Designed by BreadFish64 from the Citra team
qt_themes/default/icons/48x48/sd_card.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/qdarkstyle/icons/16x16/checked.png | Free for non-commercial use
qt_themes/qdarkstyle/icons/16x16/failed.png | Free for non-commercial use
qt_themes/qdarkstyle/icons/16x16/lock.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/qdarkstyle/icons/256x256/plus_folder.png | CC BY-ND 3.0 | https://icons8.com
qt_themes/qdarkstyle/icons/48x48/bad_folder.png | CC BY-ND 3.0 | https://icons8.com

Binary file not shown.

Before

Width:  |  Height:  |  Size: 451 B

After

Width:  |  Height:  |  Size: 657 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 428 B

After

Width:  |  Height:  |  Size: 524 B

View File

@@ -828,7 +828,7 @@ inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfi
MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick);
int t = MicroProfileLogType(Entry);
uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry);
MP_ASSERT(t == nBegin);
MP_ASSERT((uint64_t)t == nBegin);
MP_ASSERT(nTimerIndex == (nToken&0x3fff));
return Entry;
@@ -1556,10 +1556,10 @@ void MicroProfileFlip()
pFramePut->nFrameStartCpu = MP_TICK();
pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp();
if(pFrameNext->nFrameStartGpu != (uint64_t)-1)
if(pFrameNext->nFrameStartGpu != -1)
pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu);
if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1)
if(pFrameCurrent->nFrameStartGpu == -1)
pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1;
uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu;

View File

@@ -343,8 +343,8 @@ The icons used in this project have the following licenses:
Icon Name | License | Origin/Author
--- | --- | ---
checked.png | Free for non-commercial use
failed.png | Free for non-commercial use
checked.png | CC BY-ND 3.0 | https://icons8.com
failed.png | CC BY-ND 3.0 | https://icons8.com
lock.png | CC BY-ND 3.0 | https://icons8.com
plus_folder.png (Default, Dark) | CC BY-ND 3.0 | https://icons8.com
bad_folder.png (Default, Dark) | CC BY-ND 3.0 | https://icons8.com

View File

@@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/shader/decode/arithmetic.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/ast.h"
"${VIDEO_CORE}/shader/compiler_settings.cpp"
"${VIDEO_CORE}/shader/compiler_settings.h"
"${VIDEO_CORE}/shader/const_buffer_locker.cpp"
"${VIDEO_CORE}/shader/const_buffer_locker.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
@@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/node.h"
"${VIDEO_CORE}/shader/node_helper.cpp"
"${VIDEO_CORE}/shader/node_helper.h"
"${VIDEO_CORE}/shader/registry.cpp"
"${VIDEO_CORE}/shader/registry.h"
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
"${VIDEO_CORE}/shader/transform_feedback.cpp"
"${VIDEO_CORE}/shader/transform_feedback.h"
# and also check that the scm_rev files haven't changed
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"

View File

@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
backing_addr.resize(num_page_table_entries);
// The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
// vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.shrink_to_fit();
attributes.shrink_to_fit();
}
BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
BackingPageTable::~BackingPageTable() = default;
void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
PageTable::Resize(address_space_width_in_bits);
const std::size_t num_page_table_entries = 1ULL
<< (address_space_width_in_bits - page_size_in_bits);
backing_addr.resize(num_page_table_entries);
backing_addr.shrink_to_fit();
}

View File

@@ -76,9 +76,20 @@ struct PageTable {
*/
std::vector<PageType> attributes;
std::vector<u64> backing_addr;
const std::size_t page_size_in_bits{};
};
/**
* A more advanced Page Table with the ability to save a backing address when using it
* depends on another MMU.
*/
struct BackingPageTable : PageTable {
explicit BackingPageTable(std::size_t page_size_in_bits);
~BackingPageTable();
void Resize(std::size_t address_space_width_in_bits);
std::vector<u64> backing_addr;
};
} // namespace Common

View File

@@ -166,7 +166,7 @@ struct System::Impl {
service_manager = std::make_shared<Service::SM::ServiceManager>();
Service::Init(service_manager, system);
GDBStub::Init();
GDBStub::DeferStart();
renderer = VideoCore::CreateRenderer(emu_window, system);
if (!renderer->Init()) {

View File

@@ -141,6 +141,7 @@ constexpr char target_xml[] =
)";
int gdbserver_socket = -1;
bool defer_start = false;
u8 command_buffer[GDB_BUFFER_SIZE];
u32 command_length;
@@ -1166,6 +1167,9 @@ static void RemoveBreakpoint() {
void HandlePacket() {
if (!IsConnected()) {
if (defer_start) {
ToggleServer(true);
}
return;
}
@@ -1256,6 +1260,10 @@ void ToggleServer(bool status) {
}
}
void DeferStart() {
defer_start = true;
}
static void Init(u16 port) {
if (!server_enabled) {
// Set the halt loop to false in case the user enabled the gdbstub mid-execution.
@@ -1341,6 +1349,7 @@ void Shutdown() {
if (!server_enabled) {
return;
}
defer_start = false;
LOG_INFO(Debug_GDBStub, "Stopping GDB ...");
if (gdbserver_socket != -1) {

View File

@@ -43,6 +43,13 @@ void ToggleServer(bool status);
/// Start the gdbstub server.
void Init();
/**
* Defer initialization of the gdbstub to the first packet processing functions.
* This avoids a case where the gdbstub thread is frozen after initialization
* and fails to respond in time to packets.
*/
void DeferStart();
/// Stop gdbstub server.
void Shutdown();

View File

@@ -254,6 +254,12 @@ void WebBrowser::Execute() {
if (status != RESULT_SUCCESS) {
complete = true;
// This is a workaround in order not to softlock yuzu when an error happens during the
// webapplet init. In order to avoid an svcBreak, the status is set to RESULT_SUCCESS
Finalize();
status = RESULT_SUCCESS;
return;
}

View File

@@ -111,6 +111,14 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
rb.PushEnum(available_language_codes[Settings::values.language_index]);
}
void SET::GetRegionCode(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SET, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(Settings::values.region_index);
}
SET::SET() : ServiceFramework("set") {
// clang-format off
static const FunctionInfo functions[] = {
@@ -118,7 +126,7 @@ SET::SET() : ServiceFramework("set") {
{1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"},
{2, &SET::MakeLanguageCode, "MakeLanguageCode"},
{3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"},
{4, nullptr, "GetRegionCode"},
{4, &SET::GetRegionCode, "GetRegionCode"},
{5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"},
{6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"},
{7, nullptr, "GetKeyCodeMap"},

View File

@@ -43,6 +43,7 @@ private:
void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx);
void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx);
void GetQuestFlag(Kernel::HLERequestContext& ctx);
void GetRegionCode(Kernel::HLERequestContext& ctx);
};
} // namespace Service::Set

View File

@@ -44,7 +44,7 @@ void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u16>(0x500);
rb.Push<u16>(0x1000);
}
Controller::Controller() : ServiceFramework("IpcController") {

View File

@@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
return {};
}
std::vector<char> raw_data(binary_list->GetSize());
std::vector<char> raw_data(binary_list->GetSize() + 1);
binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize());
std::stringstream data_stream{raw_data.data()};

View File

@@ -86,6 +86,7 @@ void LogSettings() {
LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
LogSetting("System_CurrentUser", Settings::values.current_user);
LogSetting("System_LanguageIndex", Settings::values.language_index);
LogSetting("System_RegionIndex", Settings::values.region_index);
LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);

View File

@@ -387,6 +387,8 @@ struct Values {
s32 current_user;
s32 language_index;
s32 region_index;
s32 sound_index;
// Controls
std::array<PlayerInput, 10> players;

View File

@@ -35,7 +35,7 @@ public:
pad_index(pad_index) {
boost::system::error_code ec{};
auto ipv4 = boost::asio::ip::make_address_v4(host, ec);
if (ec.failed()) {
if (ec.value() != boost::system::errc::success) {
LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host);
ipv4 = boost::asio::ip::address_v4{};
}

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <mutex>
#include <optional>
#include <tuple>
#include "common/param_package.h"
@@ -44,7 +45,7 @@ public:
std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override {
{
std::lock_guard guard(status->update_mutex);
status->touch_calibration.emplace();
status->touch_calibration = DeviceStatus::CalibrationData{};
// These default values work well for DS4 but probably not other touch inputs
status->touch_calibration->min_x = params.Get("min_x", 100);
status->touch_calibration->min_y = params.Get("min_y", 50);

View File

@@ -65,8 +65,6 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_decompiler.h
renderer_opengl/gl_shader_disk_cache.cpp
renderer_opengl/gl_shader_disk_cache.h
renderer_opengl/gl_shader_gen.cpp
renderer_opengl/gl_shader_gen.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp
@@ -118,8 +116,6 @@ add_library(video_core STATIC
shader/ast.h
shader/compiler_settings.cpp
shader/compiler_settings.h
shader/const_buffer_locker.cpp
shader/const_buffer_locker.h
shader/control_flow.cpp
shader/control_flow.h
shader/decode.cpp
@@ -128,9 +124,13 @@ add_library(video_core STATIC
shader/node_helper.cpp
shader/node_helper.h
shader/node.h
shader/registry.cpp
shader/registry.h
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
shader/transform_feedback.cpp
shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/format_lookup_table.cpp

View File

@@ -15,14 +15,6 @@ namespace VideoCommon::Dirty {
using Tegra::Engines::Maxwell3D;
void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store) {
store[RenderTargets] = true;
store[ZetaBuffer] = true;
for (std::size_t i = 0; i < Maxwell3D::Regs::NumRenderTargets; ++i) {
store[ColorBuffer0 + i] = true;
}
}
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);

View File

@@ -44,8 +44,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
FillBlock(tables[1], begin, num, index_b);
}
void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store);
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
} // namespace VideoCommon::Dirty

View File

@@ -16,11 +16,16 @@ namespace Tegra::Engines {
struct SamplerDescriptor {
union {
BitField<0, 20, Tegra::Shader::TextureType> texture_type;
BitField<20, 1, u32> is_array;
BitField<21, 1, u32> is_buffer;
BitField<22, 1, u32> is_shadow;
u32 raw{};
u32 raw = 0;
BitField<0, 2, Tegra::Shader::TextureType> texture_type;
BitField<2, 3, Tegra::Texture::ComponentType> r_type;
BitField<5, 1, u32> is_array;
BitField<6, 1, u32> is_buffer;
BitField<7, 1, u32> is_shadow;
BitField<8, 3, Tegra::Texture::ComponentType> g_type;
BitField<11, 3, Tegra::Texture::ComponentType> b_type;
BitField<14, 3, Tegra::Texture::ComponentType> a_type;
BitField<17, 7, Tegra::Texture::TextureFormat> format;
};
bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +36,50 @@ struct SamplerDescriptor {
return !operator==(rhs);
}
static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
using Tegra::Shader::TextureType;
SamplerDescriptor result;
switch (tic_texture_type) {
result.format.Assign(tic.format.Value());
result.r_type.Assign(tic.r_type.Value());
result.g_type.Assign(tic.g_type.Value());
result.b_type.Assign(tic.b_type.Value());
result.a_type.Assign(tic.a_type.Value());
switch (tic.texture_type.Value()) {
case Tegra::Texture::TextureType::Texture1D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture1D);
return result;
case Tegra::Texture::TextureType::Texture2D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture2D);
return result;
case Tegra::Texture::TextureType::Texture3D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture3D);
return result;
case Tegra::Texture::TextureType::TextureCubemap:
result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::TextureCube);
return result;
case Tegra::Texture::TextureType::Texture1DArray:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.texture_type.Assign(TextureType::Texture1D);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DArray:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.texture_type.Assign(TextureType::Texture2D);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture1DBuffer:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(0);
result.texture_type.Assign(TextureType::Texture1D);
result.is_buffer.Assign(1);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DNoMipmap:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture2D);
return result;
case Tegra::Texture::TextureType::TextureCubeArray:
result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
result.texture_type.Assign(TextureType::TextureCube);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
default:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture2D);
return result;
}
}

View File

@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}
@@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
const auto r_type{tic_entry.r_type.Value()};
const auto g_type{tic_entry.g_type.Value()};
const auto b_type{tic_entry.b_type.Value()};
const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
return tic_entry;
}

View File

@@ -98,6 +98,8 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.framebuffer_srgb = 1;
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
shadow_state = regs;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
@@ -160,8 +162,17 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
u32 arg = method_call.argument;
// Keep track of the register value in shadow_state when requested.
if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
shadow_state.reg_array[method] = arg;
} else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) {
arg = shadow_state.reg_array[method];
}
if (regs.reg_array[method] != arg) {
regs.reg_array[method] = arg;
for (const auto& table : dirty.tables) {
dirty.flags[table[method]] = true;
@@ -169,12 +180,16 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
}
switch (method) {
case MAXWELL3D_REG_INDEX(shadow_ram_control): {
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument);
break;
}
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(method_call.argument);
ProcessMacroUpload(arg);
break;
}
case MAXWELL3D_REG_INDEX(macros.bind): {
ProcessMacroBind(method_call.argument);
ProcessMacroBind(arg);
break;
}
case MAXWELL3D_REG_INDEX(firmware[4]): {
@@ -250,7 +265,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
}
case MAXWELL3D_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
upload_state.ProcessData(arg, is_last_call);
if (is_last_call) {
OnMemoryWrite();
}
@@ -638,7 +653,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}

View File

@@ -67,6 +67,7 @@ public:
static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t NumTransformFeedbackBuffers = 4;
static constexpr std::size_t MaxShaderProgram = 6;
static constexpr std::size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
@@ -524,6 +525,23 @@ public:
FractionalEven = 2,
};
enum class PolygonMode : u32 {
Point = 0x1b00,
Line = 0x1b01,
Fill = 0x1b02,
};
enum class ShadowRamControl : u32 {
// write value to shadow ram
Track = 0,
// write value to shadow ram ( with validation ??? )
TrackWithFilter = 1,
// only write to real hw register
Passthrough = 2,
// write value from shadow ram to real hw register
Replay = 3,
};
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
@@ -621,6 +639,29 @@ public:
float depth_range_far;
};
struct TransformFeedbackBinding {
u32 buffer_enable;
u32 address_high;
u32 address_low;
s32 buffer_size;
s32 buffer_offset;
INSERT_UNION_PADDING_WORDS(3);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
};
static_assert(sizeof(TransformFeedbackBinding) == 32);
struct TransformFeedbackLayout {
u32 stream;
u32 varying_count;
u32 stride;
INSERT_UNION_PADDING_WORDS(1);
};
static_assert(sizeof(TransformFeedbackLayout) == 16);
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -629,6 +670,10 @@ public:
return shader_config[index].enable != 0;
}
bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
return IsShaderConfigEnabled(static_cast<std::size_t>(type));
}
union {
struct {
INSERT_UNION_PADDING_WORDS(0x45);
@@ -640,7 +685,9 @@ public:
u32 bind;
} macros;
INSERT_UNION_PADDING_WORDS(0x17);
ShadowRamControl shadow_ram_control;
INSERT_UNION_PADDING_WORDS(0x16);
Upload::Registers upload;
struct {
@@ -677,7 +724,13 @@ public:
u32 rasterize_enable;
INSERT_UNION_PADDING_WORDS(0xF1);
std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
INSERT_UNION_PADDING_WORDS(0xC0);
std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
INSERT_UNION_PADDING_WORDS(0x1);
u32 tfb_enabled;
@@ -705,7 +758,12 @@ public:
s32 clear_stencil;
INSERT_UNION_PADDING_WORDS(0x7);
INSERT_UNION_PADDING_WORDS(0x2);
PolygonMode polygon_mode_front;
PolygonMode polygon_mode_back;
INSERT_UNION_PADDING_WORDS(0x3);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
@@ -764,7 +822,11 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
INSERT_UNION_PADDING_WORDS(0x19);
INSERT_UNION_PADDING_WORDS(0x10);
u32 fill_rectangle;
INSERT_UNION_PADDING_WORDS(0x8);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
@@ -1187,7 +1249,11 @@ public:
u32 tex_cb_index;
INSERT_UNION_PADDING_WORDS(0x395);
INSERT_UNION_PADDING_WORDS(0x7D);
std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
INSERT_UNION_PADDING_WORDS(0x298);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1210,7 +1276,12 @@ public:
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
};
Regs regs{};
/// Store temporary hw register values, used by some calls to restore state after a operation
Regs shadow_state;
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@@ -1405,6 +1476,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
@@ -1413,6 +1485,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
ASSERT_REG_POSITION(tfb_bindings, 0xE0);
ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1422,6 +1496,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
@@ -1435,6 +1511,7 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
@@ -1508,6 +1585,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
ASSERT_REG_POSITION(ssbo_info, 0xD18);
ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);

View File

@@ -82,6 +82,10 @@ union Attribute {
Position = 7,
Attribute_0 = 8,
Attribute_31 = 39,
FrontColor = 40,
FrontSecondaryColor = 41,
BackColor = 42,
BackSecondaryColor = 43,
ClipDistances0123 = 44,
ClipDistances4567 = 45,
PointCoord = 46,
@@ -89,6 +93,8 @@ union Attribute {
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
TessCoordInstanceIDVertexID = 47,
TexCoord_0 = 48,
TexCoord_7 = 55,
// This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
// shader. It is unknown what the other values contain.
FrontFacing = 63,

View File

@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
RGBA16_UNORM = 0xC6,
RGBA16_SNORM = 0xC7,
RGBA16_UINT = 0xC9,
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,

View File

@@ -4,13 +4,15 @@
#include <algorithm>
#include <limits>
#include <vector>
#include "common/common_types.h"
#include "video_core/guest_driver.h"
namespace VideoCore {
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
if (texture_handler_size_deduced) {
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
if (texture_handler_size) {
return;
}
const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
if (min_val > 2) {
return;
}
texture_handler_size_deduced = true;
texture_handler_size = min_texture_handler_size * min_val;
}

View File

@@ -4,6 +4,7 @@
#pragma once
#include <optional>
#include <vector>
#include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
*/
class GuestDriverProfile {
public:
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
explicit GuestDriverProfile() = default;
explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
: texture_handler_size{texture_handler_size} {}
void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
u32 GetTextureHandlerSize() const {
return texture_handler_size;
return texture_handler_size.value_or(default_texture_handler_size);
}
bool TextureHandlerSizeKnown() const {
return texture_handler_size_deduced;
bool IsTextureHandlerSizeKnown() const {
return texture_handler_size.has_value();
}
private:
// Minimum size of texture handler any driver can use.
static constexpr u32 min_texture_handler_size = 4;
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
// use 4 bytes instead. Thus, certain drivers may squish the size.
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
// Thus, certain drivers may squish the size.
static constexpr u32 default_texture_handler_size = 8;
u32 texture_handler_size = default_texture_handler_size;
bool texture_handler_size_deduced = false;
std::optional<u32> texture_handler_size = default_texture_handler_size;
};
} // namespace VideoCore

View File

@@ -174,7 +174,7 @@ private:
/// End of address space, based on address space in bits.
static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
Common::PageTable page_table{page_bits};
Common::BackingPageTable page_table{page_bits};
VMAMap vma_map;
VideoCore::RasterizerInterface& rasterizer;

View File

@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::R8UI>,
MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::RGBA16U>,
MortonCopy<true, PixelFormat::RGBA16S>,
MortonCopy<true, PixelFormat::RGBA16UI>,
MortonCopy<true, PixelFormat::R11FG11FB10F>,
MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::R8U>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::RGBA16S>,
MortonCopy<false, PixelFormat::RGBA16U>,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,

View File

@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
enum class LoadCallbackStage {
Prepare,
Decompile,
Build,
Complete,
};

View File

@@ -28,7 +28,6 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -76,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry) {
const ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
@@ -94,10 +93,6 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
void oglEnablei(GLenum cap, bool state, GLuint index) {
(state ? glEnablei : glDisablei)(cap, index);
}
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
@@ -272,9 +267,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SetupDrawTextures(stage, shader);
SetupDrawImages(stage, shader);
const ProgramVariant variant(primitive_mode);
const auto program_handle = shader->GetHandle(variant);
const GLuint program_handle = shader->GetHandle();
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
@@ -295,7 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
// clip distances only when it's written by a shader stage.
clip_distances |= shader->GetShaderEntries().clip_distances;
clip_distances |= shader->GetEntries().clip_distances;
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
@@ -481,12 +474,12 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
query_cache.UpdateCounters();
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -498,7 +491,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncTransformFeedback();
SyncPointState();
SyncPolygonOffset();
SyncAlphaTest();
@@ -532,7 +524,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Upload vertex and index data.
SetupVertexBuffer();
SetupVertexInstances();
GLintptr index_buffer_offset;
GLintptr index_buffer_offset = 0;
if (is_indexed) {
index_buffer_offset = SetupIndexBuffer();
}
@@ -558,7 +550,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
ConfigureFramebuffers();
// Signal the buffer cache that we are not going to upload more things.
const bool invalidate = buffer_cache.Unmap();
buffer_cache.Unmap();
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
@@ -571,7 +563,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
glTextureBarrier();
}
++num_queued_commands;
BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
@@ -610,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
EndTransformFeedback();
++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -622,12 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
launch_desc.block_dim_z, launch_desc.shared_alloc,
launch_desc.local_pos_alloc);
program_manager.BindComputeShader(kernel->GetHandle(variant));
program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -645,6 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -749,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
const auto& shader_stage = stages[stage_index];
u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
for (const auto& entry : shader->GetEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(binding++, buffer, entry);
}
@@ -760,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
for (const auto& entry : kernel->GetEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
@@ -772,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
}
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry) {
const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -796,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
for (const auto& entry : shader->GetEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -810,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
for (const auto& entry : kernel->GetEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -818,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
@@ -830,7 +822,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetShaderEntries().samplers) {
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
@@ -843,7 +835,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().samplers) {
for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
@@ -852,7 +844,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
}
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry) {
const SamplerEntry& entry) {
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
if (!view) {
// Can occur when texture addr is null or its memory is unmapped/invalid
@@ -875,7 +867,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetShaderEntries().images) {
for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
@@ -885,14 +877,14 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : shader->GetShaderEntries().images) {
for (const auto& entry : shader->GetEntries().images) {
const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
const GLShader::ImageEntry& entry) {
const ImageEntry& entry) {
const auto view = texture_cache.GetImageSurface(tic, entry);
if (!view) {
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
@@ -941,13 +933,15 @@ void RasterizerOpenGL::SyncViewport() {
}
flags[Dirty::Viewport0 + i] = false;
const Common::Rectangle<f32> rect{regs.viewport_transform[i].GetRect()};
const auto& src = regs.viewport_transform[i];
const Common::Rectangle<f32> rect{src.GetRect()};
glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
rect.GetHeight());
const auto& src = regs.viewports[i];
glDepthRangeIndexed(static_cast<GLuint>(i), static_cast<GLdouble>(src.depth_range_near),
static_cast<GLdouble>(src.depth_range_far));
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
}
}
}
@@ -1096,6 +1090,45 @@ void RasterizerOpenGL::SyncRasterizeEnable() {
oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
if (gpu.regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
return;
}
flags[Dirty::PolygonModeFront] = true;
flags[Dirty::PolygonModeBack] = true;
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
return;
}
if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
@@ -1257,11 +1290,6 @@ void RasterizerOpenGL::SyncScissorTest() {
}
}
void RasterizerOpenGL::SyncTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
}
void RasterizerOpenGL::SyncPointState() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
@@ -1337,4 +1365,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
}
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
if (enabled_transform_feedback_buffers[index]) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
0);
}
enabled_transform_feedback_buffers[index] = false;
continue;
}
enabled_transform_feedback_buffers[index] = true;
auto& tfb_buffer = transform_feedback_buffers[index];
tfb_buffer.Create();
const GLuint handle = tfb_buffer.handle;
const std::size_t size = binding.buffer_size;
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
static_cast<GLsizeiptr>(size));
}
glBeginTransformFeedback(GL_POINTS);
}
void RasterizerOpenGL::EndTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
glEndTransformFeedback();
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
continue;
}
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
}
}
} // namespace OpenGL

View File

@@ -98,7 +98,7 @@ private:
/// Configures a constant buffer.
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry);
const ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -107,7 +107,7 @@ private:
void SetupComputeGlobalMemory(const Shader& kernel);
/// Configures a constant buffer.
void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Configures the current textures to use for the draw command.
@@ -118,7 +118,7 @@ private:
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry);
const SamplerEntry& entry);
/// Configures images in a graphics shader.
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -127,8 +127,7 @@ private:
void SetupComputeImages(const Shader& shader);
/// Configures an image.
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
const GLShader::ImageEntry& entry);
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -169,15 +168,15 @@ private:
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
/// Syncs the transform feedback state to match the guest state
void SyncTransformFeedback();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable();
/// Syncs polygon modes to match the guest state
void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
@@ -190,6 +189,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
/// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
@@ -227,6 +232,11 @@ private:
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;

View File

@@ -2,12 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <atomic>
#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -24,13 +28,14 @@
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
namespace {
@@ -56,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
std::size_t CalculateProgramSize(const ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
@@ -109,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
}
}
/// Describes primitive behavior on geometry shaders
constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
case GL_POINTS:
return {"points", 1};
case GL_LINES:
case GL_LINE_STRIP:
return {"lines", 2};
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return {"lines_adjacency", 4};
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return {"triangles", 3};
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return {"triangles_adjacency", 6};
default:
return {"points", 1};
}
}
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
const ProgramCode& code_b) {
const ProgramCode& code_b = {}) {
u64 unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
@@ -143,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
return unique_identifier;
}
/// Creates an unspecialized program from code streams
std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
const std::optional<ShaderIR>& ir_b) {
switch (shader_type) {
case ShaderType::Vertex:
return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
case ShaderType::Geometry:
return GLShader::GenerateGeometryShader(device, ir);
case ShaderType::Fragment:
return GLShader::GenerateFragmentShader(device, ir);
case ShaderType::Compute:
return GLShader::GenerateComputeShader(device, ir);
default:
UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
return {};
}
}
constexpr const char* GetShaderTypeName(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
@@ -196,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {};
}
std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) {
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system,
ShaderType shader_type) {
if (shader_type == ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info};
const auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value);
}
}
std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
return std::make_unique<ConstBufferLocker>(shader_type,
GetConstBufferEngineInterface(system, shader_type));
}
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
locker.SetBoundBuffer(usage.bound_buffer);
for (const auto& key : usage.keys) {
const auto [buffer, offset] = key.first;
locker.InsertKey(buffer, offset, key.second);
for (const auto& [offset, sampler] : entry.bound_samplers) {
registry->InsertBoundSampler(offset, sampler);
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
locker.InsertBoundSampler(offset, sampler);
}
for (const auto& [key, sampler] : usage.bindless_samplers) {
for (const auto& [key, sampler] : entry.bindless_samplers) {
const auto [buffer, offset] = key;
locker.InsertBindlessSampler(buffer, offset, sampler);
registry->InsertBindlessSampler(buffer, offset, sampler);
}
return registry;
}
CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type,
const ProgramCode& code, const ProgramCode& code_b,
ConstBufferLocker& locker, const ProgramVariant& variant,
bool hint_retrievable = false) {
LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type));
const bool is_compute = shader_type == ShaderType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
std::optional<ShaderIR> ir_b;
if (!code_b.empty()) {
ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
}
std::string source = fmt::format(R"(// {}
#version 430 core
#extension GL_ARB_separate_shader_objects : enable
)",
GetShaderId(unique_identifier, shader_type));
if (device.HasShaderBallot()) {
source += "#extension GL_ARB_shader_ballot : require\n";
}
if (device.HasVertexViewportLayer()) {
source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
}
if (device.HasImageLoadFormatted()) {
source += "#extension GL_EXT_shader_image_load_formatted : require\n";
}
if (device.HasWarpIntrinsics()) {
source += "#extension GL_NV_gpu_shader5 : require\n"
"#extension GL_NV_shader_thread_group : require\n"
"#extension GL_NV_shader_thread_shuffle : require\n";
}
// This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
// on places where we don't want to.
// Thanks to Ryujinx for finding this workaround.
source += "#pragma optionNV(fastmath off)\n";
if (shader_type == ShaderType::Geometry) {
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
source += fmt::format("layout ({}) in;\n", glsl_topology);
}
if (shader_type == ShaderType::Compute) {
if (variant.local_memory_size > 0) {
source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
Common::AlignUp(variant.local_memory_size, 4) / 4);
}
source +=
fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
variant.block_x, variant.block_y, variant.block_z);
if (variant.shared_memory_size > 0) {
// shared_memory_size is described in number of words
source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
}
}
source += '\n';
source += GenerateGLSL(device, shader_type, ir, ir_b);
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
u64 unique_identifier, const ShaderIR& ir,
const Registry& registry, bool hint_retrievable = false) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
OGLShader shader;
shader.Create(source.c_str(), GetGLShaderType(shader_type));
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
@@ -299,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
}
std::unordered_set<GLenum> GetSupportedFormats() {
GLint num_formats{};
GLint num_formats;
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
@@ -314,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
: RasterizerCacheObject{params.host_ptr}, system{params.system},
disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, shader_type{shader_type},
entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
if (!params.precompiled_variants) {
return;
}
for (const auto& pair : *params.precompiled_variants) {
auto locker = MakeLocker(system, shader_type);
const auto& usage = pair->first;
FillLocker(*locker, usage);
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
std::unique_ptr<LockerVariant>* locker_variant = nullptr;
const auto it =
std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
return variant->locker->HasEqualKeys(*locker);
});
if (it == locker_variants.end()) {
locker_variant = &locker_variants.emplace_back();
*locker_variant = std::make_unique<LockerVariant>();
locker_variant->get()->locker = std::move(locker);
} else {
locker_variant = &*it;
}
locker_variant->get()->programs.emplace(usage.variant, pair->second);
}
CachedShader::~CachedShader() = default;
GLuint CachedShader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
return program->handle;
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type, ProgramCode code,
ProgramCode code_b) {
const auto shader_type = GetShaderType(program_type);
params.disk_cache.SaveRaw(
ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
const std::size_t size_in_bytes = code.size() * sizeof(u64);
ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D());
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (!code_b.empty()) {
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
return std::shared_ptr<CachedShader>(new CachedShader(
params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b)));
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = shader_type;
entry.code = std::move(code);
entry.code_b = std::move(code_b);
entry.unique_identifier = params.unique_identifier;
entry.bound_buffer = registry->GetBoundBuffer();
entry.graphics_info = registry->GetGraphicsInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
params.disk_cache.SaveRaw(
ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code));
const std::size_t size_in_bytes = code.size() * sizeof(u64);
ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
params.system.GPU().KeplerCompute());
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
return std::shared_ptr<CachedShader>(new CachedShader(
params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
auto& engine = params.system.GPU().KeplerCompute();
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = ShaderType::Compute;
entry.code = std::move(code);
entry.unique_identifier = uid;
entry.bound_buffer = registry->GetBoundBuffer();
entry.compute_info = registry->GetComputeInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized) {
return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type,
unspecialized.entries, unspecialized.code,
unspecialized.code_b));
}
GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
EnsureValidLockerVariant();
const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
auto& program = entry->second;
if (!is_cache_miss) {
return program->handle;
}
program = BuildShader(device, unique_identifier, shader_type, code, code_b,
*curr_locker_variant->locker, variant);
disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
return program->handle;
}
bool CachedShader::EnsureValidLockerVariant() {
const auto previous_variant = curr_locker_variant;
if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
curr_locker_variant = nullptr;
}
if (!curr_locker_variant) {
for (auto& variant : locker_variants) {
if (variant->locker->IsConsistent()) {
curr_locker_variant = variant.get();
}
}
}
if (!curr_locker_variant) {
auto& new_variant = locker_variants.emplace_back();
new_variant = std::make_unique<LockerVariant>();
new_variant->locker = MakeLocker(system, shader_type);
curr_locker_variant = new_variant.get();
}
return previous_variant == curr_locker_variant;
}
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
const ConstBufferLocker& locker) const {
return ShaderDiskCacheUsage{unique_identifier, variant,
locker.GetBoundBuffer(), locker.GetKeys(),
locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -432,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
const auto transferable = disk_cache.LoadTransferable();
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
const auto [raws, shader_usages] = *transferable;
if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
return;
}
const auto dumps = disk_cache.LoadPrecompiled();
const std::vector gl_cache = disk_cache.LoadPrecompiled();
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
@@ -450,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// Inform the frontend about shader build initialization
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
}
std::mutex mutex;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
std::atomic_bool compilation_failed = false;
std::atomic_bool gl_cache_failed = false;
const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
const ShaderDumpsMap& dumps) {
const auto find_precompiled = [&gl_cache](u64 id) {
return std::find_if(gl_cache.begin(), gl_cache.end(),
[id](const auto& entry) { return entry.unique_identifier == id; });
};
const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
context->MakeCurrent();
SCOPE_EXIT({ return context->DoneCurrent(); });
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
if (stop_loading) {
return;
}
const auto& usage{shader_usages[i]};
const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
const auto dump{dumps.find(usage)};
const auto& entry = (*transferable)[i];
const u64 uid = entry.unique_identifier;
const auto it = find_precompiled(uid);
const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
CachedProgram shader;
if (dump != dumps.end()) {
// If the shader is dumped, attempt to load it with
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
if (!shader) {
compilation_failed = true;
return;
const bool is_compute = entry.type == ShaderType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
auto registry = MakeRegistry(entry);
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
std::shared_ptr<OGLProgram> program;
if (precompiled_entry) {
// If the shader is precompiled, attempt to load it with
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
if (!program) {
gl_cache_failed = true;
}
}
if (!shader) {
auto locker{MakeLocker(system, unspecialized.type)};
FillLocker(*locker, usage);
shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
unspecialized.code, unspecialized.code_b, *locker,
usage.variant, true);
if (!program) {
// Otherwise compile it from GLSL
program = BuildShader(device, entry.type, uid, ir, *registry, true);
}
PrecompiledShader shader;
shader.program = std::move(program);
shader.registry = std::move(registry);
shader.entries = MakeEntries(ir);
std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
shader_usages.size());
transferable->size());
}
precompiled_programs.emplace(usage, std::move(shader));
// TODO(Rodrigo): Is there a better way to do this?
precompiled_variants[usage.unique_identifier].push_back(
precompiled_programs.find(usage));
runtime_cache.emplace(entry.unique_identifier, std::move(shader));
}
};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
const std::size_t bucket_size{shader_usages.size() / num_workers};
const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
threads[i] = std::thread(worker, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) {
if (gl_cache_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
@@ -533,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
for (std::size_t i = 0; i < shader_usages.size(); ++i) {
const auto& usage{shader_usages[i]};
if (dumps.find(usage) == dumps.end()) {
const auto& program{precompiled_programs.at(usage)};
disk_cache.SaveDump(usage, program->handle);
for (std::size_t i = 0; i < transferable->size(); ++i) {
const u64 id = (*transferable)[i].unique_identifier;
const auto it = find_precompiled(id);
if (it == gl_cache.end()) {
const GLuint program = runtime_cache.at(id).program->handle;
disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
}
@@ -547,80 +416,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
const auto it = precompiled_variants.find(unique_identifier);
return it == precompiled_variants.end() ? nullptr : &it->second;
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
CachedProgram shader = std::make_shared<OGLProgram>();
shader->handle = glCreateProgram();
glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
static_cast<GLsizei>(dump.binary.size()));
auto program = std::make_shared<OGLProgram>();
program->handle = glCreateProgram();
glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(program->handle, precompiled_entry.binary_format,
precompiled_entry.binary.data(),
static_cast<GLsizei>(precompiled_entry.binary.size()));
GLint link_status{};
glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
GLint link_status;
glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
}
return shader;
}
bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws) {
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
for (std::size_t i = 0; i < raws.size(); ++i) {
if (stop_loading) {
return false;
}
const auto& raw{raws[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash{
GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - "
"removing shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateTransferable();
return false;
}
const u32 main_offset =
raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
ConstBufferLocker locker(raw.GetType());
const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (raw.HasProgramA()) {
// ir_b.emplace(raw.GetProgramCodeB(), main_offset);
// }
UnspecializedShader unspecialized;
unspecialized.entries = GLShader::GetEntries(ir);
unspecialized.type = raw.GetType();
unspecialized.code = raw.GetCode();
unspecialized.code_b = raw.GetCodeB();
unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
}
return true;
return program;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -648,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
const auto found = unspecialized_shaders.find(unique_identifier);
if (found == unspecialized_shaders.end()) {
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
std::move(code_b));
} else {
shader = CachedShader::CreateFromCache(params, found->second);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(shader);
@@ -673,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
return kernel;
}
// No kernel found - create a new one
// No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})};
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
const auto found = unspecialized_shaders.find(unique_identifier);
if (found == unspecialized_shaders.end()) {
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
kernel = CachedShader::CreateFromCache(params, found->second);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(kernel);

View File

@@ -22,7 +22,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
struct UnspecializedShader {
GLShader::ShaderEntries entries;
Tegra::Engines::ShaderType type;
ProgramCode code;
ProgramCode code_b;
struct PrecompiledShader {
std::shared_ptr<OGLProgram> program;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
};
struct ShaderParameters {
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledVariants* precompiled_variants;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
class CachedShader final : public RasterizerCacheObject {
public:
~CachedShader();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
return size_in_bytes;
}
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
static Shader CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
ProgramCode program_code, ProgramCode program_code_b);
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
static Shader CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized);
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return code.size() * sizeof(u64);
}
/// Gets the shader entries for the shader
const GLShader::ShaderEntries& GetShaderEntries() const {
return entries;
}
/// Gets the GL program handle for the shader
GLuint GetHandle(const ProgramVariant& variant);
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes);
private:
struct LockerVariant {
std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
std::unordered_map<ProgramVariant, CachedProgram> programs;
};
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program);
explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode program_code,
ProgramCode program_code_b);
bool EnsureValidLockerVariant();
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
const VideoCommon::Shader::ConstBufferLocker& locker) const;
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr{};
u64 unique_identifier{};
Tegra::Engines::ShaderType shader_type{};
GLShader::ShaderEntries entries;
ProgramCode code;
ProgramCode code_b;
LockerVariant* curr_locker_variant = nullptr;
std::vector<std::unique_ptr<LockerVariant>> locker_variants;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws);
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::unordered_set<GLenum>& supported_formats);
const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
PrecompiledPrograms precompiled_programs;
std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};

View File

@@ -23,8 +23,9 @@
#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader/transform_feedback.h"
namespace OpenGL::GLShader {
namespace OpenGL {
namespace {
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
bvec2 is_nan1 = isnan(pair1);
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}}
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
layout (std140, binding = {}) uniform vs_config {{
float y_direction;
}};
)";
class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
}
}
/// Describes primitive behavior on geometry shaders
std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return {"points", 1};
case Maxwell::PrimitiveTopology::Lines:
case Maxwell::PrimitiveTopology::LineStrip:
return {"lines", 2};
case Maxwell::PrimitiveTopology::LinesAdjacency:
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return {"lines_adjacency", 4};
case Maxwell::PrimitiveTopology::Triangles:
case Maxwell::PrimitiveTopology::TriangleStrip:
case Maxwell::PrimitiveTopology::TriangleFan:
return {"triangles", 3};
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return {"triangles_adjacency", 6};
default:
UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
return {"points", 1};
}
}
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(u32 element) {
constexpr const char* GetSwizzle(std::size_t element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
constexpr const char* GetColorSwizzle(std::size_t element) {
constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
return swizzle.at(element);
}
/// Translate topology
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -310,10 +366,19 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}
constexpr bool IsLegacyTexCoord(Attribute::Index index) {
return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
}
constexpr Attribute::Index ToGenericAttribute(u64 value) {
return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
}
constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
}
u32 GetGenericAttributeIndex(Attribute::Index index) {
ASSERT(IsGenericAttribute(index));
return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
@@ -337,15 +402,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) {
return stage == ShaderType::Vertex;
}
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
bool is_scalar = false;
};
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier, std::string_view suffix)
: device{device}, ir{ir}, registry{registry}, stage{stage},
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
}
void Decompile() {
DeclareHeader();
DeclareVertex();
DeclareGeometry();
DeclareFragment();
DeclareCompute();
DeclareInputAttributes();
DeclareOutputAttributes();
DeclareImages();
DeclareSamplers();
DeclareGlobalMemory();
DeclareConstantBuffers();
DeclareLocalMemory();
DeclareRegisters();
DeclarePredicates();
DeclareInternalFlags();
DeclareCustomVariables();
DeclarePhysicalAttributeReader();
code.AddLine("void main() {{");
++code.scope;
if (stage == ShaderType::Vertex) {
code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
}
if (ir.IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
}
--code.scope;
code.AddLine("}}");
}
std::string GetResult() {
return code.GetResult();
}
private:
friend class ASTDecompiler;
friend class ExprDecompiler;
void DecompileBranchMode() {
// VM's program counter
@@ -387,46 +503,40 @@ public:
void DecompileAST();
void Decompile() {
DeclareVertex();
DeclareGeometry();
DeclareRegisters();
DeclareCustomVariables();
DeclarePredicates();
DeclareLocalMemory();
DeclareInternalFlags();
DeclareInputAttributes();
DeclareOutputAttributes();
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
DeclareImages();
DeclarePhysicalAttributeReader();
code.AddLine("void execute_{}() {{", suffix);
++code.scope;
if (ir.IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
void DeclareHeader() {
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
}
if (device.HasVertexViewportLayer()) {
code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
}
if (device.HasImageLoadFormatted()) {
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
}
if (device.HasWarpIntrinsics()) {
code.AddLine("#extension GL_NV_gpu_shader5 : require");
code.AddLine("#extension GL_NV_shader_thread_group : require");
code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
}
// This pragma stops Nvidia's driver from over optimizing math (probably using fp16
// operations) on places where we don't want to.
// Thanks to Ryujinx for finding this workaround.
code.AddLine("#pragma optionNV(fastmath off)");
--code.scope;
code.AddLine("}}");
code.AddNewLine();
code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
}
std::string GetResult() {
return code.GetResult();
}
private:
friend class ASTDecompiler;
friend class ExprDecompiler;
void DeclareVertex() {
if (!IsVertexShader(stage))
if (stage != ShaderType::Vertex) {
return;
}
DeclareVertexRedeclarations();
}
@@ -436,9 +546,15 @@ private:
return;
}
const auto& info = registry.GetGraphicsInfo();
const auto input_topology = info.primitive_topology;
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
max_input_vertices = max_vertices;
code.AddLine("layout ({}) in;", glsl_topology);
const auto topology = GetTopologyName(header.common3.output_topology);
const auto max_vertices = header.common4.max_output_vertices.Value();
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
const auto max_output_vertices = header.common4.max_output_vertices.Value();
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
code.AddNewLine();
code.AddLine("in gl_PerVertex {{");
@@ -450,11 +566,50 @@ private:
DeclareVertexRedeclarations();
}
void DeclareFragment() {
if (stage != ShaderType::Fragment) {
return;
}
if (ir.UsesLegacyVaryings()) {
code.AddLine("in gl_PerFragment {{");
++code.scope;
code.AddLine("vec4 gl_TexCoord[8];");
code.AddLine("vec4 gl_Color;");
code.AddLine("vec4 gl_SecondaryColor;");
--code.scope;
code.AddLine("}};");
}
for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
}
}
void DeclareCompute() {
if (stage != ShaderType::Compute) {
return;
}
const auto& info = registry.GetComputeInfo();
if (const u32 size = info.shared_memory_size_in_words; size > 0) {
code.AddLine("shared uint smem[{}];", size);
code.AddNewLine();
}
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
code.AddNewLine();
}
void DeclareVertexRedeclarations() {
code.AddLine("out gl_PerVertex {{");
++code.scope;
code.AddLine("vec4 gl_Position;");
auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
if (!pos_xfb.empty()) {
pos_xfb = fmt::format("layout ({}) ", pos_xfb);
}
const char* pos_type =
FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -463,14 +618,14 @@ private:
break;
}
}
if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
if (ir.UsesViewportIndex()) {
code.AddLine("int gl_ViewportIndex;");
}
} else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
} else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
!device.HasVertexViewportLayer()) {
LOG_ERROR(
Render_OpenGL,
@@ -481,12 +636,12 @@ private:
code.AddLine("float gl_PointSize;");
}
if (ir.UsesInstanceId()) {
code.AddLine("int gl_InstanceID;");
}
if (ir.UsesVertexId()) {
code.AddLine("int gl_VertexID;");
if (ir.UsesLegacyVaryings()) {
code.AddLine("vec4 gl_TexCoord[8];");
code.AddLine("vec4 gl_FrontColor;");
code.AddLine("vec4 gl_FrontSecondaryColor;");
code.AddLine("vec4 gl_BackColor;");
code.AddLine("vec4 gl_BackSecondaryColor;");
}
--code.scope;
@@ -525,18 +680,16 @@ private:
}
void DeclareLocalMemory() {
u64 local_memory_size = 0;
if (stage == ShaderType::Compute) {
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
code.AddLine("#endif");
return;
local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
} else {
local_memory_size = header.GetLocalMemorySize();
}
const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -589,7 +742,7 @@ private:
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
std::string name{GetGenericInputAttribute(index)};
if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
@@ -626,9 +779,59 @@ private:
}
}
std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
return {};
}
return it->second.components;
}
std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
return {};
}
const VaryingTFB& tfb = it->second;
return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
tfb.offset, tfb.stride);
}
void DeclareOutputAttribute(Attribute::Index index) {
const u32 location{GetGenericAttributeIndex(index)};
code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
static constexpr std::string_view swizzle = "xyzw";
u8 element = 0;
while (element < 4) {
auto xfb = GetTransformFeedbackDecoration(index, element);
if (!xfb.empty()) {
xfb = fmt::format(", {}", xfb);
}
const std::size_t remainder = 4 - element;
const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
const char* const type = FLOAT_TYPES.at(num_components - 1);
const u32 location = GetGenericAttributeIndex(index);
GenericVaryingDescription description;
description.first_element = static_cast<u8>(element);
description.is_scalar = num_components == 1;
description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
if (element != 0 || num_components != 4) {
const std::string_view name_swizzle = swizzle.substr(element, num_components);
description.name = fmt::format("{}_{}", description.name, name_swizzle);
}
for (std::size_t i = 0; i < num_components; ++i) {
const u8 offset = static_cast<u8>(location * 4 + element + i);
varying_description.insert({offset, description});
}
code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
xfb, type, description.name);
element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
}
}
void DeclareConstantBuffers() {
@@ -925,7 +1128,8 @@ private:
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
max_input_vertices.value());
}
return std::string(name);
};
@@ -943,6 +1147,10 @@ private:
default:
UNREACHABLE();
}
case Attribute::Index::FrontColor:
return {"gl_Color"s + GetSwizzle(element), Type::Float};
case Attribute::Index::FrontSecondaryColor:
return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
case Attribute::Index::PointCoord:
switch (element) {
case 0:
@@ -959,7 +1167,7 @@ private:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
ASSERT(IsVertexShader(stage));
ASSERT(stage == ShaderType::Vertex);
switch (element) {
case 2:
// Config pack's first value is instance_id.
@@ -980,7 +1188,13 @@ private:
return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
Type::Float};
}
if (IsLegacyTexCoord(attribute)) {
UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
GetSwizzle(element)),
Type::Float};
}
break;
@@ -1021,21 +1235,22 @@ private:
}
std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
const u32 element = abuf->GetElement();
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}};
return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
switch (element) {
case 0:
UNIMPLEMENTED();
return {};
case 1:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
return {};
}
return {{"gl_Layer", Type::Int}};
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
return {};
}
return {{"gl_ViewportIndex", Type::Int}};
@@ -1043,14 +1258,26 @@ private:
return {{"gl_PointSize", Type::Float}};
}
return {};
case Attribute::Index::FrontColor:
return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::FrontSecondaryColor:
return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::BackColor:
return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::BackSecondaryColor:
return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::ClipDistances0123:
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}};
return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
case Attribute::Index::ClipDistances4567:
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
return {
{GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
}
if (IsLegacyTexCoord(attribute)) {
return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
GetSwizzle(element)),
Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1822,16 +2049,19 @@ private:
expr += GetSampler(meta->sampler);
expr += ", ";
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += VisitOperand(operation, i).AsInt();
const std::size_t next = i + 1;
if (next == count)
expr += ')';
else if (next < count)
if (i > 0) {
expr += ", ";
}
expr += VisitOperand(operation, i).AsInt();
}
if (meta->array) {
expr += ", ";
expr += Visit(meta->array).AsInt();
}
expr += ')';
if (meta->lod && !meta->sampler.IsBuffer()) {
expr += ", ";
@@ -1945,7 +2175,7 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
code.AddLine("FragColor{}[{}] = {};", render_target, component,
code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
@@ -2261,27 +2491,34 @@ private:
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
return GetDeclarationWithSuffix(index, "gpr");
return AppendSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
return GetDeclarationWithSuffix(index, "custom_var");
return AppendSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
return AppendSuffix(static_cast<u32>(pred), "pred");
}
std::string GetInputAttribute(Attribute::Index attribute) const {
return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
std::string GetGenericInputAttribute(Attribute::Index attribute) const {
return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
}
std::string GetOutputAttribute(Attribute::Index attribute) const {
return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
std::unordered_map<u8, GenericVaryingDescription> varying_description;
std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
const auto& description = varying_description.at(offset);
if (description.is_scalar) {
return description.name;
}
return fmt::format("{}[{}]", description.name, element - description.first_element);
}
std::string GetConstBuffer(u32 index) const {
return GetDeclarationWithSuffix(index, "cbuf");
return AppendSuffix(index, "cbuf");
}
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2531,15 @@ private:
}
std::string GetConstBufferBlock(u32 index) const {
return GetDeclarationWithSuffix(index, "cbuf_block");
return AppendSuffix(index, "cbuf_block");
}
std::string GetLocalMemory() const {
return "lmem_" + suffix;
if (suffix.empty()) {
return "lmem";
} else {
return "lmem_" + std::string{suffix};
}
}
std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,23 +2548,31 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
return fmt::format("{}_{}", InternalFlagNames[index], suffix);
if (suffix.empty()) {
return InternalFlagNames[index];
} else {
return fmt::format("{}_{}", InternalFlagNames[index], suffix);
}
}
std::string GetSampler(const Sampler& sampler) const {
return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
}
std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
return fmt::format("{}_{}_{}", name, index, suffix);
std::string AppendSuffix(u32 index, std::string_view name) const {
if (suffix.empty()) {
return fmt::format("{}{}", name, index);
} else {
return fmt::format("{}{}_{}", name, index, suffix);
}
}
u32 GetNumPhysicalInputAttributes() const {
return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
}
u32 GetNumPhysicalAttributes() const {
@@ -2334,17 +2583,31 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
bool IsRenderTargetEnabled(u32 render_target) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
return true;
}
}
return false;
}
const Device& device;
const ShaderIR& ir;
const Registry& registry;
const ShaderType stage;
const std::string suffix;
const std::string_view identifier;
const std::string_view suffix;
const Header header;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
std::optional<u32> max_input_vertices;
};
std::string GetFlowVariable(u32 i) {
return fmt::format("flow_var_{}", i);
std::string GetFlowVariable(u32 index) {
return fmt::format("flow_var{}", index);
}
class ExprDecompiler {
@@ -2531,7 +2794,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2555,28 +2818,12 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
return entries;
}
std::string GetCommonDeclarations() {
return R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
bvec2 is_nan1 = isnan(pair1);
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
)";
}
std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier,
std::string_view suffix) {
GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
decompiler.Decompile();
return decompiler.GetResult();
}
} // namespace OpenGL::GLShader
} // namespace OpenGL

View File

@@ -6,22 +6,18 @@
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace OpenGL {
class Device;
}
namespace OpenGL::GLShader {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -78,11 +74,11 @@ struct ShaderEntries {
std::size_t shader_length{};
};
ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
std::string GetCommonDeclarations();
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier,
std::string_view suffix = {});
std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage, const std::string& suffix);
} // namespace OpenGL::GLShader
} // namespace OpenGL

View File

@@ -31,32 +31,24 @@ namespace {
using ShaderCacheVersionHash = std::array<u8, 64>;
enum class TransferableEntryKind : u32 {
Raw,
Usage,
};
struct ConstBufferKey {
u32 cbuf{};
u32 offset{};
u32 value{};
u32 cbuf = 0;
u32 offset = 0;
u32 value = 0;
};
struct BoundSamplerKey {
u32 offset{};
Tegra::Engines::SamplerDescriptor sampler{};
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerKey {
u32 cbuf{};
u32 offset{};
Tegra::Engines::SamplerDescriptor sampler{};
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
constexpr u32 NativeVersion = 12;
// Making sure sizes doesn't change by accident
static_assert(sizeof(ProgramVariant) == 20);
constexpr u32 NativeVersion = 20;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
} // Anonymous namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code,
ProgramCode code_b)
: unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
code_b)} {}
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
u32 code_size{};
u32 code_size_b{};
u32 code_size;
u32 code_size_b;
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
code.resize(code_size);
code_b.resize(code_size_b);
if (file.ReadArray(code.data(), code_size) != code_size)
if (file.ReadArray(code.data(), code_size) != code_size) {
return false;
}
if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
return false;
}
u8 is_texture_handler_size_known;
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
if (is_texture_handler_size_known) {
texture_handler_size = texture_handler_size_value;
}
std::vector<ConstBufferKey> flat_keys(num_keys);
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) {
return false;
}
for (const auto& key : flat_keys) {
keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : flat_bound_samplers) {
bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : flat_bindless_samplers) {
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
return true;
}
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 ||
bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
return false;
}
if (file.WriteArray(code.data(), code.size()) != code.size())
if (file.WriteArray(code.data(), code.size()) != code.size()) {
return false;
}
if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
return false;
}
return true;
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false;
}
std::vector<ConstBufferKey> flat_keys;
flat_keys.reserve(keys.size());
for (const auto& [address, value] : keys) {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
std::vector<BoundSamplerKey> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
}
std::vector<BindlessSamplerKey> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
BindlessSamplerKey{address.first, address.second, sampler});
}
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size();
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
FileUtil::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
GetTitleID());
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
return {};
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL,
"Failed to get transferable cache version for title id={}, skipping",
GetTitleID());
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
return {};
}
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
}
// Version is valid, load the shaders
constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
std::vector<ShaderDiskCacheRaw> raws;
std::vector<ShaderDiskCacheUsage> usages;
std::vector<ShaderDiskCacheEntry> entries;
while (file.Tell() < file.GetSize()) {
TransferableEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
return {};
}
switch (kind) {
case TransferableEntryKind::Raw: {
ShaderDiskCacheRaw entry;
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
transferable.insert({entry.GetUniqueIdentifier(), {}});
raws.push_back(std::move(entry));
break;
}
case TransferableEntryKind::Usage: {
ShaderDiskCacheUsage usage;
u32 num_keys{};
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
file.ReadArray(&usage.variant, 1) != 1 ||
file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
std::vector<ConstBufferKey> keys(num_keys);
std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
bound_samplers.size() ||
file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
bindless_samplers.size()) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
for (const auto& key : keys) {
usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : bound_samplers) {
usage.bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : bindless_samplers) {
usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
usages.push_back(std::move(usage));
break;
}
default:
LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
static_cast<u32>(kind));
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
return {};
}
}
is_usable = true;
return {{std::move(raws), std::move(usages)}};
return {std::move(entries)};
}
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
std::string path = GetPrecompiledPath();
FileUtil::IOFile file(path, "rb");
FileUtil::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
GetTitleID());
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
}
const auto result = LoadPrecompiledFile(file);
if (!result) {
LOG_INFO(Render_OpenGL,
"Failed to load precompiled cache for game with title id={}, removing",
GetTitleID());
file.Close();
InvalidatePrecompiled();
return {};
if (const auto result = LoadPrecompiledFile(file)) {
return *result;
}
return *result;
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
file.Close();
InvalidatePrecompiled();
return {};
}
std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
ShaderDumpsMap dumps;
std::vector<ShaderDiskCachePrecompiled> entries;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
u32 num_keys{};
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
ShaderDiskCacheUsage usage;
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
!LoadObjectFromPrecompiled(usage.variant) ||
!LoadObjectFromPrecompiled(usage.bound_buffer) ||
!LoadObjectFromPrecompiled(num_keys) ||
!LoadObjectFromPrecompiled(num_bound_samplers) ||
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
return {};
}
std::vector<ConstBufferKey> keys(num_keys);
std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
!LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
bound_samplers.size() ||
!LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
bindless_samplers.size()) {
return {};
}
for (const auto& key : keys) {
usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : bound_samplers) {
usage.bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : bindless_samplers) {
usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
ShaderDiskCacheDump dump;
if (!LoadObjectFromPrecompiled(dump.binary_format)) {
u32 binary_size;
auto& entry = entries.emplace_back();
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
return {};
}
u32 binary_length{};
if (!LoadObjectFromPrecompiled(binary_length)) {
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
return {};
}
dump.binary.resize(binary_length);
if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
return {};
}
dumps.emplace(std::move(usage), dump);
}
return dumps;
return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
}
}
void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
if (!is_usable) {
return;
}
const u64 id = entry.GetUniqueIdentifier();
if (transferable.find(id) != transferable.end()) {
const u64 id = entry.unique_identifier;
if (stored_transferable.find(id) != stored_transferable.end()) {
// The shader already exists
return;
}
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
if (!file.IsOpen()) {
return;
}
if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
if (!entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
}
transferable.insert({id, {}});
stored_transferable.insert(id);
}
void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
if (!is_usable) {
return;
}
const auto it = transferable.find(usage.unique_identifier);
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
if (usages.find(usage) != usages.end()) {
// Skip this variant since the shader is already stored.
return;
}
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
const auto Close = [&] {
LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
file.Close();
InvalidateTransferable();
};
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
file.WriteObject(usage.bound_buffer) != 1 ||
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
Close();
return;
}
for (const auto& [pair, value] : usage.keys) {
const auto [cbuf, offset] = pair;
if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
Close();
return;
}
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
Close();
return;
}
}
for (const auto& [pair, sampler] : usage.bindless_samplers) {
const auto [cbuf, offset] = pair;
if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
Close();
return;
}
}
}
void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
if (!is_usable) {
return;
}
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
GLint binary_length{};
GLint binary_length;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format{};
GLenum binary_format;
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
const auto Close = [&] {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
usage.unique_identifier);
InvalidatePrecompiled();
};
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
!SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
Close();
return;
}
for (const auto& [pair, value] : usage.keys) {
const auto [cbuf, offset] = pair;
if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
Close();
return;
}
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
Close();
return;
}
}
for (const auto& [pair, sampler] : usage.bindless_samplers) {
const auto [cbuf, offset] = pair;
if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
Close();
return;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
Close();
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
unique_identifier);
InvalidatePrecompiled();
}
}
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
return;
}
}

View File

@@ -19,8 +19,7 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/registry.h"
namespace Core {
class System;
@@ -32,139 +31,39 @@ class IOFile;
namespace OpenGL {
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;
using ProgramCode = std::vector<u64>;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
/// Describes the different variants a program can be compiled with.
struct ProgramVariant final {
ProgramVariant() = default;
/// Graphics constructor.
explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
: primitive_mode{primitive_mode} {}
/// Compute constructor.
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
u32 local_memory_size) noexcept
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters.
GLenum primitive_mode{};
// Compute specific parameters.
u32 block_x{};
u16 block_y{};
u16 block_z{};
u32 shared_memory_size{};
u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
rhs.block_z, rhs.shared_memory_size,
rhs.local_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::is_trivially_copyable_v<ProgramVariant>);
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
ProgramVariant variant;
u32 bound_buffer{};
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
rhs.bindless_samplers);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
return !operator==(rhs);
}
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::ProgramVariant> {
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^
(static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
(static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::ProgramVariant>{}(usage.variant);
}
};
} // namespace std
namespace OpenGL {
/// Describes a shader how it's used by the guest GPU
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type,
ProgramCode code, ProgramCode code_b = {});
ShaderDiskCacheRaw();
~ShaderDiskCacheRaw();
/// Describes a shader and how it's used by the guest GPU
struct ShaderDiskCacheEntry {
ShaderDiskCacheEntry();
~ShaderDiskCacheEntry();
bool Load(FileUtil::IOFile& file);
bool Save(FileUtil::IOFile& file) const;
u64 GetUniqueIdentifier() const {
return unique_identifier;
}
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
}
Tegra::Engines::ShaderType GetType() const {
return type;
}
const ProgramCode& GetCode() const {
return code;
}
const ProgramCode& GetCodeB() const {
return code_b;
}
private:
u64 unique_identifier{};
Tegra::Engines::ShaderType type{};
ProgramCode code;
ProgramCode code_b;
u64 unique_identifier = 0;
std::optional<u32> texture_handler_size;
u32 bound_buffer = 0;
VideoCommon::Shader::GraphicsInfo graphics_info;
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
/// Contains an OpenGL dumped binary program
struct ShaderDiskCacheDump {
GLenum binary_format{};
struct ShaderDiskCachePrecompiled {
u64 unique_identifier = 0;
GLenum binary_format = 0;
std::vector<u8> binary;
};
@@ -174,11 +73,10 @@ public:
~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
LoadTransferable();
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveRaw(const ShaderDiskCacheRaw& entry);
/// Saves shader usage to the transferable file. Does not check for collisions.
void SaveUsage(const ShaderDiskCacheUsage& usage);
void SaveEntry(const ShaderDiskCacheEntry& entry);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
void SavePrecompiled(u64 unique_identifier, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
LoadPrecompiledFile(FileUtil::IOFile& file);
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
FileUtil::IOFile& file);
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
std::unordered_set<u64> stored_transferable;
// The cache has been loaded at boot
bool is_usable{};

View File

@@ -1,109 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string>
#include <fmt/format.h>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::CompileDepth;
using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
std::string out = GetCommonDeclarations();
out += fmt::format(R"(
layout (std140, binding = {}) uniform vs_config {{
float y_direction;
}};
)",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Vertex, "vertex");
if (ir_b) {
out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
}
out += R"(
void main() {
gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
execute_vertex();
)";
if (ir_b) {
out += " execute_vertex_b();";
}
out += "}\n";
return out;
}
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += fmt::format(R"(
layout (std140, binding = {}) uniform gs_config {{
float y_direction;
}};
)",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Geometry, "geometry");
out += R"(
void main() {
execute_geometry();
}
)";
return out;
}
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += fmt::format(R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
layout (location = 2) out vec4 FragColor2;
layout (location = 3) out vec4 FragColor3;
layout (location = 4) out vec4 FragColor4;
layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (std140, binding = {}) uniform fs_config {{
float y_direction;
}};
)",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Fragment, "fragment");
out += R"(
void main() {
execute_fragment();
}
)";
return out;
}
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += Decompile(device, ir, ShaderType::Compute, "compute");
out += R"(
void main() {
execute_compute();
}
)";
return out;
}
} // namespace OpenGL::GLShader

View File

@@ -1,34 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
class Device;
}
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
/// Generates the GLSL vertex shader program source code for the given VS program
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
/// Generates the GLSL geometry shader program source code for the given GS program
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL fragment shader program source code for the given FS program
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL compute shader program source code for the given CS program
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
} // namespace OpenGL::GLShader

View File

@@ -94,6 +94,15 @@ void SetupDirtyShaders(Tables& tables) {
Shaders);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
tables[1][OFF(polygon_mode_front)] = PolygonModes;
tables[1][OFF(polygon_mode_back)] = PolygonModes;
tables[0][OFF(fill_rectangle)] = PolygonModes;
}
void SetupDirtyDepthTest(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_test_enable)] = DepthTest;
@@ -211,6 +220,7 @@ void StateTracker::Initialize() {
SetupDirtyVertexArrays(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
SetupDirtyAlphaTest(tables);
@@ -228,7 +238,6 @@ void StateTracker::Initialize() {
SetupDirtyMisc(tables);
auto& store = dirty.on_write_stores;
SetupCommonOnWriteStores(store);
store[VertexBuffers] = true;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
store[VertexBuffer0 + i] = true;

View File

@@ -59,6 +59,10 @@ enum : u8 {
Shaders,
ClipDistances,
PolygonModes,
PolygonModeFront,
PolygonModeBack,
ColorMask,
FrontFace,
CullTest,
@@ -111,6 +115,13 @@ public:
flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyPolygonModes() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;

View File

@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI

View File

@@ -488,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
return GL_COPY;
}
inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
switch (polygon_mode) {
case Maxwell::PolygonMode::Point:
return GL_POINT;
case Maxwell::PolygonMode::Line:
return GL_LINE;
case Maxwell::PolygonMode::Fill:
return GL_FILL;
}
UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
return GL_FILL;
}
} // namespace MaxwellToGL
} // namespace OpenGL

View File

@@ -5,8 +5,11 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -25,6 +28,8 @@
namespace OpenGL {
namespace {
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames.
constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -41,124 +46,6 @@ struct Frame {
bool is_srgb{}; /// Framebuffer is sRGB or RGB
};
/**
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
* but also make sure that rendering happens at the pace that the frontend dictates. This is a
* helper class that the renderer uses to sync frames between the render thread and the presentation
* thread
*/
class FrameMailbox {
public:
std::mutex swap_chain_lock;
std::condition_variable present_cv;
std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
std::queue<Frame*> free_queue;
std::deque<Frame*> present_queue;
Frame* previous_frame{};
FrameMailbox() {
for (auto& frame : swap_chain) {
free_queue.push(&frame);
}
}
~FrameMailbox() {
// lock the mutex and clear out the present and free_queues and notify any people who are
// blocked to prevent deadlock on shutdown
std::scoped_lock lock{swap_chain_lock};
std::queue<Frame*>().swap(free_queue);
present_queue.clear();
present_cv.notify_all();
}
void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
frame->present.Release();
frame->present.Create();
GLint previous_draw_fbo{};
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
frame->color_reloaded = false;
}
void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
// Recreate the color texture attachment
frame->color.Release();
frame->color.Create();
const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
// Recreate the FBO for the render target
frame->render.Release();
frame->render.Create();
glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
}
frame->width = width;
frame->height = height;
frame->color_reloaded = true;
}
Frame* GetRenderFrame() {
std::unique_lock lock{swap_chain_lock};
// If theres no free frames, we will reuse the oldest render frame
if (free_queue.empty()) {
auto frame = present_queue.back();
present_queue.pop_back();
return frame;
}
Frame* frame = free_queue.front();
free_queue.pop();
return frame;
}
void ReleaseRenderFrame(Frame* frame) {
std::unique_lock lock{swap_chain_lock};
present_queue.push_front(frame);
present_cv.notify_one();
}
Frame* TryGetPresentFrame(int timeout_ms) {
std::unique_lock lock{swap_chain_lock};
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
[&] { return !present_queue.empty(); });
if (present_queue.empty()) {
// timed out waiting for a frame to draw so return the previous frame
return previous_frame;
}
// free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
}
// the newest entries are pushed to the front of the queue
Frame* frame = present_queue.front();
present_queue.pop_front();
// remove all old entries from the present queue and move them back to the free_queue
for (auto f : present_queue) {
free_queue.push(f);
}
present_queue.clear();
previous_frame = frame;
return frame;
}
};
namespace {
constexpr char VERTEX_SHADER[] = R"(
#version 430 core
@@ -211,6 +98,24 @@ struct ScreenRectVertex {
std::array<GLfloat, 2> tex_coord;
};
/// Returns true if any debug tool is attached
bool HasDebugTool() {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
if (nsight) {
return true;
}
GLint num_extensions;
glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
if (!std::strcmp(name, "GL_EXT_debug_tool")) {
return true;
}
}
return false;
}
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
/**
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
* but also make sure that rendering happens at the pace that the frontend dictates. This is a
* helper class that the renderer uses to sync frames between the render thread and the presentation
* thread
*/
class FrameMailbox {
public:
std::mutex swap_chain_lock;
std::condition_variable present_cv;
std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
std::queue<Frame*> free_queue;
std::deque<Frame*> present_queue;
Frame* previous_frame{};
FrameMailbox() : has_debug_tool{HasDebugTool()} {
for (auto& frame : swap_chain) {
free_queue.push(&frame);
}
}
~FrameMailbox() {
// lock the mutex and clear out the present and free_queues and notify any people who are
// blocked to prevent deadlock on shutdown
std::scoped_lock lock{swap_chain_lock};
std::queue<Frame*>().swap(free_queue);
present_queue.clear();
present_cv.notify_all();
}
void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
frame->present.Release();
frame->present.Create();
GLint previous_draw_fbo{};
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
frame->color_reloaded = false;
}
void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
// Recreate the color texture attachment
frame->color.Release();
frame->color.Create();
const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
// Recreate the FBO for the render target
frame->render.Release();
frame->render.Create();
glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
}
frame->width = width;
frame->height = height;
frame->color_reloaded = true;
}
Frame* GetRenderFrame() {
std::unique_lock lock{swap_chain_lock};
// If theres no free frames, we will reuse the oldest render frame
if (free_queue.empty()) {
auto frame = present_queue.back();
present_queue.pop_back();
return frame;
}
Frame* frame = free_queue.front();
free_queue.pop();
return frame;
}
void ReleaseRenderFrame(Frame* frame) {
std::unique_lock lock{swap_chain_lock};
present_queue.push_front(frame);
present_cv.notify_one();
DebugNotifyNextFrame();
}
Frame* TryGetPresentFrame(int timeout_ms) {
DebugWaitForNextFrame();
std::unique_lock lock{swap_chain_lock};
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
[&] { return !present_queue.empty(); });
if (present_queue.empty()) {
// timed out waiting for a frame to draw so return the previous frame
return previous_frame;
}
// free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
}
// the newest entries are pushed to the front of the queue
Frame* frame = present_queue.front();
present_queue.pop_front();
// remove all old entries from the present queue and move them back to the free_queue
for (auto f : present_queue) {
free_queue.push(f);
}
present_queue.clear();
previous_frame = frame;
return frame;
}
private:
std::mutex debug_synch_mutex;
std::condition_variable debug_synch_condition;
std::atomic_int frame_for_debug{};
const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
/// Signal that a new frame is available (called from GPU thread)
void DebugNotifyNextFrame() {
if (!has_debug_tool) {
return;
}
frame_for_debug++;
std::lock_guard lock{debug_synch_mutex};
debug_synch_condition.notify_one();
}
/// Wait for a new frame to be available (called from presentation thread)
void DebugWaitForNextFrame() {
if (!has_debug_tool) {
return;
}
const int last_frame = frame_for_debug;
std::unique_lock lock{debug_synch_mutex};
debug_synch_condition.wait(lock,
[this, last_frame] { return frame_for_debug > last_frame; });
}
};
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
frame_mailbox{std::make_unique<FrameMailbox>()} {}
@@ -576,6 +628,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask0();
@@ -611,6 +664,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glDisablei(GL_SCISSOR_TEST, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

View File

@@ -125,6 +125,7 @@ struct FormatTuple {
{vk::Format::eR8Uint, Attachable | Storage}, // R8UI
{vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
{vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
{vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
{vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
{vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
{vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -256,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
return vk::ShaderStageFlagBits::eGeometry;
case Tegra::Engines::ShaderType::Fragment:
return vk::ShaderStageFlagBits::eFragment;
case Tegra::Engines::ShaderType::Compute:
return vk::ShaderStageFlagBits::eCompute;
}
UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
return {};
@@ -331,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR16G16B16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Unorm;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return vk::Format::eA2B10G10R10UnormPack32;
default:
break;
}
@@ -364,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR8G8B8A8Uint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Uint;
case Maxwell::VertexAttribute::Size::Size_32_32:
return vk::Format::eR32G32Uint;
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return vk::Format::eR32G32B32Uint;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return vk::Format::eR32G32B32A32Uint;
default:
@@ -392,6 +401,26 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
}
break;
case Maxwell::VertexAttribute::Type::SignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return vk::Format::eR8Sscaled;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Sscaled;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return vk::Format::eR8G8B8Sscaled;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return vk::Format::eR8G8B8A8Sscaled;
case Maxwell::VertexAttribute::Size::Size_16:
return vk::Format::eR16Sscaled;
case Maxwell::VertexAttribute::Size::Size_16_16:
return vk::Format::eR16G16Sscaled;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return vk::Format::eR16G16B16Sscaled;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Sscaled;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
switch (size) {

View File

@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
features.shaderStorageImageReadWithoutFormat =
is_shader_storage_img_read_without_format_supported;
features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
if (ext_transform_feedback) {
transform_feedback.transformFeedback = true;
transform_feedback.geometryStreams = true;
SetNext(next, transform_feedback);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
extensions.reserve(14);
extensions.reserve(15);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
bool khr_shader_float16_int8{};
bool ext_subgroup_size_control{};
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
}
if (khr_shader_float16_int8) {
if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (ext_subgroup_size_control) {
if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
is_warp_potentially_bigger = true;
}
if (has_ext_transform_feedback) {
const auto features =
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
const auto properties =
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
if (features.transformFeedback && features.geometryStreams &&
properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
ext_transform_feedback = true;
}
}
return extensions;
}
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
is_shader_storage_img_read_without_format_supported =
supported_features.shaderStorageImageReadWithoutFormat;
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32Uint,
vk::Format::eR16G16B16A16Uint,
vk::Format::eR16G16B16A16Snorm,
vk::Format::eR16G16B16A16Unorm,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,

View File

@@ -122,11 +122,6 @@ public:
return properties.limits.maxPushConstantsSize;
}
/// Returns true if Shader storage Image Read Without Format supported.
bool IsShaderStorageImageReadWithoutFormatSupported() const {
return is_shader_storage_img_read_without_format_supported;
}
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
}
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
return ext_shader_viewport_index_layer;
}
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
}
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
vk::DriverIdKHR driver_id{}; ///< Driver ID.
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
vk::DriverIdKHR driver_id{}; ///< Driver ID.
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.

View File

@@ -161,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, locker},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
@@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
renderpass_cache(device) {}
renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default;
@@ -191,7 +192,6 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
@@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
const SPIRVShader spirv_shader{
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
shader->GetEntries()};
const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
shader->GetRegistry(), specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
@@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
specialization.primitive_topology = fixed_state.input_assembly.topology;
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
@@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
entries};
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration

View File

@@ -25,7 +25,7 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
@@ -132,6 +132,10 @@ public:
return shader_ir;
}
const VideoCommon::Shader::Registry& GetRegistry() const {
return registry;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
@@ -147,7 +151,7 @@ private:
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::ConstBufferLocker locker;
VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
@@ -157,7 +161,8 @@ public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
@@ -180,8 +185,7 @@ private:
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache renderpass_cache;
VKRenderPassCache& renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;

View File

@@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
screen_info{screen_info}, device{device}, resource_manager{resource_manager},
memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
update_descriptor_queue(device, scheduler),
update_descriptor_queue(device, scheduler), renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
staging_pool),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
@@ -347,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
}
BeginTransformFeedback();
const auto pipeline_layout = pipeline.GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}
draw_params.Draw(cmdbuf, dld);
});
EndTransformFeedback();
}
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
query_cache.UpdateCounters();
const auto& gpu = system.GPU().Maxwell3D();
if (!system.GPU().Maxwell3D().ShouldExecute()) {
return;
}
sampled_views.clear();
image_views.clear();
query_cache.UpdateCounters();
const auto& regs = gpu.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
@@ -376,52 +384,54 @@ void RasterizerVulkan::Clear() {
if (!use_color && !use_depth && !use_stencil) {
return;
}
// Clearing images requires to be out of a renderpass
scheduler.RequestOutsideRenderPassOperationContext();
// TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass.
[[maybe_unused]] const auto texceptions = UpdateAttachments();
DEBUG_ASSERT(texceptions.none());
SetupImageTransitions(0, color_attachments, zeta_attachment);
const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
const auto& scissor = regs.scissor_test[0];
const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
scissor_extent.width = std::min(scissor_extent.width, render_area.width);
scissor_extent.height = std::min(scissor_extent.height, render_area.height);
const u32 layer = regs.clear_buffers.layer;
const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1);
if (use_color) {
View color_view;
{
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
}
color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
vk::PipelineStageFlagBits::eTransfer,
vk::AccessFlagBits::eTransferWrite);
const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
regs.clear_color[2], regs.clear_color[3]};
const vk::ClearColorValue clear(clear_color);
scheduler.Record([image = color_view->GetImage(),
subresource = color_view->GetImageSubresourceRange(),
clear](auto cmdbuf, auto& dld) {
cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
dld);
const vk::ClearValue clear_value{clear_color};
const u32 color_attachment = regs.clear_buffers.RT;
scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) {
const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
clear_value);
cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
});
}
if (use_depth || use_stencil) {
View zeta_surface;
{
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
zeta_surface = texture_cache.GetDepthBufferSurface(false);
}
zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
vk::PipelineStageFlagBits::eTransfer,
vk::AccessFlagBits::eTransferWrite);
const vk::ClearDepthStencilValue clear(regs.clear_depth,
static_cast<u32>(regs.clear_stencil));
scheduler.Record([image = zeta_surface->GetImage(),
subresource = zeta_surface->GetImageSubresourceRange(),
clear](auto cmdbuf, auto& dld) {
cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
subresource, dld);
});
if (!use_depth && !use_stencil) {
return;
}
vk::ImageAspectFlags aspect_flags;
if (use_depth) {
aspect_flags |= vk::ImageAspectFlagBits::eDepth;
}
if (use_stencil) {
aspect_flags |= vk::ImageAspectFlagBits::eStencil;
}
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](auto cmdbuf, auto& dld) {
const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil);
const vk::ClearValue clear_value{clear_zeta};
const vk::ClearAttachment attachment(aspect_flags, 0, clear_value);
cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
});
}
void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
@@ -538,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// Verify that the cached surface is the same size and format as the requested framebuffer
const auto& params{surface->GetSurfaceParams()};
const auto& pixel_format{
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
@@ -738,6 +746,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateStencilFaces(regs);
}
void RasterizerVulkan::BeginTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
const auto& binding = regs.tfb_bindings[0];
UNIMPLEMENTED_IF(binding.buffer_enable == 0);
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
});
}
void RasterizerVulkan::EndTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
scheduler.Record(
[](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
}
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs;
@@ -1109,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
// This implementation assumes that all attributes are used in the shader.
const GPUVAddr start{regs.vertex_array[index].StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
DEBUG_ASSERT(end > start);
DEBUG_ASSERT(end >= start);
size += (end - start + 1) * regs.vertex_array[index].enable;
}

View File

@@ -169,6 +169,10 @@ private:
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -249,6 +253,7 @@ private:
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
VKRenderPassCache renderpass_cache;
QuadArrayPass quad_array_pass;
Uint8Pass uint8_pass;

View File

@@ -5,7 +5,9 @@
#include <functional>
#include <limits>
#include <map>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <fmt/format.h>
@@ -24,6 +26,7 @@
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader/transform_feedback.h"
namespace Vulkan {
@@ -93,6 +96,12 @@ struct VertexIndices {
std::optional<u32> clip_distances;
};
struct GenericVaryingDescription {
Id id = nullptr;
u32 first_element = 0;
bool is_scalar = false;
};
spv::Dim GetSamplerDim(const Sampler& sampler) {
ASSERT(!sampler.IsBuffer());
switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler final : public Sirit::Module {
public:
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
const Specialization& specialization)
const Registry& registry, const Specialization& specialization)
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
specialization{specialization} {
registry{registry}, specialization{specialization} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
AddCapability(spv::Capability::Shader);
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
if (!transform_feedback.empty()) {
if (device.IsExtTransformFeedbackSupported()) {
AddCapability(spv::Capability::TransformFeedback);
} else {
LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
"supported on this device");
}
}
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
}
}
if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
@@ -318,25 +340,29 @@ public:
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common2.threads_per_input_primitive);
break;
case ShaderType::TesselationEval:
case ShaderType::TesselationEval: {
const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Tessellation);
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive));
AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing));
AddExecutionMode(main, specialization.tessellation.clockwise
AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
AddExecutionMode(main, info.tessellation_clockwise
? spv::ExecutionMode::VertexOrderCw
: spv::ExecutionMode::VertexOrderCcw);
break;
case ShaderType::Geometry:
}
case ShaderType::Geometry: {
const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Geometry);
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology));
AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common4.max_output_vertices);
// TODO(Rodrigo): Where can we get this info from?
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
break;
}
case ShaderType::Fragment:
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
if (stage != ShaderType::Geometry) {
return;
}
const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
const auto& info = registry.GetGraphicsInfo();
const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
DeclareInputVertexArray(num_input);
DeclareOutputVertex();
}
@@ -742,12 +769,34 @@ private:
}
void DeclareOutputAttributes() {
if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
return;
}
UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
for (const auto index : ir.GetOutputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
const u32 location = GetGenericAttributeLocation(index);
Id type = t_float4;
DeclareOutputAttribute(index);
}
}
void DeclareOutputAttribute(Attribute::Index index) {
static constexpr std::string_view swizzle = "xyzw";
const u32 location = GetGenericAttributeLocation(index);
u8 element = 0;
while (element < 4) {
const std::size_t remainder = 4 - element;
std::size_t num_components = remainder;
const std::optional tfb = GetTransformFeedbackInfo(index, element);
if (tfb) {
num_components = tfb->components;
}
Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
Id varying_default = v_varying_default;
if (IsOutputAttributeArray()) {
const u32 num = GetNumOutputVertices();
@@ -760,15 +809,47 @@ private:
}
type = TypePointer(spv::StorageClass::Output, type);
std::string name = fmt::format("out_attr{}", location);
if (num_components < 4 || element > 0) {
name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
}
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
output_attributes.emplace(index, id);
Name(AddGlobalVariable(id), name);
GenericVaryingDescription description;
description.id = id;
description.first_element = element;
description.is_scalar = num_components == 1;
for (u32 i = 0; i < num_components; ++i) {
const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
output_attributes.emplace(offset, description);
}
interfaces.push_back(id);
Decorate(id, spv::Decoration::Location, location);
if (element > 0) {
Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
}
if (tfb && device.IsExtTransformFeedbackSupported()) {
Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
}
element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
}
}
std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
return {};
}
return it->second;
}
u32 DeclareConstantBuffers(u32 binding) {
for (const auto& [index, size] : ir.GetConstantBuffers()) {
const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
@@ -898,7 +979,7 @@ private:
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
return GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
case ShaderType::TesselationControl:
case ShaderType::TesselationEval:
return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
}
default:
if (IsGenericAttribute(attribute)) {
const Id composite = output_attributes.at(attribute);
return {ArrayPass(t_out_float, composite, {element}), Type::Float};
const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
const GenericVaryingDescription description = output_attributes.at(offset);
const Id composite = description.id;
std::vector<u32> indices;
if (!description.is_scalar) {
indices.push_back(element - description.first_element);
}
return {ArrayPass(t_out_float, composite, indices), Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
}
Expression ImageLoad(Operation operation) {
if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
if (!device.IsFormatlessImageLoadSupported()) {
return {v_float_zero, Type::Float};
}
@@ -2258,11 +2345,11 @@ private:
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
switch (type) {
case Type::Float:
return {nullptr, t_float2, t_float3, t_float4};
return {t_float, t_float2, t_float3, t_float4};
case Type::Int:
return {nullptr, t_int2, t_int3, t_int4};
return {t_int, t_int2, t_int3, t_int4};
case Type::Uint:
return {nullptr, t_uint2, t_uint3, t_uint4};
return {t_uint, t_uint2, t_uint3, t_uint4};
default:
UNIMPLEMENTED();
return {};
@@ -2495,7 +2582,9 @@ private:
const ShaderIR& ir;
const ShaderType stage;
const Tegra::Shader::Header header;
const Registry& registry;
const Specialization& specialization;
std::unordered_map<u8, VaryingTFB> transform_feedback;
const Id t_void = Name(TypeVoid(), "void");
@@ -2584,7 +2673,7 @@ private:
Id shared_memory{};
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
std::map<Attribute::Index, Id> input_attributes;
std::map<Attribute::Index, Id> output_attributes;
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
ShaderType stage, const Specialization& specialization) {
return SPIRVDecompiler(device, ir, stage, specialization).Assemble();
ShaderType stage, const VideoCommon::Shader::Registry& registry,
const Specialization& specialization) {
return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
}
} // namespace Vulkan

View File

@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
// Tessellation specific
struct {
Maxwell::TessellationPrimitive primitive{};
Maxwell::TessellationSpacing spacing{};
bool clockwise{};
} tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage, const Specialization& specialization);
Tegra::Engines::ShaderType stage,
const VideoCommon::Shader::Registry& registry,
const Specialization& specialization);
} // namespace Vulkan

View File

@@ -100,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) {
}
u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
static constexpr u64 epochs_to_destroy = 180;
static constexpr std::size_t deletions_per_tick = 16;
auto& staging = cache[log2];
@@ -108,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
const std::size_t old_size = entries.size();
const auto is_deleteable = [this](const auto& entry) {
static constexpr u64 epochs_to_destroy = 180;
return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
};
const std::size_t begin_offset = staging.delete_index;

View File

@@ -90,8 +90,6 @@ void StateTracker::Initialize() {
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
SetupCommonOnWriteStores(dirty.on_write_stores);
}
void StateTracker::InvalidateCommandBufferState() {

View File

@@ -52,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
return vk::ImageType::e2D;
case SurfaceTarget::Texture3D:
return vk::ImageType::e3D;
case SurfaceTarget::TextureBuffer:
UNREACHABLE();
return {};
}
UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
return {};
@@ -273,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
for (u32 level = 0; level < params.num_levels; ++level) {
vk::BufferImageCopy copy = GetBufferImageCopy(level);
const auto& dld = device.GetDispatchLoader();
if (image->GetAspectMask() ==
(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
vk::BufferImageCopy depth = copy;
@@ -422,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
const auto& dld{device.GetDispatchLoader()};
const vk::ImageSubresourceLayers src_subresource(
src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
const vk::ImageSubresourceLayers dst_subresource(
@@ -458,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
const auto& dld{device.GetDispatchLoader()};
scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
is_linear](auto cmdbuf, auto& dld) {
cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,

View File

@@ -1,126 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <tuple>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/const_buffer_locker.h"
namespace VideoCommon::Shader {
using Tegra::Engines::SamplerDescriptor;
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
: stage{shader_stage} {}
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine)
: stage{shader_stage}, engine{&engine} {}
ConstBufferLocker::~ConstBufferLocker() = default;
std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
const std::pair<u32, u32> key = {buffer, offset};
const auto iter = keys.find(key);
if (iter != keys.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
keys.emplace(key, value);
return value;
}
std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
const u32 key = offset;
const auto iter = bound_samplers.find(key);
if (iter != bound_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
bound_samplers.emplace(key, value);
return value;
}
std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
u32 buffer, u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
bindless_samplers.emplace(key, value);
return value;
}
std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
if (bound_buffer_saved) {
return bound_buffer;
}
if (!engine) {
return std::nullopt;
}
bound_buffer_saved = true;
bound_buffer = engine->GetBoundBuffer();
return bound_buffer;
}
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
keys.insert_or_assign({buffer, offset}, value);
}
void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
bound_samplers.insert_or_assign(offset, sampler);
}
void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
bound_buffer_saved = true;
bound_buffer = buffer;
}
bool ConstBufferLocker::IsConsistent() const {
if (!engine) {
return false;
}
return std::all_of(keys.begin(), keys.end(),
[this](const auto& pair) {
const auto [cbuf, offset] = pair.first;
const auto value = pair.second;
return value == engine->AccessConstBuffer32(stage, cbuf, offset);
}) &&
std::all_of(bound_samplers.begin(), bound_samplers.end(),
[this](const auto& sampler) {
const auto [key, value] = sampler;
return value == engine->AccessBoundSampler(stage, key);
}) &&
std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
[this](const auto& sampler) {
const auto [cbuf, offset] = sampler.first;
const auto value = sampler.second;
return value == engine->AccessBindlessSampler(stage, cbuf, offset);
});
}
bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
return std::tie(keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
}
} // namespace VideoCommon::Shader

View File

@@ -1,103 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <unordered_map>
#include "common/common_types.h"
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
/**
* The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
* compiler. with it, the shader can obtain required data from GPU state and store it for disk
* shader compilation.
*/
class ConstBufferLocker {
public:
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine);
~ConstBufferLocker();
/// Retrieves a key from the locker, if it's registered, it will give the registered value, if
/// not it will obtain it from maxwell3d and register it.
std::optional<u32> ObtainKey(u32 buffer, u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
std::optional<u32> ObtainBoundBuffer();
/// Inserts a key.
void InsertKey(u32 buffer, u32 offset, u32 value);
/// Inserts a bound sampler key.
void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Inserts a bindless sampler key.
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Set the bound buffer for this locker.
void SetBoundBuffer(u32 buffer);
/// Checks keys and samplers against engine's current const buffers. Returns true if they are
/// the same value, false otherwise;
bool IsConsistent() const;
/// Returns true if the keys are equal to the other ones in the locker.
bool HasEqualKeys(const ConstBufferLocker& rhs) const;
/// Gives an getter to the const buffer keys in the database.
const KeyMap& GetKeys() const {
return keys;
}
/// Gets samplers database.
const BoundSamplerMap& GetBoundSamplers() const {
return bound_samplers;
}
/// Gets bindless samplers database.
const BindlessSamplerMap& GetBindlessSamplers() const {
return bindless_samplers;
}
/// Gets bound buffer used on this shader
u32 GetBoundBuffer() const {
return bound_buffer;
}
/// Obtains access to the guest driver's profile.
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
if (engine) {
return &engine->AccessGuestDriverProfile();
}
return nullptr;
}
private:
const Tegra::Engines::ShaderType stage;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
BindlessSamplerMap bindless_samplers;
bool bound_buffer_saved{};
u32 bound_buffer{};
};
} // namespace VideoCommon::Shader

View File

@@ -13,6 +13,7 @@
#include "common/common_types.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
: program_code{program_code}, locker{locker}, start{start} {}
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
: program_code{program_code}, registry{registry}, start{start} {}
const ProgramCode& program_code;
ConstBufferLocker& locker;
Registry& registry;
u32 start{};
std::vector<BlockInfo> block_info;
std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
const s32 pc_target = offset + result.relative_position;
std::vector<CaseBranch> branches;
for (u32 i = 0; i < result.entries; i++) {
auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
if (!key) {
return {ParseResult::AbnormalFlow, parse_info};
}
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
ConstBufferLocker& locker) {
Registry& registry) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
CFGRebuildState state{program_code, start_address, locker};
CFGRebuildState state{program_code, start_address, registry};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);

View File

@@ -12,6 +12,7 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
ConstBufferLocker& locker);
Registry& registry);
} // namespace VideoCommon::Shader

View File

@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
if (gpu_driver == nullptr) {
LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
return;
}
if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
VideoCore::GuestDriverProfile* gpu_driver,
VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
if (gpu_driver == nullptr) {
LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
return std::nullopt;
}
const u32 base_offset = sampler_to_deduce.GetOffset();
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
if (max_offset == std::numeric_limits<u32>::max()) {
return std::nullopt;
}
return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
}
} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
auto info = ScanFlow(program_code, main_offset, settings, locker);
auto info = ScanFlow(program_code, main_offset, settings, registry);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
auto gpu_driver = locker.AccessGuestDriverProfile();
auto gpu_driver = registry.AccessGuestDriverProfile();
DeduceTextureHandlerSize(gpu_driver, used_samplers);
// Deduce Indexed Samplers
if (!uses_indexed_samplers) {

View File

@@ -12,6 +12,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
if (sampler_info) {
return *sampler_info;
}
const auto sampler =
buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
: registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
return SamplerInfo{TextureType::Texture2D, false, false, false};

View File

@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -30,7 +31,7 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const bool is_signed_b = instr.xmad.sign_b == 1;
const bool is_signed_c = is_signed_a;
auto [is_merge, is_psl, is_high_b, mode, op_b,
auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
@@ -63,15 +64,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
const Node original_b = op_b;
op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
const Node original_b = op_b_binding;
const Node op_b =
SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
// TODO(Rodrigo): Use an appropiate sign for this operation
Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
// we already check sign_a and sign_b is difference or not before so just use one in here.
Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
if (is_psl) {
product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
product =
SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
}
SetTemporary(bb, 0, product);
product = GetTemporary(0);
@@ -88,12 +93,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return BitfieldExtract(original_c, 16, 16);
case Tegra::Shader::XmadMode::CBcc: {
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
NO_PRECISE, original_b, Immediate(16));
return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
shifted_b);
original_b, Immediate(16));
return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
}
case Tegra::Shader::XmadMode::CSfu: {
const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
op_a, Immediate(0));
const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
op_b, Immediate(0));
const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
const Node comp_minus_a = GetPredicateComparisonInteger(
PredCondition::NotEqual, is_signed_a,
SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
Immediate(0x80000000)),
Immediate(0));
const Node comp_minus_b = GetPredicateComparisonInteger(
PredCondition::NotEqual, is_signed_b,
SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
Immediate(0x80000000)),
Immediate(0));
Node new_c = Operation(
OperationCode::Select, comp_minus_a,
SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
original_c);
new_c = Operation(
OperationCode::Select, comp_minus_b,
SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
std::move(new_c));
return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
}
default:
UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
UNREACHABLE();
return Immediate(0);
}
}();
@@ -102,18 +135,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
op_c = GetTemporary(1);
// TODO(Rodrigo): Use an appropiate sign for this operation
Node sum = Operation(OperationCode::IAdd, product, op_c);
Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
SetTemporary(bb, 2, sum);
sum = GetTemporary(2);
if (is_merge) {
const Node a = BitfieldExtract(sum, 0, 16);
const Node b =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
Immediate(0), Immediate(16));
const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
Immediate(16));
sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
}
SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
SetRegister(bb, instr.gpr0, sum);
SetRegister(bb, instr.gpr0, std::move(sum));
return pc;
}

View File

@@ -0,0 +1,161 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <tuple>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
using Tegra::Engines::ConstBufferEngineInterface;
using Tegra::Engines::SamplerDescriptor;
using Tegra::Engines::ShaderType;
namespace {
GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage == ShaderType::Compute) {
return {};
}
auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
GraphicsInfo info;
info.tfb_layouts = graphics.regs.tfb_layouts;
info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
info.primitive_topology = graphics.regs.draw.topology;
info.tessellation_primitive = graphics.regs.tess_mode.prim;
info.tessellation_spacing = graphics.regs.tess_mode.spacing;
info.tfb_enabled = graphics.regs.tfb_enabled;
info.tessellation_clockwise = graphics.regs.tess_mode.cw;
return info;
}
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage != ShaderType::Compute) {
return {};
}
auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
const auto& launch = compute.launch_description;
ComputeInfo info;
info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
info.local_memory_size_in_words = launch.local_pos_alloc;
info.shared_memory_size_in_words = launch.shared_alloc;
return info;
}
} // Anonymous namespace
Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
: stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
Registry::Registry(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine)
: stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
shader_stage, engine)} {}
Registry::~Registry() = default;
std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
const std::pair<u32, u32> key = {buffer, offset};
const auto iter = keys.find(key);
if (iter != keys.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
keys.emplace(key, value);
return value;
}
std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
const u32 key = offset;
const auto iter = bound_samplers.find(key);
if (iter != bound_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
bound_samplers.emplace(key, value);
return value;
}
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
bindless_samplers.emplace(key, value);
return value;
}
void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
keys.insert_or_assign({buffer, offset}, value);
}
void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
bound_samplers.insert_or_assign(offset, sampler);
}
void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
bool Registry::IsConsistent() const {
if (!engine) {
return true;
}
return std::all_of(keys.begin(), keys.end(),
[this](const auto& pair) {
const auto [cbuf, offset] = pair.first;
const auto value = pair.second;
return value == engine->AccessConstBuffer32(stage, cbuf, offset);
}) &&
std::all_of(bound_samplers.begin(), bound_samplers.end(),
[this](const auto& sampler) {
const auto [key, value] = sampler;
return value == engine->AccessBoundSampler(stage, key);
}) &&
std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
[this](const auto& sampler) {
const auto [cbuf, offset] = sampler.first;
const auto value = sampler.second;
return value == engine->AccessBindlessSampler(stage, cbuf, offset);
});
}
bool Registry::HasEqualKeys(const Registry& rhs) const {
return std::tie(keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
}
const GraphicsInfo& Registry::GetGraphicsInfo() const {
ASSERT(stage != Tegra::Engines::ShaderType::Compute);
return graphics_info;
}
const ComputeInfo& Registry::GetComputeInfo() const {
ASSERT(stage == Tegra::Engines::ShaderType::Compute);
return compute_info;
}
} // namespace VideoCommon::Shader

View File

@@ -0,0 +1,137 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include "common/common_types.h"
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
struct GraphicsInfo {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
tfb_layouts{};
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
Maxwell::PrimitiveTopology primitive_topology{};
Maxwell::TessellationPrimitive tessellation_primitive{};
Maxwell::TessellationSpacing tessellation_spacing{};
bool tfb_enabled = false;
bool tessellation_clockwise = false;
};
static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
std::is_standard_layout_v<GraphicsInfo>);
struct ComputeInfo {
std::array<u32, 3> workgroup_size{};
u32 shared_memory_size_in_words = 0;
u32 local_memory_size_in_words = 0;
};
static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
struct SerializedRegistryInfo {
VideoCore::GuestDriverProfile guest_driver_profile;
u32 bound_buffer = 0;
GraphicsInfo graphics;
ComputeInfo compute;
};
/**
* The Registry is a class use to interface the 3D and compute engines with the shader compiler.
* With it, the shader can obtain required data from GPU state and store it for disk shader
* compilation.
*/
class Registry {
public:
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
explicit Registry(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine);
~Registry();
/// Retrieves a key from the registry, if it's registered, it will give the registered value, if
/// not it will obtain it from maxwell3d and register it.
std::optional<u32> ObtainKey(u32 buffer, u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key.
void InsertKey(u32 buffer, u32 offset, u32 value);
/// Inserts a bound sampler key.
void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Inserts a bindless sampler key.
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Checks keys and samplers against engine's current const buffers.
/// Returns true if they are the same value, false otherwise.
bool IsConsistent() const;
/// Returns true if the keys are equal to the other ones in the registry.
bool HasEqualKeys(const Registry& rhs) const;
/// Returns graphics information from this shader
const GraphicsInfo& GetGraphicsInfo() const;
/// Returns compute information from this shader
const ComputeInfo& GetComputeInfo() const;
/// Gives an getter to the const buffer keys in the database.
const KeyMap& GetKeys() const {
return keys;
}
/// Gets samplers database.
const BoundSamplerMap& GetBoundSamplers() const {
return bound_samplers;
}
/// Gets bindless samplers database.
const BindlessSamplerMap& GetBindlessSamplers() const {
return bindless_samplers;
}
/// Gets bound buffer used on this shader
u32 GetBoundBuffer() const {
return bound_buffer;
}
/// Obtains access to the guest driver's profile.
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
}
private:
const Tegra::Engines::ShaderType stage;
VideoCore::GuestDriverProfile stored_guest_driver_profile;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
BindlessSamplerMap bindless_samplers;
u32 bound_buffer;
GraphicsInfo graphics_info;
ComputeInfo compute_info;
};
} // namespace VideoCommon::Shader

View File

@@ -11,6 +11,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Registry& registry)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
Decode();
PostDecode();
}
@@ -95,6 +96,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
}
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
MarkAttributeUsage(index, element);
used_input_attributes.emplace(index);
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
@@ -105,42 +107,8 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
}
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
if (index == Attribute::Index::LayerViewportPointSize) {
switch (element) {
case 0:
UNIMPLEMENTED();
break;
case 1:
uses_layer = true;
break;
case 2:
uses_viewport_index = true;
break;
case 3:
uses_point_size = true;
break;
}
}
if (index == Attribute::Index::TessCoordInstanceIDVertexID) {
switch (element) {
case 2:
uses_instance_id = true;
break;
case 3:
uses_vertex_id = true;
break;
default:
break;
}
}
if (index == Attribute::Index::ClipDistances0123 ||
index == Attribute::Index::ClipDistances4567) {
const auto clip_index =
static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element);
used_clip_distances.at(clip_index) = true;
}
MarkAttributeUsage(index, element);
used_output_attributes.insert(index);
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
@@ -451,6 +419,54 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
Immediate(bits));
}
void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
switch (index) {
case Attribute::Index::LayerViewportPointSize:
switch (element) {
case 0:
UNIMPLEMENTED();
break;
case 1:
uses_layer = true;
break;
case 2:
uses_viewport_index = true;
break;
case 3:
uses_point_size = true;
break;
}
break;
case Attribute::Index::TessCoordInstanceIDVertexID:
switch (element) {
case 2:
uses_instance_id = true;
break;
case 3:
uses_vertex_id = true;
break;
}
break;
case Attribute::Index::ClipDistances0123:
case Attribute::Index::ClipDistances4567: {
const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
used_clip_distances.at(clip_index) = true;
break;
}
case Attribute::Index::FrontColor:
case Attribute::Index::FrontSecondaryColor:
case Attribute::Index::BackColor:
case Attribute::Index::BackSecondaryColor:
uses_legacy_varyings = true;
break;
default:
if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
uses_legacy_varyings = true;
}
break;
}
}
std::size_t ShaderIR::DeclareAmend(Node new_amend) {
const std::size_t id = amend_code.size();
amend_code.push_back(new_amend);

View File

@@ -18,8 +18,8 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
ConstBufferLocker& locker);
Registry& registry);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -137,6 +137,10 @@ public:
return uses_vertex_id;
}
bool UsesLegacyVaryings() const {
return uses_legacy_varyings;
}
bool UsesWarps() const {
return uses_warps;
}
@@ -343,6 +347,9 @@ private:
/// Inserts a sequence of bits from a node
Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
/// Marks the usage of a input or output attribute.
void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
@@ -414,7 +421,7 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const CompilerSettings settings;
ConstBufferLocker& locker;
Registry& registry;
bool decompiled{};
bool disable_flow_stack{};
@@ -443,6 +450,7 @@ private:
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_legacy_varyings{};
bool uses_warps{};
bool uses_indexed_samplers{};

View File

@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
return {tracked, track};
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
auto bound_buffer = locker.ObtainBoundBuffer();
if (!bound_buffer) {
const u32 bound_buffer = registry.GetBoundBuffer();
if (bound_buffer != cbuf->GetIndex()) {
return {};
}
if (*bound_buffer != cbuf->GetIndex()) {
return {};
}
auto pair = DecoupleIndirectRead(*operation);
const auto pair = DecoupleIndirectRead(*operation);
if (!pair) {
return {};
}
auto [gpr, base_offset] = *pair;
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
auto gpu_driver = locker.AccessGuestDriverProfile();
if (gpu_driver == nullptr) {
return {};
}
const auto& gpu_driver = registry.AccessGuestDriverProfile();
const u32 bindless_cv = NewCustomVariable();
const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
Immediate(gpu_driver->GetTextureHandlerSize()));
const Node op =
Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
const Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));

View File

@@ -0,0 +1,115 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <unordered_map>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/transform_feedback.h"
namespace VideoCommon::Shader {
namespace {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
/// Attribute offsets that describe a vector
constexpr std::array VECTORS = {
28, // gl_Position
32, // Generic 0
36, // Generic 1
40, // Generic 2
44, // Generic 3
48, // Generic 4
52, // Generic 5
56, // Generic 6
60, // Generic 7
64, // Generic 8
68, // Generic 9
72, // Generic 10
76, // Generic 11
80, // Generic 12
84, // Generic 13
88, // Generic 14
92, // Generic 15
96, // Generic 16
100, // Generic 17
104, // Generic 18
108, // Generic 19
112, // Generic 20
116, // Generic 21
120, // Generic 22
124, // Generic 23
128, // Generic 24
132, // Generic 25
136, // Generic 26
140, // Generic 27
144, // Generic 28
148, // Generic 29
152, // Generic 30
156, // Generic 31
160, // gl_FrontColor
164, // gl_FrontSecondaryColor
160, // gl_BackColor
164, // gl_BackSecondaryColor
192, // gl_TexCoord[0]
196, // gl_TexCoord[1]
200, // gl_TexCoord[2]
204, // gl_TexCoord[3]
208, // gl_TexCoord[4]
212, // gl_TexCoord[5]
216, // gl_TexCoord[6]
220, // gl_TexCoord[7]
};
} // namespace
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
std::unordered_map<u8, VaryingTFB> tfb;
for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
const auto& locations = info.tfb_varying_locs[buffer];
const auto& layout = info.tfb_layouts[buffer];
const std::size_t varying_count = layout.varying_count;
std::size_t highest = 0;
for (std::size_t offset = 0; offset < varying_count; ++offset) {
const std::size_t base_offset = offset;
const u8 location = locations[offset];
VaryingTFB varying;
varying.buffer = layout.stream;
varying.stride = layout.stride;
varying.offset = offset * sizeof(u32);
varying.components = 1;
if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
const u8 base_index = location / 4;
while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
++offset;
++varying.components;
}
}
[[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
}
UNIMPLEMENTED_IF(highest != layout.stride);
}
return tfb;
}
} // namespace VideoCommon::Shader

View File

@@ -0,0 +1,23 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
struct VaryingTFB {
std::size_t buffer;
std::size_t stride;
std::size_t offset;
std::size_t components;
};
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
} // namespace VideoCommon::Shader

View File

@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return PixelFormat::RGBA16U;
case Tegra::RenderTargetFormat::RGBA16_SNORM:
return PixelFormat::RGBA16S;
case Tegra::RenderTargetFormat::RGBA16_UINT:
return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:

View File

@@ -25,82 +25,83 @@ enum class PixelFormat {
R8UI = 7,
RGBA16F = 8,
RGBA16U = 9,
RGBA16UI = 10,
R11FG11FB10F = 11,
RGBA32UI = 12,
DXT1 = 13,
DXT23 = 14,
DXT45 = 15,
DXN1 = 16, // This is also known as BC4
DXN2UNORM = 17,
DXN2SNORM = 18,
BC7U = 19,
BC6H_UF16 = 20,
BC6H_SF16 = 21,
ASTC_2D_4X4 = 22,
BGRA8 = 23,
RGBA32F = 24,
RG32F = 25,
R32F = 26,
R16F = 27,
R16U = 28,
R16S = 29,
R16UI = 30,
R16I = 31,
RG16 = 32,
RG16F = 33,
RG16UI = 34,
RG16I = 35,
RG16S = 36,
RGB32F = 37,
RGBA8_SRGB = 38,
RG8U = 39,
RG8S = 40,
RG32UI = 41,
RGBX16F = 42,
R32UI = 43,
R32I = 44,
ASTC_2D_8X8 = 45,
ASTC_2D_8X5 = 46,
ASTC_2D_5X4 = 47,
BGRA8_SRGB = 48,
DXT1_SRGB = 49,
DXT23_SRGB = 50,
DXT45_SRGB = 51,
BC7U_SRGB = 52,
R4G4B4A4U = 53,
ASTC_2D_4X4_SRGB = 54,
ASTC_2D_8X8_SRGB = 55,
ASTC_2D_8X5_SRGB = 56,
ASTC_2D_5X4_SRGB = 57,
ASTC_2D_5X5 = 58,
ASTC_2D_5X5_SRGB = 59,
ASTC_2D_10X8 = 60,
ASTC_2D_10X8_SRGB = 61,
ASTC_2D_6X6 = 62,
ASTC_2D_6X6_SRGB = 63,
ASTC_2D_10X10 = 64,
ASTC_2D_10X10_SRGB = 65,
ASTC_2D_12X12 = 66,
ASTC_2D_12X12_SRGB = 67,
ASTC_2D_8X6 = 68,
ASTC_2D_8X6_SRGB = 69,
ASTC_2D_6X5 = 70,
ASTC_2D_6X5_SRGB = 71,
E5B9G9R9F = 72,
RGBA16S = 10,
RGBA16UI = 11,
R11FG11FB10F = 12,
RGBA32UI = 13,
DXT1 = 14,
DXT23 = 15,
DXT45 = 16,
DXN1 = 17, // This is also known as BC4
DXN2UNORM = 18,
DXN2SNORM = 19,
BC7U = 20,
BC6H_UF16 = 21,
BC6H_SF16 = 22,
ASTC_2D_4X4 = 23,
BGRA8 = 24,
RGBA32F = 25,
RG32F = 26,
R32F = 27,
R16F = 28,
R16U = 29,
R16S = 30,
R16UI = 31,
R16I = 32,
RG16 = 33,
RG16F = 34,
RG16UI = 35,
RG16I = 36,
RG16S = 37,
RGB32F = 38,
RGBA8_SRGB = 39,
RG8U = 40,
RG8S = 41,
RG32UI = 42,
RGBX16F = 43,
R32UI = 44,
R32I = 45,
ASTC_2D_8X8 = 46,
ASTC_2D_8X5 = 47,
ASTC_2D_5X4 = 48,
BGRA8_SRGB = 49,
DXT1_SRGB = 50,
DXT23_SRGB = 51,
DXT45_SRGB = 52,
BC7U_SRGB = 53,
R4G4B4A4U = 54,
ASTC_2D_4X4_SRGB = 55,
ASTC_2D_8X8_SRGB = 56,
ASTC_2D_8X5_SRGB = 57,
ASTC_2D_5X4_SRGB = 58,
ASTC_2D_5X5 = 59,
ASTC_2D_5X5_SRGB = 60,
ASTC_2D_10X8 = 61,
ASTC_2D_10X8_SRGB = 62,
ASTC_2D_6X6 = 63,
ASTC_2D_6X6_SRGB = 64,
ASTC_2D_10X10 = 65,
ASTC_2D_10X10_SRGB = 66,
ASTC_2D_12X12 = 67,
ASTC_2D_12X12_SRGB = 68,
ASTC_2D_8X6 = 69,
ASTC_2D_8X6_SRGB = 70,
ASTC_2D_6X5 = 71,
ASTC_2D_6X5_SRGB = 72,
E5B9G9R9F = 73,
MaxColorFormat,
// Depth formats
Z32F = 73,
Z16 = 74,
Z32F = 74,
Z16 = 75,
MaxDepthFormat,
// DepthStencil formats
Z24S8 = 75,
S8Z24 = 76,
Z32FS8 = 77,
Z24S8 = 76,
S8Z24 = 77,
Z32FS8 = 78,
MaxDepthStencilFormat,
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // R8UI
0, // RGBA16F
0, // RGBA16U
0, // RGBA16S
0, // RGBA16UI
0, // R11FG11FB10F
0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
8, // R8UI
64, // RGBA16F
64, // RGBA16U
64, // RGBA16S
64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // R8UI
SurfaceCompression::None, // RGBA16F
SurfaceCompression::None, // RGBA16U
SurfaceCompression::None, // RGBA16S
SurfaceCompression::None, // RGBA16UI
SurfaceCompression::None, // R11FG11FB10F
SurfaceCompression::None, // RGBA32UI

View File

@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
constexpr std::array<Table, 75> DefinitionTable = {{
constexpr std::array<Table, 76> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
{TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
{TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
{TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
{TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
{TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},

View File

@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
params.height = tic.Height();
params.depth = tic.Depth();
params.pitch = params.is_tiled ? 0 : tic.Pitch();
if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
params.depth = 1;
} else if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.num_levels = tic.max_mip_level + 1;

View File

@@ -104,6 +104,11 @@ public:
if (!cache_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
if (!IsTypeCompatible(tic.texture_type, entry)) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
if (guard_samplers) {
@@ -914,13 +919,15 @@ private:
params.width = 1;
params.height = 1;
params.depth = 1;
if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
params.depth = 6;
}
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F;
params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
invalid_memory.clear();
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
surface->UploadTexture(invalid_memory);
surface->MarkAsModified(false, Tick());
@@ -1082,6 +1089,36 @@ private:
return siblings_table[static_cast<std::size_t>(format)];
}
/// Returns true the shader sampler entry is compatible with the TIC texture type.
static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
const VideoCommon::Shader::Sampler& entry) {
const auto shader_type = entry.GetType();
switch (tic_type) {
case Tegra::Texture::TextureType::Texture1D:
case Tegra::Texture::TextureType::Texture1DArray:
return shader_type == Tegra::Shader::TextureType::Texture1D;
case Tegra::Texture::TextureType::Texture1DBuffer:
// TODO(Rodrigo): Assume as valid for now
return true;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return shader_type == Tegra::Shader::TextureType::Texture2D;
case Tegra::Texture::TextureType::Texture2DArray:
return shader_type == Tegra::Shader::TextureType::Texture2D ||
shader_type == Tegra::Shader::TextureType::TextureCube;
case Tegra::Texture::TextureType::Texture3D:
return shader_type == Tegra::Shader::TextureType::Texture3D;
case Tegra::Texture::TextureType::TextureCubeArray:
case Tegra::Texture::TextureType::TextureCubemap:
if (shader_type == Tegra::Shader::TextureType::TextureCube) {
return true;
}
return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
}
UNREACHABLE();
return true;
}
struct FramebufferTargetInfo {
TSurface target;
TView view;

File diff suppressed because it is too large Load Diff

View File

@@ -539,7 +539,7 @@ void Config::ReadDebuggingValues() {
void Config::ReadServiceValues() {
qt_config->beginGroup(QStringLiteral("Services"));
Settings::values.bcat_backend =
ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("boxcat"))
ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("null"))
.toString()
.toStdString();
Settings::values.bcat_boxcat_local =
@@ -682,6 +682,8 @@ void Config::ReadSystemValues() {
Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt();
Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt();
const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool();
if (rng_seed_enabled) {
Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong();
@@ -698,6 +700,8 @@ void Config::ReadSystemValues() {
Settings::values.custom_rtc = std::nullopt;
}
Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt();
qt_config->endGroup();
}
@@ -1114,6 +1118,7 @@ void Config::SaveSystemValues() {
WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false);
WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1);
WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1);
WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false);
WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0);
@@ -1125,6 +1130,8 @@ void Config::SaveSystemValues() {
Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
0);
WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1);
qt_config->endGroup();
}

View File

@@ -56,6 +56,8 @@ void ConfigureSystem::SetConfiguration() {
enabled = !Core::System::GetInstance().IsPoweredOn();
ui->combo_language->setCurrentIndex(Settings::values.language_index);
ui->combo_region->setCurrentIndex(Settings::values.region_index);
ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value());
ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value());
@@ -81,6 +83,8 @@ void ConfigureSystem::ApplyConfiguration() {
}
Settings::values.language_index = ui->combo_language->currentIndex();
Settings::values.region_index = ui->combo_region->currentIndex();
Settings::values.sound_index = ui->combo_sound->currentIndex();
if (ui->rng_seed_checkbox->isChecked()) {
Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16);

View File

@@ -36,5 +36,6 @@ private:
bool enabled = false;
int language_index = 0;
int region_index = 0;
int sound_index = 0;
};

View File

@@ -22,14 +22,14 @@
<string>System Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<item row="2" column="0">
<widget class="QLabel" name="label_sound">
<property name="text">
<string>Sound output mode</string>
</property>
</widget>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QLabel" name="label_console_id">
<property name="text">
<string>Console ID:</string>
@@ -128,14 +128,60 @@
</item>
</widget>
</item>
<item row="4" column="0">
<item row="1" column="0">
<widget class="QLabel" name="label_region">
<property name="text">
<string>Region:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="combo_region">
<item>
<property name="text">
<string>Japan</string>
</property>
</item>
<item>
<property name="text">
<string>USA</string>
</property>
</item>
<item>
<property name="text">
<string>Europe</string>
</property>
</item>
<item>
<property name="text">
<string>Australia</string>
</property>
</item>
<item>
<property name="text">
<string>China</string>
</property>
</item>
<item>
<property name="text">
<string>Korea</string>
</property>
</item>
<item>
<property name="text">
<string>Taiwan</string>
</property>
</item>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="rng_seed_checkbox">
<property name="text">
<string>RNG Seed</string>
</property>
</widget>
</item>
<item row="1" column="1">
<item row="2" column="1">
<widget class="QComboBox" name="combo_sound">
<item>
<property name="text">
@@ -161,7 +207,7 @@
</property>
</widget>
</item>
<item row="2" column="1">
<item row="3" column="1">
<widget class="QPushButton" name="button_regenerate_console_id">
<property name="sizePolicy">
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
@@ -177,14 +223,14 @@
</property>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<widget class="QCheckBox" name="custom_rtc_checkbox">
<property name="text">
<string>Custom RTC</string>
</property>
</widget>
</item>
<item row="3" column="1">
<item row="4" column="1">
<widget class="QDateTimeEdit" name="custom_rtc_edit">
<property name="minimumDate">
<date>
@@ -198,7 +244,7 @@
</property>
</widget>
</item>
<item row="4" column="1">
<item row="5" column="1">
<widget class="QLineEdit" name="rng_seed_edit">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">

View File

@@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"(
QProgressBar {}
QProgressBar::chunk {})";
constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"(
QProgressBar {
background-color: black;
border: 2px solid white;
border-radius: 4px;
padding: 2px;
}
QProgressBar::chunk {
background-color: #0ab9e6;
width: 1px;
})";
constexpr char PROGRESSBAR_STYLE_BUILD[] = R"(
QProgressBar {
background-color: black;
@@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent)
stage_translations = {
{VideoCore::LoadCallbackStage::Prepare, tr("Loading...")},
{VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")},
{VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")},
{VideoCore::LoadCallbackStage::Complete, tr("Launching...")},
};
progressbar_style = {
{VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE},
{VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE},
{VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD},
{VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE},
};
@@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
}
// update labels and progress bar
if (stage == VideoCore::LoadCallbackStage::Decompile ||
stage == VideoCore::LoadCallbackStage::Build) {
if (stage == VideoCore::LoadCallbackStage::Build) {
ui->stage->setText(stage_translations[stage].arg(value).arg(total));
} else {
ui->stage->setText(stage_translations[stage]);

View File

@@ -1716,11 +1716,6 @@ void GMainWindow::OnStartGame() {
}
void GMainWindow::OnPauseGame() {
Core::System& system{Core::System::GetInstance()};
if (system.GetExitLock() && !ConfirmForceLockedExit()) {
return;
}
emu_thread->SetRunning(false);
ui.action_Start->setEnabled(true);

View File

@@ -452,7 +452,7 @@ void Config::ReadValues() {
Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", "");
// Services
Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "boxcat");
Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "null");
Settings::values.bcat_boxcat_local =
sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false);
}