Compare commits

...

202 Commits

Author SHA1 Message Date
ReinUsesLisp
a665581684 vk_blit_screen: Address feedback 2020-01-20 18:43:11 -03:00
ReinUsesLisp
f5dfe68a94 vk_blit_screen: Initial implementation
This abstraction takes care of presenting accelerated and
non-accelerated or "framebuffer" images to the Vulkan swapchain.
2020-01-19 21:12:43 -03:00
bunnei
41373d212e Merge pull request #3313 from ReinUsesLisp/vk-rasterizer
vk_rasterizer: Implement Vulkan's rasterizer
2020-01-19 18:09:01 -05:00
Bartosz Kaszubowski
c610a8ac5a GUI/gamelist: add "None" as an option for second row and remove dynamically duplicate row options (#3309)
* GUI/gamelist: add "None" as an option for second row and remove duplicated row options

* fix clang-format warnings
2020-01-19 15:58:14 -05:00
Bartosz Kaszubowski
265fe40451 GUI/gamelist: add "None" as an option for second row and remove dynamically duplicate row options (#3309)
* GUI/gamelist: add "None" as an option for second row and remove duplicated row options

* fix clang-format warnings
2020-01-19 15:57:14 -05:00
Bartosz Kaszubowski
9ac33c2620 GUI/gamelist: add "None" as an option for second row and remove dynamically duplicate row options (#3309)
* GUI/gamelist: add "None" as an option for second row and remove duplicated row options

* fix clang-format warnings
2020-01-19 15:56:49 -05:00
Fernando Sahmkow
51c8aea979 Merge pull request #3317 from ReinUsesLisp/gl-decomp-cc-decomp
gl_shader_decompiler: Fix decompilation of condition codes
2020-01-18 19:56:55 -04:00
bunnei
94c41ab1d1 Merge pull request #3323 from ReinUsesLisp/fix-template-res
gl_state: Use bool instead of GLboolean
2020-01-18 17:37:05 -05:00
ReinUsesLisp
d110a371bb gl_state: Use bool instead of GLboolean
This fixes template resolution considering GLboolean an integer instead
of a bool.
2020-01-18 19:10:34 -03:00
bunnei
e972016456 Merge pull request #3298 from Simek/missing_hotkeys
GUI: add few missing hotkeys to main menu
2020-01-18 13:07:13 -05:00
bunnei
278264b9e5 Merge pull request #3314 from degasus/physical_mem
core/hle/kernel: Simplify PhysicalMemory usages.
2020-01-18 03:03:48 -05:00
Markus Wick
56672b8c98 core/memory: Create a special MapMemoryRegion for physical memory.
This allows us to create a fastmem arena within the memory.cpp helpers.
2020-01-18 08:38:47 +01:00
Markus Wick
55103da066 core/hle: Simplify PhysicalMemory usage in vm_manager. 2020-01-18 08:29:19 +01:00
Markus Wick
7e94e544f4 core/loaders: Simplify PhysicalMemory usage.
It is currently a std::vector, however we might want to replace it with a more fancy allocator.
So we can't use the C++ iterators any more.
2020-01-18 08:29:19 +01:00
bunnei
9bf4850f74 Merge pull request #3305 from ReinUsesLisp/point-size-program
gl_state: Implement PROGRAM_POINT_SIZE
2020-01-18 01:56:32 -05:00
bunnei
15163edaaa Merge pull request #3312 from ReinUsesLisp/atoms-u32
shader/memory: Implement ATOMS.ADD.U32
2020-01-18 00:54:07 -05:00
bunnei
3cce5056ff Merge pull request #3318 from jroweboy/remove-cpu-vendor
Remove unused CPU Vendor string and telemtry field
2020-01-17 22:24:52 -05:00
James Rowe
4512a6bbfc Remove unused CPU Vendor string and telemtry field
The information is duplicated in the brand string and the telemetry field is unused
2020-01-17 18:41:18 -07:00
ReinUsesLisp
09b1d762d7 vk_rasterizer: Address feedback 2020-01-17 21:40:01 -03:00
ReinUsesLisp
f34e519da3 gl_shader_decompiler: Fix decompilation of condition codes
Use Visit instead of reimplementing it. Fixes unimplemented negations
for condition codes.
2020-01-17 21:23:01 -03:00
bunnei
530a761e7a Merge pull request #3316 from TotalCaesar659/linux-headbar-icon
Add headbar icon on Linux
2020-01-17 19:04:10 -05:00
TotalCaesar659
dd74fd014b Add headbar icon on Linux 2020-01-18 02:46:07 +03:00
bunnei
48863afb65 Merge pull request #3306 from ReinUsesLisp/gl-texture
gl_texture_cache: Minor fixes and style changes
2020-01-17 15:44:02 -05:00
bunnei
657b3a366e Merge pull request #3311 from ReinUsesLisp/z32fx24s8
format_lookup_table: Fix ZF32_X24S8 component types
2020-01-17 08:22:32 -05:00
ReinUsesLisp
fe5356d223 vk_rasterizer: Implement Vulkan's rasterizer
This abstraction is Vulkan's equivalent to OpenGL's rasterizer. It takes
care of joining all parts of the backend and rendering accordingly on
demand.
2020-01-16 23:05:15 -03:00
ReinUsesLisp
38e789c761 renderer_vulkan: Add header as placeholder 2020-01-16 22:54:15 -03:00
bunnei
e041f33569 Merge pull request #3300 from ReinUsesLisp/vk-texture-cache
vk_texture_cache: Implement generic texture cache on Vulkan
2020-01-16 19:19:26 -05:00
ReinUsesLisp
f09cd52980 vk_texture_cache: Address feedback 2020-01-16 18:23:10 -03:00
ReinUsesLisp
63ba41a26d shader/memory: Implement ATOMS.ADD.U32 2020-01-16 17:30:55 -03:00
ReinUsesLisp
0caab54b5d format_lookup_table: Fix ZF32_X24S8 component types
Component types for ZF32_X24S8 were using UNORM. Drivers will set FLOAT,
UINT, UNORM, UNORM; causing a format mismatch. This commit addresses
that.
2020-01-16 17:29:13 -03:00
Rodrigo Locatti
82e1285c1e vk_texture_cache: Fix typo in commentary
Co-Authored-By: MysticExile <30736337+MysticExile@users.noreply.github.com>
2020-01-16 16:59:46 -03:00
bunnei
30faf6a964 Merge pull request #3308 from lioncash/private
maxwell_3d: Make dirty_pointers private
2020-01-16 13:26:35 -05:00
bunnei
d23869811d Merge pull request #3304 from lioncash/fwd-decl
renderer_opengl/utils: Forward declare private structs
2020-01-16 11:21:18 -05:00
bunnei
a43ac8c79e Merge pull request #3307 from jroweboy/fix-git
Fix git version in scm_rev.cpp
2020-01-16 10:00:43 -05:00
Lioncash
9e874898f5 maxwell_3d: Make dirty_pointers private
This isn't used outside of the class itself, so we can make it private
for the time being.
2020-01-16 04:07:15 -05:00
James Rowe
b429095b61 Fix git version in scm_rev.cpp 2020-01-16 00:12:50 -07:00
ReinUsesLisp
c375d735e6 gl_state: Implement PROGRAM_POINT_SIZE
For gl_PointSize to have effect we have to activate
GL_PROGRAM_POINT_SIZE.
2020-01-15 16:14:17 -03:00
Lioncash
7af56dfa76 renderer_opengl/utils: Remove unused header inclusions
Nothing from these headers are used, so they can be removed.
2020-01-15 06:31:23 -05:00
Lioncash
06d30fbcca renderer_opengl/utils: Forward declare private structs
Keeps the definitions hidden and allows changes to the structs without
needing to recompile all users of classes containing said structs.
2020-01-15 06:30:01 -05:00
ReinUsesLisp
66a1c777c9 gl_texture_cache: Use local variables to simplify DownloadTexture 2020-01-14 17:39:48 -03:00
ReinUsesLisp
cdb00546f0 gl_texture_cache: Fix format for RGBX16F 2020-01-14 17:38:33 -03:00
ReinUsesLisp
2d09467f6f gl_texture_cache: Use Snorm internal format for RG8S 2020-01-14 17:37:58 -03:00
ReinUsesLisp
02624c35ec gl_texture_cache: Use Snorm internal format for ABGR8S 2020-01-14 17:37:23 -03:00
Rodrigo Locatti
64cd46579b Merge pull request #3303 from lioncash/reorder
control_flow: Silence -Wreorder warning for CFGRebuildState
2020-01-14 16:15:18 -03:00
Rodrigo Locatti
81e9e229fa Merge pull request #3302 from lioncash/unused-var
gl_shader_cache: Remove unused variables
2020-01-14 16:14:47 -03:00
Lioncash
a1eee1749e control_flow: Silence -Wreorder warning for CFGRebuildState
Organizes the initializer list in the same order that the variables
would actually be initialized in.
2020-01-14 13:28:48 -05:00
bunnei
a83e28b237 Merge pull request #3296 from Simek/hotkeys_resize
GUI/configure: resize hotkeys action column to fit content
2020-01-14 13:17:16 -05:00
Lioncash
f10ea944e0 gl_shader_cache: Remove unused STAGE_RESERVED_UBOS constant
Given this isn't used, this can be removed entirely.
2020-01-14 13:16:52 -05:00
Lioncash
4cd5ad90f3 gl_shader_cache: std::move entries in CachedShader constructor
Avoids several reallocations of std::vector instances where applicable.
2020-01-14 13:14:16 -05:00
Lioncash
15a6840e7a gl_shader_cache: Remove unused entries variable in BuildShader()
Eliminates a few unnecessary constructions of std::vectors.
2020-01-14 13:11:49 -05:00
bunnei
55f95e7f26 Merge pull request #3287 from ReinUsesLisp/ldg-stg-16
shader_ir/memory: Implement u16 and u8 for STG and LDG
2020-01-14 09:57:08 -05:00
bunnei
15788ffcde Merge pull request #3288 from ReinUsesLisp/uncurse-aoffi
shader_ir/texture: Simplify AOFFI code
2020-01-13 23:52:12 -05:00
bunnei
6985eea519 Merge pull request #3290 from ReinUsesLisp/gl-clamp
maxwell_to_vk: Implement GL_CLAMP hacking Nvidia's driver
2020-01-13 19:16:06 -05:00
bunnei
e749f17257 Merge pull request #3292 from degasus/heap_space_fix
core/kernel: Fix GetTotalPhysicalMemoryUsed.
2020-01-13 19:15:43 -05:00
ReinUsesLisp
09e17fbb0f vk_texture_cache: Implement generic texture cache on Vulkan
It currently ignores PBO linearizations since these should be dropped as
soon as possible on OpenGL.
2020-01-13 20:37:50 -03:00
ReinUsesLisp
2b2712fa95 texture_cache/surface_params: Make GetNumLayers public 2020-01-13 20:35:43 -03:00
Bartosz Kaszubowski
da3049aa74 GUI: add few missing hotkeys to main menu 2020-01-13 00:49:44 +01:00
Bartosz Kaszubowski
6726e8b784 GUI/configure: resize hotkeys column to content 2020-01-12 22:46:28 +01:00
Fernando Sahmkow
43fc793439 Merge pull request #3283 from ReinUsesLisp/vk-compute-pass
vk_compute_pass: Add compute passes to emulate missing Vulkan features
2020-01-12 11:14:59 -04:00
Markus Wick
c76ffa5019 core/kernel: Fix GetTotalPhysicalMemoryUsed.
module._memory was already moved over to a new shared_ptr.
So code_memory_size was not increased at all.

This lowers the heap space and so saves a bit of memory, usually between 50 to 100 MB.

This fixes a regression of c0a01f3adc
2020-01-11 14:04:44 +01:00
Rodrigo Locatti
b1138e5ea1 vk_compute_pass: Address feedback
Comment hardcoded SPIR-V modules.
2020-01-10 22:46:34 -03:00
ReinUsesLisp
3d46709b7f maxwell_to_vk: Implement GL_CLAMP hacking Nvidia's driver
Nvidia's driver defaults invalid enumerations to GL_CLAMP. Vulkan
doesn't expose GL_CLAMP through its API, but we can hack it on Nvidia's
driver using the internal driver defaults.
2020-01-10 17:12:50 -03:00
ReinUsesLisp
13021b534c shader_ir/texture: Simplify AOFFI code 2020-01-09 03:50:37 -03:00
ReinUsesLisp
e2a2a556b9 shader_ir/memory: Implement u16 and u8 for STG and LDG
Using the same technique we used for u8 on LDG, implement u16.

In the case of STG, load memory and insert the value we want to set
into it with bitfieldInsert. Then set that value.
2020-01-09 02:12:29 -03:00
ReinUsesLisp
908e085d02 vk_compute_pass: Add compute passes to emulate missing Vulkan features
This currently only supports quad arrays and u8 indices.

In the future we can remove quad arrays with a table written from the
CPU, but this was used to bootstrap the other passes helpers and it
was left in the code.

The blob code is generated from the "shaders/" directory. Read the
instructions there to know how to generate the SPIR-V.
2020-01-08 19:24:26 -03:00
ReinUsesLisp
82a64da077 vk_shader_util: Add helper to build SPIR-V shaders 2020-01-08 19:22:20 -03:00
Fernando Sahmkow
80436c1330 Merge pull request #3279 from ReinUsesLisp/vk-pipeline-cache
vk_pipeline_cache: Initial implementation
2020-01-08 17:31:20 -04:00
bunnei
319c4d2108 Merge pull request #3272 from bunnei/vi-close-layer
service: vi: Implement CloseLayer.
2020-01-07 12:45:34 -05:00
ReinUsesLisp
6888d776ff vk_pipeline_cache: Initial implementation
Given a pipeline key, this cache returns a pipeline abstraction (for
graphics or compute).
2020-01-06 22:02:26 -03:00
ReinUsesLisp
2effdeb924 vk_graphics_pipeline: Initial implementation
This abstractio represents the state of the 3D engine at a given draw.
Instead of changing individual bits of the pipeline how it's done in
APIs like D3D11, OpenGL and NVN; on Vulkan we are forced to put
everything together into a single, immutable object.

It takes advantage of the few dynamic states Vulkan offers.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
dc96a59fa0 vk_compute_pipeline: Initial implementation
This abstraction represents a Vulkan compute pipeline.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
b392a5986e vk_pipeline_cache: Add file and define descriptor update template filler
This function allows us to share code between compute and graphics
pipelines compilation.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
3142f1b597 fixed_pipeline_state: Add depth clamp 2020-01-06 22:02:26 -03:00
ReinUsesLisp
9c548146ca vk_rasterizer: Add placeholder 2020-01-06 22:02:26 -03:00
bunnei
5be00cba15 Merge pull request #3276 from ReinUsesLisp/pipeline-reqs
vk_update_descriptor/vk_renderpass_cache: Add pipeline cache dependencies
2020-01-06 17:03:34 -05:00
bunnei
ee9b4a7f9a Merge pull request #3278 from ReinUsesLisp/vk-memory-manager
renderer_vulkan: Buffer cache, stream buffer and memory manager changes
2020-01-06 17:03:04 -05:00
ReinUsesLisp
5aeff9aff5 vk_renderpass_cache: Initial implementation
The renderpass cache is used to avoid creating renderpasses on each
draw. The hashed structure is not currently optimized.
2020-01-06 18:28:32 -03:00
ReinUsesLisp
322d6a0311 vk_update_descriptor: Initial implementation
The update descriptor is used to store in flat memory a large chunk of
staging data used to update descriptor sets through templates. It
provides a push interface to easily insert descriptors following the
current pipeline. The order used in the descriptor update template has
to be implicitly followed. We can catch bugs here using validation
layers.
2020-01-06 18:28:32 -03:00
ReinUsesLisp
5b01f80a12 vk_stream_buffer/vk_buffer_cache: Avoid halting and use generic cache
The stream buffer before this commit once it was full (no more bytes to
write before looping) waiting for all previous operations to finish.
This was a temporary solution and had a noticeable performance penalty
in performance (from what a profiler showed).

To avoid this mark with fences usages of the stream buffer and once it
loops wait for them to be signaled. On average this will never wait.
Each fence knows where its usage finishes, resulting in a non-paged
stream buffer.

On the other side, the buffer cache is reimplemented using the generic
buffer cache. It makes use of the staging buffer pool and the new
stream buffer.
2020-01-06 18:13:41 -03:00
ReinUsesLisp
ceb851b590 vk_memory_manager: Misc changes
* Allocate memory in discrete exponentially increasing chunks until the
128 MiB threshold. Allocations larger thant that increase linearly by
256 MiB (depending on the required size). This allows to use small
allocations for small resources.

* Move memory maps to a RAII abstraction. To optimize for debugging
tools (like RenderDoc) users will map/unmap on usage. If this ever
becomes a noticeable overhead (from my profiling it doesn't) we can
transparently move to persistent memory maps without harming the API,
getting optimal performance for both gameplay and debugging.

* Improve messages on exceptional situations.

* Fix typos "requeriments" -> "requirements".

* Small style changes.
2020-01-06 18:13:41 -03:00
ReinUsesLisp
85bb6a6f08 vk_buffer_cache: Temporarily remove buffer cache
This is intended for a follow up commit to avoid circular dependencies.
2020-01-06 17:58:46 -03:00
bunnei
984563b773 Merge pull request #3277 from ReinUsesLisp/make-current
yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
2020-01-06 14:09:19 -05:00
ReinUsesLisp
8306703a7d yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
MakeCurrent is a costly (according to Nsight's profiler it takes a tenth
of a millisecond to complete), and we don't have a reason to call it
because:
- Qt no longer signals a warning if it's not called
- yuzu no longer supports macOS
2020-01-06 14:02:47 -03:00
bunnei
09908207fb Merge pull request #3261 from degasus/page_table
core/memory + arm/dynarmic: Use a global offset within our arm page table.
2020-01-06 11:56:59 -05:00
bunnei
89fc75d769 Merge pull request #3257 from degasus/no_busy_loops
video_core: Block in WaitFence.
2020-01-06 00:09:57 -05:00
Fernando Sahmkow
56e450a3f7 Merge pull request #3264 from ReinUsesLisp/vk-descriptor-pool
vk_descriptor_pool: Initial implementation
2020-01-05 15:54:41 -04:00
bunnei
6fe51f398f Merge pull request #2945 from FernandoS27/fix-bcat
nifm: Only return that there's an internet connection when there's a BCATServer
2020-01-05 02:17:16 -05:00
bunnei
cd0a7dfdbc Merge pull request #3258 from FernandoS27/shader-amend
Shader_IR: add the ability to amend code in the shader ir.
2020-01-04 14:05:17 -05:00
Fernando Sahmkow
3dd6b55851 Shader_IR: Address Feedback 2020-01-04 14:40:57 -04:00
bunnei
64c5631579 service: vi: Implement CloseLayer.
- Needed for Undertale.
2020-01-04 00:45:06 -05:00
Rodrigo Locatti
6e347d8d1b Update src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2020-01-03 17:34:30 -03:00
bunnei
624a0f7f3f Merge pull request #3247 from FernandoS27/remap-fix
NvServices: Correct Ioctl Remap.
2020-01-03 12:30:56 -05:00
bunnei
c332c66eb2 Merge pull request #3267 from ReinUsesLisp/remove-maxwell-debugger
yuzu: Remove Maxwell debugger
2020-01-02 22:03:30 -05:00
ReinUsesLisp
0d6d8129c4 yuzu: Remove Maxwell debugger
This was carried from Citra and wasn't really used on yuzu. It also adds
some runtime overhead. This commit removes it from yuzu's codebase.
2020-01-02 23:09:44 -03:00
bunnei
ae0e481677 Merge pull request #3243 from ReinUsesLisp/topologies
maxwell_to_gl: Implement missing primitive topologies
2020-01-01 20:33:33 -05:00
ReinUsesLisp
1fe7df4517 vk_descriptor_pool: Initial implementation
Create a large descriptor pool where we allocate all our descriptors
from. It has to be wide enough to support any pipeline, hence its large
numbers.

If the descritor pool is filled, we allocate more memory at that moment.
This way we can take advantage of permissive drivers like Nvidia's that
allocate more descriptors than what the spec requires.
2020-01-01 16:44:06 -03:00
Markus Wick
0986caa8d8 core/memory + arm/dynarmic: Use a global offset within our arm page table.
This saves us two x64 instructions per load/store instruction.

TODO: Clean up our memory code. We can use this optimization here as well.
2020-01-01 12:24:54 +01:00
bunnei
028b2718ed Merge pull request #3239 from ReinUsesLisp/p2r
shader/p2r: Implement P2R Pr
2019-12-31 20:37:16 -05:00
Fernando Sahmkow
b3371ed09e Shader_IR: add the ability to amend code in the shader ir.
This commit introduces a mechanism by which shader IR code can be
amended and extended. This useful for track algorithms where certain
information can derived from before the track such as indexes to array
samplers.
2019-12-30 15:31:48 -04:00
Fernando Sahmkow
7bd447355f Merge pull request #3248 from ReinUsesLisp/vk-image
vk_image: Add an image object abstraction
2019-12-30 14:25:14 -04:00
Rodrigo Locatti
4cbb363d3f vk_image: Avoid unnecesary equals 2019-12-30 13:28:23 -03:00
Fernando Sahmkow
287d5921cf Merge pull request #3249 from ReinUsesLisp/vk-staging-buffer-pool
vk_staging_buffer_pool: Add a staging pool for temporary operations
2019-12-30 12:25:59 -04:00
Markus Wick
cb9dd01ffd video_core: Block in WaitFence.
This function is called rarely and blocks quite often for a long time.
So don't waste power and let the CPU sleep.

This might also increase the performance as the other cores might be allowed to clock higher.
2019-12-30 13:04:53 +01:00
Rodrigo Locatti
f2c61bbe13 vk_staging_buffer_pool: Initialize last epoch to zero 2019-12-29 19:19:43 -03:00
Fernando Sahmkow
f846e3d6d0 Merge pull request #3250 from ReinUsesLisp/empty-fragment
gl_rasterizer: Allow rendering without fragment shader
2019-12-28 14:33:53 -04:00
bunnei
8a76f816a4 Merge pull request #3228 from ReinUsesLisp/ptp
shader/texture: Implement AOFFI and PTP for TLD4 and TLD4S
2019-12-26 21:43:44 -05:00
ReinUsesLisp
5b989f189f gl_rasterizer: Allow rendering without fragment shader
Rendering without a fragment shader is usually used in depth-only
passes.
2019-12-26 16:38:49 -03:00
ReinUsesLisp
3813af2f3c vk_staging_buffer_pool: Add a staging pool for temporary operations
The job of this abstraction is to provide staging buffers for temporary
operations. Think of image uploads or buffer uploads to device memory.

It automatically deletes unused buffers.
2019-12-25 18:12:17 -03:00
ReinUsesLisp
c83bf7cd1e vk_image: Add an image object abstraction
This object's job is to contain an image and manage its transitions.
Since Nvidia hardware doesn't know what a transition is but Vulkan
requires them anyway, we have to state track image subresources
individually.

To avoid the overhead of tracking each subresource in images with many
subresources (think of cubemap arrays with several mipmaps), this commit
tracks when subresources have diverged. As long as this doesn't happen
we can check the state of the first subresource (that will be shared
with all subresources) and update accordingly.

Image transitions are deferred to the scheduler command buffer.
2019-12-25 18:00:16 -03:00
Fernando Sahmkow
a5bb1ac6e3 NvServices: Correct Ioctl Remap.
This commit corrects a padding value in Ioctl Remap that was actually an 
offset to the mapping address.
2019-12-25 14:37:28 -04:00
Fernando Sahmkow
5619d24377 Merge pull request #3244 from ReinUsesLisp/vk-fps
fixed_pipeline_state: Define structure and loaders
2019-12-25 14:31:29 -04:00
bunnei
4af569ee47 Merge pull request #3236 from ReinUsesLisp/rasterize-enable
gl_rasterizer: Implement RASTERIZE_ENABLE
2019-12-24 22:54:10 -05:00
ReinUsesLisp
b9e3f5eb36 fixed_pipeline_state: Define symetric operator!= and mark as noexcept
Marks as noexcept Hash, operator== and operator!= for consistency.
2019-12-24 18:24:08 -03:00
ReinUsesLisp
4a3026b16b fixed_pipeline_state: Define structure and loaders
The intention behind this hasheable structure is to describe the state
of fixed function pipeline state that gets compiled to a single graphics
pipeline state object. This is all dynamic state in OpenGL but Vulkan
wants it in an immutable state, even if hardware can edit it freely.

In this commit the structure is defined in an optimized state (it uses
booleans, has paddings and many data entries that can be packed to
single integers). This is intentional as an initial implementation that
is easier to debug, implement and review. It will be optimized in later
stages, or it might change if Vulkan gets more dynamic states.
2019-12-22 22:59:11 -03:00
ReinUsesLisp
5770418fb3 maxwell_3d: Add depth bounds registers 2019-12-22 22:55:06 -03:00
ReinUsesLisp
91d35559e5 maxwell_to_gl: Implement missing primitive topologies
Many of these topologies are exclusively available in OpenGL.
2019-12-22 22:33:01 -03:00
bunnei
e976d0e924 Merge pull request #3241 from ReinUsesLisp/gl-shader-cache
gl_shader_cache: Style changes
2019-12-22 16:23:46 -05:00
bunnei
1e76655f83 Merge pull request #3238 from ReinUsesLisp/vk-resource-manager
vk_resource_manager: Catch device losses and other changes
2019-12-22 15:57:16 -05:00
bunnei
0f3ac9cfeb Merge pull request #3203 from FernandoS27/tex-cache-fixes
Texture Cache: Add HLE methods for building 3D textures
2019-12-22 14:25:13 -05:00
Fernando Sahmkow
3dc585d011 Merge pull request #3237 from ReinUsesLisp/vk-shader-decompiler
vk_shader_decompiler: Misc changes
2019-12-22 12:36:56 -04:00
Fernando Sahmkow
218ee18417 Texture Cache: Improve documentation 2019-12-22 12:29:23 -04:00
Fernando Sahmkow
a3916588b6 Texture Cache: Address Feedback 2019-12-22 12:24:34 -04:00
Fernando Sahmkow
51c9e98677 Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios.
This commit adds a series of HLE methods for handling 3D textures in
general. This helps games that generate 3D textures on every frame and
may reduce loading times for certain games.
2019-12-22 12:24:34 -04:00
Fernando Sahmkow
aea978e037 Merge pull request #3230 from ReinUsesLisp/vk-emu-shaders
renderer_vulkan/shader: Add helper GLSL shaders
2019-12-22 11:23:09 -04:00
Fernando Sahmkow
27efcc15e9 Merge pull request #3240 from ReinUsesLisp/decomp-cond-code
vk_shader_decompiler: Use Visit instead of reimplementing it
2019-12-22 11:20:55 -04:00
bunnei
16dcfacbfc Merge pull request #3235 from ReinUsesLisp/ldg-u8
shader/memory: Implement LDG.U8 and unaligned U8 loads
2019-12-21 22:50:28 -05:00
ReinUsesLisp
1e16023d60 gl_shader_cache: Update commentary for shared memory
Remove false commentary. Not dividing by 4 the size of shared memory is
not a hack; it describes the number of integers, not bytes.

While we are at it sort the generated code to put preprocessor lines on
the top.
2019-12-20 22:51:21 -03:00
ReinUsesLisp
486c6a5316 gl_shader_cache: Remove unused entry in GetPrimitiveDescription 2019-12-20 22:49:30 -03:00
ReinUsesLisp
af93909c9c vk_shader_decompiler: Use Visit instead of reimplementing it
ExprCondCode visit implements the generic Visit. Use this instead of
that one.

As an intended side effect this fixes unwritten memory usages in cases
when a negation of a condition code is used.
2019-12-20 21:36:25 -03:00
ReinUsesLisp
38d3a48873 shader/p2r: Implement P2R Pr
P2R dumps predicate or condition codes state to a register. This is
useful for unit testing.
2019-12-20 18:02:41 -03:00
ReinUsesLisp
cf27b59493 shader/r2p: Refactor P2R to support P2R 2019-12-20 17:55:42 -03:00
bunnei
7be65c6a68 Merge pull request #3234 from ReinUsesLisp/i2f-u8-selector
shader/conversion: Implement byte selector in I2F
2019-12-19 22:36:26 -05:00
bunnei
6d55b14cc0 Merge pull request #3233 from ReinUsesLisp/mismatch-sizes
shader/texture: Properly shrink unused entries in size mismatches
2019-12-19 20:40:27 -05:00
ReinUsesLisp
e41da22c8d vk_resource_manager: Add entry to VKFence to test its usage 2019-12-19 16:31:34 -03:00
ReinUsesLisp
ec983a2451 vk_reosurce_manager: Add assert for releasing fences
Notify the programmer when a request to release a fence is invalid
because the fence is already free.
2019-12-19 16:31:34 -03:00
ReinUsesLisp
6ddffa010a vk_resource_manager: Implement VKFenceWatch move constructor
This allows us to put VKFenceWatch inside a std::vector without storing
it in heap. On move we have to signal the fences where the new protected
resource is, adding some overhead.
2019-12-19 16:31:34 -03:00
ReinUsesLisp
54747d60bc vk_device: Add entry to catch device losses
VK_NV_device_diagnostic_checkpoints allows us to push data to a Vulkan
queue and then query it even after a device loss. This allows us to push
the current pipeline object and see what was the call that killed the
device.
2019-12-19 16:31:33 -03:00
ReinUsesLisp
2a63b3bdb9 vk_shader_decompiler: Fix full decompilation
When full decompilation was enabled, labels were not being inserted and
instructions were misused. Fix these bugs.
2019-12-19 16:24:45 -03:00
ReinUsesLisp
de918ebeb0 vk_shader_decompiler: Skip NDC correction when it is native
Avoid changing gl_Position when the NDC used by the game is [0, 1]
(Vulkan's native).
2019-12-19 16:24:45 -03:00
ReinUsesLisp
485c21eac3 vk_shader_decompiler: Normalize output fragment attachments
Some games write from fragment shaders to an unexistant framebuffer
attachment or they don't write to one when it exists in the framebuffer.
Fix this by skipping writes or adding zeroes.
2019-12-19 16:24:45 -03:00
bunnei
1eb4a95d2b Merge pull request #3232 from ReinUsesLisp/gl-decompiler-images
gl_shader_decompiler: Add missing DeclareImages
2019-12-19 11:32:47 -05:00
bunnei
253aa52351 Merge pull request #3231 from ReinUsesLisp/tld4s-encoding
shader_bytecode: Fix TLD4S encoding
2019-12-19 11:32:25 -05:00
ReinUsesLisp
f4a25f854c vk_device: Add query for RGBA8Uint 2019-12-19 02:08:29 -03:00
ReinUsesLisp
abb33d4aec vk_shader_decompiler: Update sirit and implement Texture AOFFI 2019-12-19 01:42:13 -03:00
bunnei
d53cf05513 Merge pull request #3221 from ReinUsesLisp/vk-scheduler
vk_scheduler: Delegate commands to a worker thread and state track
2019-12-18 22:04:08 -05:00
ReinUsesLisp
da0aa4da6b gl_rasterizer: Implement RASTERIZE_ENABLE
RASTERIZE_ENABLE is the opposite of GL_RASTERIZER_DISCARD. Implement it
naturally using this.

NVN games expect rasterize to be enabled by default, reflect that in our
initial GPU state.
2019-12-18 19:28:23 -03:00
ReinUsesLisp
ae8d4b6c0c shader/memory: Implement LDG.U8 and unaligned U8 loads
LDG can load single bytes instead of full integers or packs of integers.
These have the advantage of loading bytes that are not aligned to 4
bytes.

To emulate these this commit gets the byte being referenced (by doing
"address & 3" and then using that to extract the byte from the loaded
integer:

result = bitfieldExtract(loaded_integer, (address % 4) * 8, 8)
2019-12-18 01:21:46 -03:00
ReinUsesLisp
a7d6bd1ef1 shader/conversion: Implement byte selector in I2F
I2F's byte selector is used to choose what bytes to convert to float.
e.g. if the input is 0xaabbccdd and the selector is ".B3" it will
convert 0xaa. The default (when it's not shown in nvdisasm) is ".B0", in
that example the default would convert 0xdd to float.
2019-12-18 00:41:22 -03:00
bunnei
c053269017 Merge pull request #3227 from amilajack/patch-1
delete appveyor config
2019-12-17 21:49:22 -05:00
ReinUsesLisp
15a753b9a5 shader/texture: Properly shrink unused entries in size mismatches
When a image format mismatches we were inserting zeroes to the texture
itself. This was not handling cases were the mismatch uses less
coordinates than the guest shader code. Address that by resizing the
vector.
2019-12-17 23:38:10 -03:00
ReinUsesLisp
e438079b50 gl_shader_decompiler: Add missing DeclareImages 2019-12-17 23:34:15 -03:00
ReinUsesLisp
8b26b4228b shader_bytecode: Fix TLD4S encoding 2019-12-17 23:32:10 -03:00
bunnei
8825b88a45 Merge pull request #3173 from yuzu-emu/bunnei-spscqueue
common: SPSCQueue: Notify after incrementing queue size.
2019-12-17 14:11:20 -05:00
Amila Welihinda
8a23c32cf0 delete .appeveyor dir 2019-12-17 00:20:34 -08:00
bunnei
67b8ecc73e common: SPSCQueue: Notify after incrementing queue size. 2019-12-16 20:39:53 -05:00
ReinUsesLisp
b52297767e renderer_vulkan/shader: Add helper GLSL shaders
These shaders are used to specify code that is not dynamically generated
in the Vulkan backend. Instead of packing it inside the build system,
it's manually built and copied to the C++ file to avoid adding
unnecessary build time dependencies.

quad_array should be dropped in the future since it can be emulated with
a memory pool generated from the CPU.
2019-12-16 17:59:08 -03:00
bunnei
65b1b05e05 Merge pull request #3182 from ReinUsesLisp/renderer-opengl
renderer_opengl: Miscellaneous clean ups
2019-12-16 13:01:04 -05:00
ReinUsesLisp
e09c1fbc1f shader/texture: Implement TLD4.PTP 2019-12-16 04:09:24 -03:00
ReinUsesLisp
844e4a297b shader/texture: Enable arrayed TLD4 2019-12-16 02:37:21 -03:00
ReinUsesLisp
a87c85eba2 gl_shader_decompiler: Rename "sepparate" to "separate" 2019-12-16 02:12:51 -03:00
ReinUsesLisp
3d2c44848b shader/texture: Implement AOFFI for TLD4S 2019-12-16 02:06:42 -03:00
ReinUsesLisp
3d9fff82c0 shader/texture: Remove unnecesary parenthesis 2019-12-16 01:52:33 -03:00
Rodrigo Locatti
eac075692b Merge pull request #3219 from FernandoS27/fix-bindless
Corrections and fixes to TLD4S & bindless samplers failing
2019-12-16 01:26:11 -03:00
Amila Welihinda
0471eb6dc7 delete appveyor config 2019-12-15 11:16:39 -08:00
bunnei
3d51153611 Merge pull request #3222 from ReinUsesLisp/maxwell-to-vk
maxwell_to_vk: Use VK_EXT_index_type_uint8 and misc changes
2019-12-14 22:30:12 -05:00
bunnei
ccda77c8c4 Merge pull request #3224 from bunnei/boost-ext-update
externals: Update boost-ext to include safe_numerics.
2019-12-14 16:13:47 -05:00
bunnei
035ec7d9de Merge pull request #3213 from ReinUsesLisp/intel-mesa
gl_device: Enable compute shaders for Intel Mesa drivers
2019-12-14 16:04:31 -05:00
bunnei
285705b5f4 externals: Update boost-ext to include safe_numerics.
- This is useful to me for an upcoming change.
2019-12-14 03:04:42 -05:00
bunnei
2b650543c6 Merge pull request #3212 from ReinUsesLisp/fix-smem-lmem
gl_shader_cache: Add missing new-line on emitted GLSL
2019-12-13 21:35:29 -05:00
ReinUsesLisp
e3ea583893 maxwell_to_vk: Improve image format table and add more formats
A1B5G5R5 uses A1R5G5B5. This is flipped with image view swizzles;
flushing is still not properly implemented on Vulkan for this particular
format.
2019-12-13 03:12:29 -03:00
ReinUsesLisp
f27b21077d maxwell_to_vk: Implement more vertex formats 2019-12-13 03:12:28 -03:00
ReinUsesLisp
8db8631d81 maxwell_to_vk: Implement more primitive topologies
Add an extra argument to query device capabilities in the future. The
intention behind this is to use native quads, quad strips, line loops
and polygons if these are released for Vulkan.
2019-12-13 03:12:28 -03:00
ReinUsesLisp
15513f0801 maxwell_to_vk: Approach GL_CLAMP closer to the GL spec
The OpenGL spec defines GL_CLAMP's formula similarly to CLAMP_TO_EDGE
and CLAMP_TO_BORDER depending on the filter mode used. It doesn't
exactly behave like this, but it's the closest we can get with what
Vulkan offers without emulating it by injecting shader code.
2019-12-13 03:12:28 -03:00
ReinUsesLisp
f845df8651 maxwell_to_vk: Use VK_EXT_index_type_uint8 when available 2019-12-13 02:37:23 -03:00
ReinUsesLisp
2df9a2dcaf vk_scheduler: Delegate commands to a worker thread and state track
Introduce a worker thread approach for delegating Vulkan work derived
from dxvk's approach. https://github.com/doitsujin/dxvk

Now that the scheduler is what handles all Vulkan work related to
command streaming, store state tracking in itself. This way we can know
when to reupload Vulkan dynamic state to the queue (since this one is
invalidated between command buffers unlike NVN). We can also store the
renderpass state and graphics pipeline bound to avoid redundant binds
and renderpass begins/ends.
2019-12-13 02:24:48 -03:00
bunnei
6d0d79109b Merge pull request #3214 from lioncash/svc-func
kernel/svc: Amend function signature of SignalProcessWideKey
2019-12-12 21:32:36 -05:00
bunnei
8fc49a83b6 Merge pull request #3217 from jhol/fix-boost-include
Added missing include
2019-12-11 22:21:24 -05:00
Fernando Sahmkow
c0ee0aa1a8 Shader_IR: Correct TLD4S Depth Compare. 2019-12-11 19:53:17 -04:00
Fernando Sahmkow
af89723fa3 Shader_Ir: Correct TLD4S encoding and implement f16 flag. 2019-12-11 19:53:17 -04:00
Fernando Sahmkow
84a158c977 Gl_Shader_compiler: Correct Depth Compare for Texture Gather operations. 2019-12-11 19:53:16 -04:00
Fernando Sahmkow
271a3264f3 Shader_Ir: default failed tracks on bindless samplers to null values. 2019-12-11 19:53:16 -04:00
Fernando Sahmkow
900b2e5cae Merge pull request #3218 from FernandoS27/tess-gl
Gl_Rasterizer: Skip Tesselation Control and Eval stages as they are unimplemented
2019-12-11 17:50:09 -04:00
Fernando Sahmkow
1d2ba3cc97 Gl_Rasterizer: Skip Tesselation Control and Eval stages as they are un implemented.
This commit ensures the OGL backend does not execute tesselation shader 
stages as they are currently unimplemented.
2019-12-11 15:41:26 -04:00
bunnei
1a66cde175 Merge pull request #3210 from ReinUsesLisp/memory-barrier
shader: Implement MEMBAR.GL
2019-12-11 14:24:39 -05:00
Joel Holdsworth
e9faa1617c Added missing include 2019-12-11 18:11:49 +00:00
Fernando Sahmkow
22c6b9fab2 Kernel: Correct behavior of Address Arbiter threads. (#3165)
* Kernel: Correct behavior of Address Arbiter threads.

This corrects arbitration threads to behave just like in Horizon OS.
They are added into a container and released according to what priority
they had when added. Horizon OS does not reorder them if their priority
changes.

* Kernel: Address Feedback.
2019-12-11 10:55:38 -05:00
Lioncash
30e365e4fc kernel/svc: Correct function signature of SignalProcessWideKey
This function doesn't actually return a result code, so we can amend the
signature of it to match.
2019-12-11 07:13:27 -05:00
ReinUsesLisp
f564eaebed gl_device: Enable compute shaders for Intel Mesa drivers
Previously we naively checked for "Intel" in GL_VENDOR, but this
includes both Intel's proprietary driver and the mesa driver. Re-enable
compute shaders for mesa.
2019-12-11 00:00:30 -03:00
ReinUsesLisp
48e16c4c49 gl_shader_cache: Add missing new-line on emitted GLSL
Add missing new-line. This caused shaders using local memory and shared
memory to inject a preprocessor GLSL line after an expression (resulting
in invalid code).

It looked like this:
shared uint smem[8];#define LOCAL_MEMORY_SIZE 16

It should look like this (addressed by this commit):
shared uint smem[8];
\#define LOCAL_MEMORY_SIZE 16
2019-12-10 23:52:51 -03:00
bunnei
34f8881d3e Merge pull request #3201 from lioncash/dump
kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew
2019-12-10 21:48:37 -05:00
Rodrigo Locatti
c8db7d1399 Merge pull request #3211 from FernandoS27/depth-mode
Maxwell3D: Implement Depth Mode.
2019-12-10 21:20:52 -03:00
Fernando Sahmkow
7ffb672f61 Maxwell3D: Implement Depth Mode.
This commit finishes adding depth mode that was reverted before due to
other unresolved issues.
2019-12-10 19:51:46 -04:00
ReinUsesLisp
425a254fa2 shader: Implement MEMBAR.GL
Implement using memoryBarrier in GLSL and OpMemoryBarrier on SPIR-V.
2019-12-10 16:45:03 -03:00
Fernando Sahmkow
6edadef96d Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler
vk_shader_decompiler: Add tessellation and misc changes
2019-12-10 08:01:41 -04:00
Lioncash
67b8265bd6 kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew
These are fairly trivial to implement, we can just do nothing. This also
provides a spot for us to potentially dump out any relevant info in the
future (e.g. for debugging purposes with homebrew, etc).

While we're at it, we can also correct the names of both of these
supervisor calls.
2019-12-07 22:01:17 -05:00
ReinUsesLisp
e6a0a30334 renderer_opengl: Make ScreenRectVertex's constructor constexpr 2019-11-28 20:36:02 -03:00
ReinUsesLisp
dee7844443 renderer_opengl: Remove C casts 2019-11-28 20:28:27 -03:00
ReinUsesLisp
3a44faff11 renderer_opengl: Use explicit binding for presentation shaders 2019-11-28 20:25:56 -03:00
ReinUsesLisp
75cc501d52 renderer_opengl: Drop macros for message decorations 2019-11-28 20:15:25 -03:00
ReinUsesLisp
056f049b26 renderer_opengl: Move static definitions to anonymous namespace 2019-11-28 20:14:40 -03:00
ReinUsesLisp
4589582eaf renderer_opengl: Move commentaries to header file 2019-11-28 20:11:03 -03:00
Fernando Sahmkow
3c95e49c42 nifm: Only return that there's an internet connection when there's a BCATServer
This helps games that need internet for other purposes boot as the rest
of our internet infrastructure is incomplete.
2019-11-06 23:10:32 -05:00
138 changed files with 8611 additions and 2044 deletions

View File

@@ -1,39 +0,0 @@
# Set-up Visual Studio Command Prompt environment for PowerShell
pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
if ($_ -match "=") {
$v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
}
}
popd
function Which ($search_path, $name) {
($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
}
function GetDeps ($search_path, $binary) {
((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
Which $search_path $_.Value
}
}
function RecursivelyGetDeps ($search_path, $binary) {
$final_deps = @()
$deps_to_process = GetDeps $search_path $binary
while ($deps_to_process.Count -gt 0) {
$current, $deps_to_process = $deps_to_process
if ($final_deps -contains $current) { continue }
# Is this a system dll file?
# We use the same algorithm that cmake uses to determine this.
if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
if ($current -match "\\msvc[^\\]+dll") { continue }
if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
$final_deps += $current
$new_deps = GetDeps $search_path $current
$deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
}
return $final_deps
}

View File

@@ -5,6 +5,10 @@ function(get_timestamp _var)
endfunction()
list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
# Find the package here with the known path so that the GetGit commands can find it as well
find_package(Git QUIET PATHS "${GIT_EXECUTABLE}")
# generate git/build information
include(GetGitRevisionDescription)
get_git_head_revision(GIT_REF_SPEC GIT_REV)

View File

@@ -1,178 +0,0 @@
# shallow clone
clone_depth: 10
cache:
- C:\ProgramData\chocolatey\bin -> appveyor.yml
- C:\ProgramData\chocolatey\lib -> appveyor.yml
os: Visual Studio 2017
environment:
# Tell msys2 to add mingw64 to the path
MSYSTEM: MINGW64
# Tell msys2 to inherit the current directory when starting the shell
CHERE_INVOKING: 1
matrix:
- BUILD_TYPE: msvc
- BUILD_TYPE: mingw
platform:
- x64
configuration:
- Release
install:
- git submodule update --init --recursive
- ps: |
if ($env:BUILD_TYPE -eq 'mingw') {
$dependencies = "mingw64/mingw-w64-x86_64-cmake",
"mingw64/mingw-w64-x86_64-qt5",
"mingw64/mingw-w64-x86_64-SDL2"
# redirect err to null to prevent warnings from becoming errors
# workaround to prevent pacman from failing due to cyclical dependencies
C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
}
before_build:
- mkdir %BUILD_TYPE%_build
- cd %BUILD_TYPE%_build
- ps: |
$COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
if ($env:BUILD_TYPE -eq 'msvc') {
# redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
} else {
C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
}
- cd ..
build_script:
- ps: |
if ($env:BUILD_TYPE -eq 'msvc') {
# https://www.appveyor.com/docs/build-phase
msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
} else {
C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
}
after_build:
- ps: |
$GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
$GITREV = $(git show -s --format='%h')
# Find out which kind of release we are producing by tag name
if ($env:APPVEYOR_REPO_TAG_NAME) {
$RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
} else {
# There is no repo tag - make assumptions
$RELEASE_DIST = "head"
}
if ($env:BUILD_TYPE -eq 'msvc') {
# Where are these spaces coming from? Regardless, let's remove them
$MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
$MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
$MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
# set the build names as env vars so the artifacts can upload them
$env:BUILD_ZIP = $MSVC_BUILD_ZIP
$env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
$env:BUILD_UPDATE = $MSVC_SEVENZIP
$BUILD_DIR = ".\msvc_build\bin\Release"
# Make a debug symbol upload
mkdir pdb
Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
rm "$BUILD_DIR\*.pdb"
mkdir $RELEASE_DIST
# get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
rm "$RELEASE_DIST\*.exe"
Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
Copy-Item .\license.txt -Destination $RELEASE_DIST
Copy-Item .\README.md -Destination $RELEASE_DIST
7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
7z a $MSVC_SEVENZIP $RELEASE_DIST
} else {
$MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
$MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
# not going to bother adding separate debug symbols for mingw, so just upload a README for it
# if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
$MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
# store the build information in env vars so we can use them as artifacts
$env:BUILD_ZIP = $MINGW_BUILD_ZIP
$env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
$env:BUILD_UPDATE = $MINGW_SEVENZIP
$CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
$CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
$RELEASE_DIST = $RELEASE_DIST + "-mingw"
mkdir $RELEASE_DIST
mkdir $RELEASE_DIST/platforms
mkdir $RELEASE_DIST/styles
mkdir $RELEASE_DIST/imageformats
# copy the compiled binaries and other release files to the release folder
Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
# copy the qt windows plugin dll to platforms
Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
# copy the qt windows vista style dll to platforms
Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
# copy the qt jpeg imageformat dll to platforms
Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
# copy all the dll dependencies to the release folder
. "./.appveyor/UtilityFunctions.ps1"
$DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
$MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
$MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu_cmd.exe"
$MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\imageformats\qjpeg.dll"
Write-Host "Detected the following dependencies:"
Write-Host $MingwDLLs
foreach ($file in $MingwDLLs) {
Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
}
7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
7z a $MINGW_SEVENZIP $RELEASE_DIST
}
test_script:
- cd %BUILD_TYPE%_build
- ps: |
if ($env:BUILD_TYPE -eq 'msvc') {
ctest -VV -C Release
} else {
C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
}
- cd ..
artifacts:
- path: $(BUILD_ZIP)
name: build
type: zip
deploy:
provider: GitHub
release: $(appveyor_repo_tag_name)
auth_token:
secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
artifact: update,build
draft: false
prerelease: false
on:
appveyor_repo_tag: true

View File

@@ -15,6 +15,10 @@ endif ()
if (DEFINED ENV{DISPLAYVERSION})
set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
endif ()
# Pass the path to git to the GenerateSCMRev.cmake as well
find_package(Git QUIET)
add_custom_command(OUTPUT scm_rev.cpp
COMMAND ${CMAKE_COMMAND}
-DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
-DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
-DBUILD_TAG="${BUILD_TAG}"
-DBUILD_ID="${DISPLAY_VERSION}"
-DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
-P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
DEPENDS
# WARNING! It was too much work to try and make a common location for this list,

View File

@@ -44,20 +44,6 @@ template class Field<std::string>;
template class Field<const char*>;
template class Field<std::chrono::microseconds>;
#ifdef ARCHITECTURE_x86_64
static const char* CpuVendorToStr(Common::CPUVendor vendor) {
switch (vendor) {
case Common::CPUVendor::INTEL:
return "Intel";
case Common::CPUVendor::AMD:
return "Amd";
case Common::CPUVendor::OTHER:
return "Other";
}
UNREACHABLE();
}
#endif
void AppendBuildInfo(FieldCollection& fc) {
const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
#ifdef ARCHITECTURE_x86_64
fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);

View File

@@ -46,9 +46,16 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
cv.notify_one();
++size;
const size_t previous_size{size++};
// Acquire the mutex and then immediately release it as a fence.
// TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
// See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
if (previous_size == 0) {
std::lock_guard lock{cv_mutex};
}
cv.notify_one();
}
void Pop() {

View File

@@ -3,8 +3,6 @@
// Refer to the license.txt file included.
#include <cstring>
#include <string>
#include <thread>
#include "common/common_types.h"
#include "common/x64/cpu_detect.h"
@@ -51,8 +49,6 @@ namespace Common {
static CPUCaps Detect() {
CPUCaps caps = {};
caps.num_cores = std::thread::hardware_concurrency();
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
// yuzu at all anyway
@@ -70,12 +66,6 @@ static CPUCaps Detect() {
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
if (!strcmp(caps.brand_string, "GenuineIntel"))
caps.vendor = CPUVendor::INTEL;
else if (!strcmp(caps.brand_string, "AuthenticAMD"))
caps.vendor = CPUVendor::AMD;
else
caps.vendor = CPUVendor::OTHER;
// Set reasonable default brand string even if brand string not available
strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1)
caps.sse4_2 = true;
if ((cpu_id[2] >> 22) & 1)
caps.movbe = true;
if ((cpu_id[2] >> 25) & 1)
caps.aes = true;
if ((cpu_id[3] >> 24) & 1) {
caps.fxsave_fxrstor = true;
}
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
}
}
caps.flush_to_zero = caps.sse;
if (max_ex_fn >= 0x80000004) {
// Extract CPU model string
__cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1)
caps.lahf_sahf_64 = true;
if ((cpu_id[2] >> 5) & 1)
caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1)
caps.fma4 = true;
if ((cpu_id[3] >> 29) & 1)
caps.long_mode = true;
}
return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
return caps;
}
std::string GetCPUCapsString() {
auto caps = GetCPUCaps();
std::string sum(caps.cpu_string);
sum += " (";
sum += caps.brand_string;
sum += ")";
if (caps.sse)
sum += ", SSE";
if (caps.sse2) {
sum += ", SSE2";
if (!caps.flush_to_zero)
sum += " (without DAZ)";
}
if (caps.sse3)
sum += ", SSE3";
if (caps.ssse3)
sum += ", SSSE3";
if (caps.sse4_1)
sum += ", SSE4.1";
if (caps.sse4_2)
sum += ", SSE4.2";
if (caps.avx)
sum += ", AVX";
if (caps.avx2)
sum += ", AVX2";
if (caps.bmi1)
sum += ", BMI1";
if (caps.bmi2)
sum += ", BMI2";
if (caps.fma)
sum += ", FMA";
if (caps.aes)
sum += ", AES";
if (caps.movbe)
sum += ", MOVBE";
if (caps.long_mode)
sum += ", 64-bit support";
return sum;
}
} // namespace Common

View File

@@ -4,23 +4,12 @@
#pragma once
#include <string>
namespace Common {
/// x86/x64 CPU vendors that may be detected by this module
enum class CPUVendor {
INTEL,
AMD,
OTHER,
};
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
CPUVendor vendor;
char cpu_string[0x21];
char brand_string[0x41];
int num_cores;
bool sse;
bool sse2;
bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
bool fma;
bool fma4;
bool aes;
// Support for the FXSAVE and FXRSTOR instructions
bool fxsave_fxrstor;
bool movbe;
// This flag indicates that the hardware supports some mode in which denormal inputs and outputs
// are automatically set to (signed) zero.
bool flush_to_zero;
// Support for LAHF and SAHF instructions in 64-bit mode
bool lahf_sahf_64;
bool long_mode;
};
/**
@@ -57,10 +32,4 @@ struct CPUCaps {
*/
const CPUCaps& GetCPUCaps();
/**
* Gets a string summary of the name and supported capabilities of the host CPU
* @return String summary
*/
std::string GetCPUCapsString();
} // namespace Common

View File

@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
// Multi-process state
config.processor_id = core_index;

View File

@@ -46,7 +46,6 @@
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "core/tools/freezer.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -341,7 +340,6 @@ struct System::Impl {
std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<VideoCore::RendererBase> renderer;
std::unique_ptr<Tegra::GPU> gpu_core;
std::shared_ptr<Tegra::DebugContext> debug_context;
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
Memory::Memory memory;
CpuCoreManager cpu_core_manager;
@@ -580,14 +578,6 @@ Loader::AppLoader& System::GetAppLoader() const {
return *impl->app_loader;
}
void System::SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context) {
impl->debug_context = std::move(context);
}
Tegra::DebugContext* System::GetGPUDebugContext() const {
return impl->debug_context.get();
}
void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
impl->virtual_filesystem = std::move(vfs);
}

View File

@@ -307,10 +307,6 @@ public:
Service::SM::ServiceManager& ServiceManager();
const Service::SM::ServiceManager& ServiceManager() const;
void SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context);
Tegra::DebugContext* GetGPUDebugContext() const;
void SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs);
std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;

View File

@@ -17,10 +17,10 @@
#include "core/memory.h"
namespace Kernel {
namespace {
// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) {
auto& system = Core::System::GetInstance();
void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
s32 num_to_wake) {
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
@@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3
for (std::size_t i = 0; i < last; i++) {
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
RemoveThread(waiting_threads[i]);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
}
}
} // Anonymous namespace
AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
AddressArbiter::~AddressArbiter() = default;
@@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->SetArbiterWaitAddress(address);
InsertThread(SharedFrom(current_thread));
current_thread->SetStatus(ThreadStatus::WaitArb);
current_thread->InvalidateWakeupCallback();
current_thread->WakeAfterDelay(timeout);
@@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
return RESULT_TIMEOUT;
}
std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
VAddr address) const {
void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
RemoveThread(thread);
thread->SetArbiterWaitAddress(0);
}
// Retrieve all threads that are waiting for this address.
std::vector<std::shared_ptr<Thread>> threads;
const auto& scheduler = system.GlobalScheduler();
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == address) {
threads.push_back(thread);
void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
const VAddr arb_addr = thread->GetArbiterWaitAddress();
std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
auto it = thread_list.begin();
while (it != thread_list.end()) {
const std::shared_ptr<Thread>& current_thread = *it;
if (current_thread->GetPriority() >= thread->GetPriority()) {
thread_list.insert(it, thread);
return;
}
++it;
}
thread_list.push_back(std::move(thread));
}
// Sort them by priority, such that the highest priority ones come first.
std::sort(threads.begin(), threads.end(),
[](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) {
return lhs->GetPriority() < rhs->GetPriority();
});
void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
const VAddr arb_addr = thread->GetArbiterWaitAddress();
std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
auto it = thread_list.begin();
while (it != thread_list.end()) {
const std::shared_ptr<Thread>& current_thread = *it;
if (current_thread.get() == thread.get()) {
thread_list.erase(it);
return;
}
++it;
}
UNREACHABLE();
}
return threads;
std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
std::vector<std::shared_ptr<Thread>> result;
std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
auto it = thread_list.begin();
while (it != thread_list.end()) {
std::shared_ptr<Thread> current_thread = *it;
result.push_back(std::move(current_thread));
++it;
}
return result;
}
} // namespace Kernel

View File

@@ -4,7 +4,9 @@
#pragma once
#include <list>
#include <memory>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
@@ -48,6 +50,9 @@ public:
/// Waits on an address with a particular arbitration type.
ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
/// Removes a thread from the container and resets its address arbiter adress to 0
void HandleWakeupThread(std::shared_ptr<Thread> thread);
private:
/// Signals an address being waited on.
ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
@@ -71,8 +76,20 @@ private:
// Waits on the given address with a timeout in nanoseconds
ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
/// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
/// Insert a thread into the address arbiter container
void InsertThread(std::shared_ptr<Thread> thread);
/// Removes a thread from the address arbiter container
void RemoveThread(std::shared_ptr<Thread> thread);
// Gets the threads waiting on an address.
std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
/// List of threads waiting for a address arbiter
std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
Core::System& system;
};

View File

@@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
}
}
if (thread->GetArbiterWaitAddress() != 0) {
ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
thread->SetArbiterWaitAddress(0);
if (thread->GetStatus() == ThreadStatus::WaitArb) {
auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
address_arbiter.HandleWakeupThread(thread);
}
if (resume) {

View File

@@ -14,6 +14,9 @@ namespace Kernel {
// - Second to ensure all host backing memory used is aligned to 256 bytes due
// to strict alignment restrictions on GPU memory.
using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
class PhysicalMemory final : public PhysicalMemoryVector {
using PhysicalMemoryVector::PhysicalMemoryVector;
};
} // namespace Kernel

View File

@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
}
void Process::LoadModule(CodeSet module_, VAddr base_addr) {
code_memory_size += module_.memory.size();
const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
code_memory_size += module_.memory.size();
}
Process::Process(Core::System& system)

View File

@@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
}
/// Signal process wide key
static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
s32 target) {
static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
condition_variable_addr, target);
@@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
system.PrepareReschedule(thread->GetProcessorID());
}
}
return RESULT_SUCCESS;
}
// Wait for an address (via Address Arbiter)
@@ -1781,6 +1778,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
}
static void KernelDebug([[maybe_unused]] Core::System& system,
[[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
[[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
// Intentionally do nothing, as this does nothing in released kernel binaries.
}
static void ChangeKernelTraceState([[maybe_unused]] Core::System& system,
[[maybe_unused]] u32 trace_state) {
// Intentionally do nothing, as this does nothing in released kernel binaries.
}
/// This returns the total CPU ticks elapsed since the CPU was powered-on
static u64 GetSystemTick(Core::System& system) {
LOG_TRACE(Kernel_SVC, "called");
@@ -2418,8 +2426,8 @@ static const FunctionDef SVC_Table[] = {
{0x39, nullptr, "Unknown"},
{0x3A, nullptr, "Unknown"},
{0x3B, nullptr, "Unknown"},
{0x3C, nullptr, "DumpInfo"},
{0x3D, nullptr, "DumpInfoNew"},
{0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
{0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
{0x3E, nullptr, "Unknown"},
{0x3F, nullptr, "Unknown"},
{0x40, nullptr, "CreateSession"},

View File

@@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) {
FuncReturn(system, retval);
}
template <ResultCode func(Core::System&, u64, s32)>
void SvcWrap(Core::System& system) {
FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
}
template <ResultCode func(Core::System&, u64, u32)>
void SvcWrap(Core::System& system) {
FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
@@ -311,11 +306,27 @@ void SvcWrap(Core::System& system) {
func(system);
}
template <void func(Core::System&, u32)>
void SvcWrap(Core::System& system) {
func(system, static_cast<u32>(Param(system, 0)));
}
template <void func(Core::System&, u32, u64, u64, u64)>
void SvcWrap(Core::System& system) {
func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
Param(system, 3));
}
template <void func(Core::System&, s64)>
void SvcWrap(Core::System& system) {
func(system, static_cast<s64>(Param(system, 0)));
}
template <void func(Core::System&, u64, s32)>
void SvcWrap(Core::System& system) {
func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
}
template <void func(Core::System&, u64, u64)>
void SvcWrap(Core::System& system) {
func(system, Param(system, 0), Param(system, 1));

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <iterator>
#include <utility>
#include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
// If necessary, expand backing vector to cover new heap extents in
// the case of allocating. Otherwise, shrink the backing memory,
// if a smaller heap has been requested.
const u64 old_heap_size = GetCurrentHeapSize();
if (size > old_heap_size) {
const u64 alloc_size = size - old_heap_size;
heap_memory->insert(heap_memory->end(), alloc_size, 0);
RefreshMemoryBlockMappings(heap_memory.get());
} else if (size < old_heap_size) {
heap_memory->resize(size);
heap_memory->shrink_to_fit();
RefreshMemoryBlockMappings(heap_memory.get());
}
heap_memory->resize(size);
heap_memory->shrink_to_fit();
RefreshMemoryBlockMappings(heap_memory.get());
heap_end = heap_region_base + size;
ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
// Always merge allocated memory blocks, even when they don't share the same backing block.
if (left.type == VMAType::AllocatedMemoryBlock &&
(left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
const auto right_begin = right.backing_block->begin() + right.offset;
const auto right_end = right_begin + right.size;
// Check if we can save work.
if (left.offset == 0 && left.size == left.backing_block->size()) {
// Fast case: left is an entire backing block.
left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
left.backing_block->resize(left.size + right.size);
std::memcpy(left.backing_block->data() + left.size,
right.backing_block->data() + right.offset, right.size);
} else {
// Slow case: make a new memory block for left and right.
const auto left_begin = left.backing_block->begin() + left.offset;
const auto left_end = left_begin + left.size;
const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
auto new_memory = std::make_shared<PhysicalMemory>();
new_memory->reserve(left_size + right_size);
new_memory->insert(new_memory->end(), left_begin, left_end);
new_memory->insert(new_memory->end(), right_begin, right_end);
new_memory->resize(left.size + right.size);
std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
right.size);
left.backing_block = std::move(new_memory);
left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
memory.UnmapRegion(page_table, vma.base, vma.size);
break;
case VMAType::AllocatedMemoryBlock:
memory.MapMemoryRegion(page_table, vma.base, vma.size,
vma.backing_block->data() + vma.offset);
memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
break;
case VMAType::BackingMemory:
memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);

View File

@@ -9,6 +9,7 @@
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/service.h"
#include "core/settings.h"
namespace Service::NIFM {
@@ -86,7 +87,12 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(RequestState::Connected);
if (Settings::values.bcat_backend == "none") {
rb.PushEnum(RequestState::NotSubmitted);
} else {
rb.PushEnum(RequestState::Connected);
}
}
void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
Core::System& system;
};

View File

@@ -104,10 +104,12 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
ASSERT(object->status == nvmap::Object::Status::Allocated);
u64 size = static_cast<u64>(entry.pages) << 0x10;
const u64 size = static_cast<u64>(entry.pages) << 0x10;
ASSERT(size <= object->size);
const u64 map_offset = static_cast<u64>(entry.map_offset) << 0x10;
GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
const GPUVAddr returned =
gpu.MemoryManager().MapBufferEx(object->addr + map_offset, offset, size);
ASSERT(returned == offset);
}
std::memcpy(output.data(), entries.data(), output.size());

View File

@@ -62,7 +62,7 @@ private:
u16_le flags;
u16_le kind;
u32_le nvmap_handle;
INSERT_PADDING_WORDS(1);
u32_le map_offset;
u32_le offset;
u32_le pages;
};

View File

@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return layer_id;
}
void NVFlinger::CloseLayer(u64 layer_id) {
for (auto& display : displays) {
display.CloseLayer(layer_id);
}
}
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto* const layer = FindLayer(display_id, layer_id);
@@ -192,7 +198,7 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
const auto& gpu = system.GPU();
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];

View File

@@ -54,6 +54,9 @@ public:
/// If an invalid display ID is specified, then an empty optional is returned.
std::optional<u64> CreateLayer(u64 display_id);
/// Closes a layer on all displays for the given layer ID.
void CloseLayer(u64 layer_id);
/// Finds the buffer queue ID of the specified layer in the specified display.
///
/// If an invalid display ID or layer ID is provided, then an empty optional is returned.

View File

@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
return *layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
return *layers.at(index);
}
std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
}
void Display::CloseLayer(u64 id) {
layers.erase(
std::remove_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
layers.end());
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
} // namespace Service::VI

View File

@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
@@ -69,6 +70,12 @@ public:
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Closes and removes a layer from this display with the given ID.
///
/// @param id The ID assigned to the layer to close.
///
void CloseLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
u64 id;
std::string name;
std::vector<Layer> layers;
std::vector<std::shared_ptr<Layer>> layers;
Kernel::EventPair vsync_event;
};

View File

@@ -1066,6 +1066,18 @@ private:
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
}
void CloseLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto layer_id{rp.Pop<u64>()};
LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
nv_flinger->CloseLayer(layer_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
{1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
{1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
{2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
{2021, nullptr, "CloseLayer"},
{2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
{2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
{2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},

View File

@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
codeset_segment->addr = segment_addr;
codeset_segment->size = aligned_size;
memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
p->p_filesz);
current_image_position += aligned_size;
}
}

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "core/file_sys/kernel_executable.h"
#include "core/file_sys/program_metadata.h"
#include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
segment.addr = offset;
segment.offset = offset;
segment.size = PageAlignSize(static_cast<u32>(data.size()));
program_image.resize(offset);
program_image.insert(program_image.end(), data.begin(), data.end());
program_image.resize(offset + data.size());
std::memcpy(program_image.data() + offset, data.data(), data.size());
};
load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cinttypes>
#include <cstring>
#include <vector>
#include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
if (nso_header.IsSegmentCompressed(i)) {
data = DecompressSegment(data, nso_header.segments[i]);
}
program_image.resize(nso_header.segments[i].location);
program_image.insert(program_image.end(), data.begin(), data.end());
program_image.resize(nso_header.segments[i].location + data.size());
std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
data.size());
codeset.segments[i].addr = nso_header.segments[i].location;
codeset.segments[i].offset = nso_header.segments[i].location;
codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
std::vector<u8> pi_header;
pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
program_image.end());
pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
program_image.data() + program_image.size());
pi_header = pm->PatchNSO(pi_header, file.GetName());
std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
}
// Apply cheats if they exist and the program has a valid title ID

View File

@@ -14,6 +14,7 @@
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/hle/kernel/physical_memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
}
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
Kernel::PhysicalMemory& memory, VAddr offset) {
MapMemoryRegion(page_table, base, size, memory.data() + offset);
}
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -146,7 +152,7 @@ struct Memory::Impl {
u8* GetPointer(const VAddr vaddr) {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
return page_pointer + (vaddr & PAGE_MASK);
return page_pointer + vaddr;
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +235,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -276,7 +283,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -322,7 +330,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
u8* dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memset(dest_ptr, 0, copy_amount);
break;
}
@@ -368,7 +377,8 @@ struct Memory::Impl {
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
const u8* src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
@@ -446,7 +456,8 @@ struct Memory::Impl {
page_type = Common::PageType::Unmapped;
} else {
page_type = Common::PageType::Memory;
current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
current_page_table->pointers[vaddr >> PAGE_BITS] =
pointer - (vaddr & ~PAGE_MASK);
}
break;
}
@@ -493,7 +504,9 @@ struct Memory::Impl {
memory);
} else {
while (base != end) {
page_table.pointers[base] = memory;
page_table.pointers[base] = memory - (base << PAGE_BITS);
ASSERT_MSG(page_table.pointers[base],
"memory mapping base yield a nullptr within the table");
base += 1;
memory += PAGE_SIZE;
@@ -518,7 +531,7 @@ struct Memory::Impl {
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
return value;
}
@@ -559,7 +572,7 @@ struct Memory::Impl {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
return;
}
@@ -594,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
impl->SetCurrentPageTable(process);
}
void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
Kernel::PhysicalMemory& memory, VAddr offset) {
impl->MapMemoryRegion(page_table, base, size, memory, offset);
}
void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
impl->MapMemoryRegion(page_table, base, size, target);
}

View File

@@ -19,8 +19,9 @@ class System;
}
namespace Kernel {
class PhysicalMemory;
class Process;
}
} // namespace Kernel
namespace Memory {
@@ -65,6 +66,19 @@ public:
*/
void SetCurrentPageTable(Kernel::Process& process);
/**
* Maps an physical buffer onto a region of the emulated process address space.
*
* @param page_table The page table of the emulated process.
* @param base The address to start mapping at. Must be page-aligned.
* @param size The amount of bytes to map. Must be page-aligned.
* @param memory Physical buffer with the memory backing the mapping. Must be of length
* at least `size + offset`.
* @param offset The offset within the physical memory. Must be page-aligned.
*/
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
Kernel::PhysicalMemory& memory, VAddr offset);
/**
* Maps an allocated buffer onto a region of the emulated process address space.
*

View File

@@ -4,8 +4,6 @@ add_library(video_core STATIC
buffer_cache/map_interval.h
dma_pusher.cpp
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
engines/const_buffer_engine_interface.h
engines/const_buffer_info.h
engines/engine_upload.cpp
@@ -151,14 +149,35 @@ add_library(video_core STATIC
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
renderer_vulkan/fixed_pipeline_state.cpp
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_blit_screen.cpp
renderer_vulkan/vk_blit_screen.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_compute_pass.cpp
renderer_vulkan/vk_compute_pass.h
renderer_vulkan/vk_compute_pipeline.cpp
renderer_vulkan/vk_compute_pipeline.h
renderer_vulkan/vk_descriptor_pool.cpp
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_sampler_cache.cpp
@@ -167,10 +186,19 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
renderer_vulkan/vk_shader_decompiler.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h)
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -1,49 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <mutex>
#include "video_core/debug_utils/debug_utils.h"
namespace Tegra {
void DebugContext::DoOnEvent(Event event, void* data) {
{
std::unique_lock lock{breakpoint_mutex};
// TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
// show on debug widgets
// TODO: Should stop the CPU thread here once we multithread emulation.
active_breakpoint = event;
at_breakpoint = true;
// Tell all observers that we hit a breakpoint
for (auto& breakpoint_observer : breakpoint_observers) {
breakpoint_observer->OnMaxwellBreakPointHit(event, data);
}
// Wait until another thread tells us to Resume()
resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
}
}
void DebugContext::Resume() {
{
std::lock_guard lock{breakpoint_mutex};
// Tell all observers that we are about to resume
for (auto& breakpoint_observer : breakpoint_observers) {
breakpoint_observer->OnMaxwellResume();
}
// Resume the waiting thread (i.e. OnEvent())
at_breakpoint = false;
}
resume_from_breakpoint.notify_one();
}
} // namespace Tegra

View File

@@ -1,157 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
namespace Tegra {
class DebugContext {
public:
enum class Event {
FirstEvent = 0,
MaxwellCommandLoaded = FirstEvent,
MaxwellCommandProcessed,
IncomingPrimitiveBatch,
FinishedPrimitiveBatch,
NumEvents
};
/**
* Inherit from this class to be notified of events registered to some debug context.
* Most importantly this is used for our debugger GUI.
*
* To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods.
* @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
* access
* @todo Evaluate an alternative interface, in which there is only one managing observer and
* multiple child observers running (by design) on the same thread.
*/
class BreakPointObserver {
public:
/// Constructs the object such that it observes events of the given DebugContext.
explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
: context_weak(debug_context) {
std::unique_lock lock{debug_context->breakpoint_mutex};
debug_context->breakpoint_observers.push_back(this);
}
virtual ~BreakPointObserver() {
auto context = context_weak.lock();
if (context) {
{
std::unique_lock lock{context->breakpoint_mutex};
context->breakpoint_observers.remove(this);
}
// If we are the last observer to be destroyed, tell the debugger context that
// it is free to continue. In particular, this is required for a proper yuzu
// shutdown, when the emulation thread is waiting at a breakpoint.
if (context->breakpoint_observers.empty())
context->Resume();
}
}
/**
* Action to perform when a breakpoint was reached.
* @param event Type of event which triggered the breakpoint
* @param data Optional data pointer (if unused, this is a nullptr)
* @note This function will perform nothing unless it is overridden in the child class.
*/
virtual void OnMaxwellBreakPointHit(Event event, void* data) {}
/**
* Action to perform when emulation is resumed from a breakpoint.
* @note This function will perform nothing unless it is overridden in the child class.
*/
virtual void OnMaxwellResume() {}
protected:
/**
* Weak context pointer. This need not be valid, so when requesting a shared_ptr via
* context_weak.lock(), always compare the result against nullptr.
*/
std::weak_ptr<DebugContext> context_weak;
};
/**
* Simple structure defining a breakpoint state
*/
struct BreakPoint {
bool enabled = false;
};
/**
* Static constructor used to create a shared_ptr of a DebugContext.
*/
static std::shared_ptr<DebugContext> Construct() {
return std::shared_ptr<DebugContext>(new DebugContext);
}
/**
* Used by the emulation core when a given event has happened. If a breakpoint has been set
* for this event, OnEvent calls the event handlers of the registered breakpoint observers.
* The current thread then is halted until Resume() is called from another thread (or until
* emulation is stopped).
* @param event Event which has happened
* @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
* Resume() is called.
*/
void OnEvent(Event event, void* data) {
// This check is left in the header to allow the compiler to inline it.
if (!breakpoints[(int)event].enabled)
return;
// For the rest of event handling, call a separate function.
DoOnEvent(event, data);
}
void DoOnEvent(Event event, void* data);
/**
* Resume from the current breakpoint.
* @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
* Calling from any other thread is safe.
*/
void Resume();
/**
* Delete all set breakpoints and resume emulation.
*/
void ClearBreakpoints() {
for (auto& bp : breakpoints) {
bp.enabled = false;
}
Resume();
}
// TODO: Evaluate if access to these members should be hidden behind a public interface.
std::array<BreakPoint, static_cast<int>(Event::NumEvents)> breakpoints;
Event active_breakpoint{};
bool at_breakpoint = false;
private:
/**
* Private default constructor to make sure people always construct this through Construct()
* instead.
*/
DebugContext() = default;
/// Mutex protecting current breakpoint state and the observer list.
std::mutex breakpoint_mutex;
/// Used by OnEvent to wait for resumption.
std::condition_variable resume_from_breakpoint;
/// List of registered observers
std::list<BreakPointObserver*> breakpoint_observers;
};
} // namespace Tegra

View File

@@ -7,7 +7,6 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
@@ -88,11 +87,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
color_mask.A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
// NVN games expect these values to be enabled at boot
regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1;
// Some games (like Super Mario Odyssey) assume that SRGB is enabled.
regs.framebuffer_srgb = 1;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
@@ -273,8 +272,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = system.GetGPUDebugContext();
const u32 method = method_call.method;
if (method == cb_data_state.current) {
@@ -315,10 +312,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
const std::size_t dirty_reg = dirty_pointers[method];
@@ -424,10 +417,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
default:
break;
}
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
}
}
void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
@@ -485,12 +474,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
auto debug_context = system.GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
}
// Both instance configuration registers can not be set at the same time.
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
@@ -500,10 +483,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
rasterizer.DrawMultiBatch(is_indexed);
}
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
// it's possible that it is incorrect and that there is some other register used to specify the
@@ -650,12 +629,6 @@ void Maxwell3D::DrawArrays() {
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = system.GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
}
// Both instance configuration registers can not be set at the same time.
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
@@ -673,10 +646,6 @@ void Maxwell3D::DrawArrays() {
rasterizer.DrawBatch(is_indexed);
}
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
// it's possible that it is incorrect and that there is some other register used to specify the

View File

@@ -310,6 +310,11 @@ public:
}
};
enum class DepthMode : u32 {
MinusOneToOne = 0,
ZeroToOne = 1,
};
enum class PrimitiveTopology : u32 {
Points = 0x0,
Lines = 0x1,
@@ -491,11 +496,6 @@ public:
INSERT_UNION_PADDING_WORDS(1);
};
enum class DepthMode : u32 {
MinusOneToOne = 0,
ZeroToOne = 1,
};
enum class TessellationPrimitive : u32 {
Isolines = 0,
Triangles = 1,
@@ -657,7 +657,11 @@ public:
std::array<f32, 4> tess_level_outer;
std::array<f32, 2> tess_level_inner;
INSERT_UNION_PADDING_WORDS(0x102);
INSERT_UNION_PADDING_WORDS(0x10);
u32 rasterize_enable;
INSERT_UNION_PADDING_WORDS(0xF1);
u32 tfb_enabled;
@@ -676,7 +680,7 @@ public:
u32 count;
} vertex_buffer;
INSERT_UNION_PADDING_WORDS(1);
DepthMode depth_mode;
float clear_color[4];
float clear_depth;
@@ -707,13 +711,15 @@ public:
u32 color_mask_common;
INSERT_UNION_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
INSERT_UNION_PADDING_WORDS(0x2);
f32 depth_bounds[2];
INSERT_UNION_PADDING_WORDS(0xA);
INSERT_UNION_PADDING_WORDS(0x2);
u32 rt_separate_frag_data;
INSERT_UNION_PADDING_WORDS(0xC);
struct {
u32 address_high;
@@ -1012,7 +1018,14 @@ public:
}
} instanced_arrays;
INSERT_UNION_PADDING_WORDS(0x6);
INSERT_UNION_PADDING_WORDS(0x4);
union {
BitField<0, 1, u32> enable;
BitField<4, 8, u32> unk4;
} vp_point_size;
INSERT_UNION_PADDING_WORDS(1);
Cull cull;
@@ -1030,7 +1043,12 @@ public:
BitField<4, 1, u32> depth_clamp_far;
} view_volume_clip_control;
INSERT_UNION_PADDING_WORDS(0x21);
INSERT_UNION_PADDING_WORDS(0x1F);
u32 depth_bounds_enable;
INSERT_UNION_PADDING_WORDS(1);
struct {
u32 enable;
LogicOperation operation;
@@ -1260,8 +1278,6 @@ public:
} dirty{};
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -1356,6 +1372,8 @@ private:
bool execute_on{true};
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1420,11 +1438,13 @@ ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
ASSERT_REG_POSITION(viewports, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(depth_mode, 0x35F);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(clear_stencil, 0x368);
@@ -1438,7 +1458,7 @@ ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(depth_bounds, 0x3EC);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1490,10 +1510,12 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);

View File

@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
Trunc = 11,
};
enum class AtomicOp : u64 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
Exch = 8,
};
enum class UniformType : u64 {
UnsignedByte = 0,
SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
Bits128 = 6,
};
enum class AtomicType : u64 {
U32 = 0,
S32 = 1,
U64 = 2,
S64 = 3,
};
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -384,6 +403,15 @@ enum class IsberdMode : u64 {
enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
enum class MembarType : u64 {
CTA = 0,
GL = 1,
SYS = 2,
VC = 3,
};
enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
enum class HalfType : u64 {
H0_H1 = 0,
F32 = 1,
@@ -929,6 +957,16 @@ union Instruction {
BitField<46, 2, u64> cache_mode;
} stg;
union {
BitField<52, 4, AtomicOp> operation;
BitField<28, 2, AtomicType> type;
BitField<30, 22, s64> offset;
s32 GetImmediateOffset() const {
return static_cast<s32>(offset << 2);
}
} atoms;
union {
BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size;
@@ -1042,7 +1080,7 @@ union Instruction {
BitField<40, 1, R2pMode> mode;
BitField<41, 2, u64> byte;
BitField<20, 7, u64> immediate_mask;
} r2p;
} p2r_r2p;
union {
BitField<39, 3, u64> pred39;
@@ -1230,7 +1268,7 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<54, 2, u64> info;
BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1242,9 +1280,9 @@ union Instruction {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
return info == 1;
return offset_mode == 1;
case TextureMiscMode::PTP:
return info == 2;
return offset_mode == 2;
default:
break;
}
@@ -1256,7 +1294,7 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<33, 2, u64> info;
BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1268,9 +1306,9 @@ union Instruction {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
return info == 1;
return offset_mode == 1;
case TextureMiscMode::PTP:
return info == 2;
return offset_mode == 2;
default:
break;
}
@@ -1283,6 +1321,7 @@ union Instruction {
BitField<50, 1, u64> dc_flag;
BitField<51, 1, u64> aoffi_flag;
BitField<52, 2, u64> component;
BitField<55, 1, u64> fp16_flag;
bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
@@ -1545,6 +1584,11 @@ union Instruction {
BitField<47, 2, IsberdShift> shift;
} isberd;
union {
BitField<8, 2, MembarType> type;
BitField<0, 2, MembarUnknown> unknown;
} membar;
union {
BitField<48, 1, u64> signed_a;
BitField<38, 1, u64> is_byte_chunk_a;
@@ -1644,9 +1688,10 @@ public:
ST_A,
ST_L,
ST_S,
ST, // Store in generic memory
STG, // Store in global memory
AL2P, // Transforms attribute memory into physical memory
ST, // Store in generic memory
STG, // Store in global memory
ATOMS, // Atomic operation on shared memory
AL2P, // Transforms attribute memory into physical memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
@@ -1669,6 +1714,7 @@ public:
IPA,
OUT_R, // Emit vertex/primitive
ISBERD,
MEMBAR,
VMAD,
VSETP,
FFMA_IMM, // Fused Multiply and Add
@@ -1785,6 +1831,7 @@ public:
PSET,
CSETP,
R2P_IMM,
P2R_IMM,
XMAD_IMM,
XMAD_CR,
XMAD_RC,
@@ -1930,7 +1977,7 @@ private:
INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
@@ -1947,6 +1994,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
@@ -1957,7 +2005,7 @@ private:
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
@@ -1969,6 +2017,7 @@ private:
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -2089,6 +2138,7 @@ private:
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),

View File

@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
MICROPROFILE_SCOPE(GPU_wait);
while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
}
std::unique_lock lock{sync_mutex};
sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
@@ -181,7 +182,7 @@ public:
virtual void WaitIdle() const = 0;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value) const;
void WaitFence(u32 syncpoint_id, u32 value);
void IncrementSyncPoint(u32 syncpoint_id);
@@ -312,6 +313,8 @@ private:
std::mutex sync_mutex;
std::condition_variable sync_cv;
const bool is_async;
};

View File

@@ -5,6 +5,7 @@
#include <mutex>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/assert.h"
#include "common/common_types.h"

View File

@@ -5,6 +5,7 @@
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <optional>
#include <vector>
@@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
Device::Device() : base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
const bool is_intel = vendor == "Intel";
const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_broken_compute = is_intel;
has_broken_compute = is_intel_proprietary;
has_fast_buffer_sub_data = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);

View File

@@ -271,12 +271,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->UseTrivialFragmentShader();
break;
default:
break;
}
continue;
}
// Currently this stages are not supported in the OpenGL backend.
// Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl) {
continue;
} else if (program == Maxwell::ShaderProgram::TesselationEval) {
continue;
}
Shader shader{shader_cache.GetStageProgram(program)};
// Stage indices are 0 - 5
@@ -506,6 +517,7 @@ void RasterizerOpenGL::Clear() {
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
SyncViewport(clear_state);
SyncRasterizeEnable(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -533,6 +545,7 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::DrawPrelude() {
auto& gpu = system.GPU().Maxwell3D();
SyncRasterizeEnable(state);
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -1028,6 +1041,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
flip_y = !flip_y;
}
state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
state.clip_control.depth_mode =
regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
? GL_ZERO_TO_ONE
: GL_NEGATIVE_ONE_TO_ONE;
}
void RasterizerOpenGL::SyncClipEnabled(
@@ -1121,6 +1138,11 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
}
void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
const auto& regs = system.GPU().Maxwell3D().regs;
current_state.rasterizer_discard = regs.rasterize_enable == 0;
}
void RasterizerOpenGL::SyncColorMask() {
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.color_mask) {
@@ -1250,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
state.point.program_control = regs.vp_point_size.enable != 0;
state.point.size = std::max(1.0f, regs.point_size);
}

View File

@@ -168,6 +168,9 @@ private:
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable(OpenGLState& current_state);
/// Syncs Color Mask
void SyncColorMask();

View File

@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;
namespace {
// One UBO is always reserved for emulation values on staged shaders
constexpr u32 STAGE_RESERVED_UBOS = 1;
constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
@@ -112,25 +109,25 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
}
/// Describes primitive behavior on geometry shaders
constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
case GL_POINTS:
return {"points", "Points", 1};
return {"points", 1};
case GL_LINES:
case GL_LINE_STRIP:
return {"lines", "Lines", 2};
return {"lines", 2};
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return {"lines_adjacency", "LinesAdj", 4};
return {"lines_adjacency", 4};
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return {"triangles", "Triangles", 3};
return {"triangles", 3};
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return {"triangles_adjacency", "TrianglesAdj", 6};
return {"triangles_adjacency", 6};
default:
return {"points", "Invalid", 1};
return {"points", 1};
}
}
@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
if (!code_b.empty()) {
ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
}
const auto entries = GLShader::GetEntries(ir);
std::string source = fmt::format(R"(// {}
#version 430 core
@@ -264,29 +260,24 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
"#extension GL_NV_shader_thread_group : require\n"
"#extension GL_NV_shader_thread_shuffle : require\n";
}
source += '\n';
if (shader_type == ShaderType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(variant.primitive_mode);
source += fmt::format("layout ({}) in;\n\n", glsl_topology);
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
source += fmt::format("layout ({}) in;\n", glsl_topology);
}
if (shader_type == ShaderType::Compute) {
if (variant.local_memory_size > 0) {
source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
Common::AlignUp(variant.local_memory_size, 4) / 4);
}
source +=
fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
variant.block_x, variant.block_y, variant.block_z);
if (variant.shared_memory_size > 0) {
// TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool
// avoids out of bound stores. Find out why shared memory size is being invalid.
source += fmt::format("shared uint smem[{}];", variant.shared_memory_size);
}
if (variant.local_memory_size > 0) {
source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
Common::AlignUp(variant.local_memory_size, 4) / 4);
// shared_memory_size is described in number of words
source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
}
}
@@ -319,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
: RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
: RasterizerCacheObject{params.host_ptr}, system{params.system},
disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, shader_type{shader_type},
entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
if (!params.precompiled_variants) {
return;
}

View File

@@ -48,10 +48,10 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -399,6 +399,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
DeclareImages();
DeclarePhysicalAttributeReader();
code.AddLine("void execute_{}() {{", suffix);
@@ -750,6 +751,9 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (const auto amend_index = operation->GetAmendIndex()) {
Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
}
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -871,6 +875,9 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
if (const auto amend_index = conditional->GetAmendIndex()) {
Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
}
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
@@ -1076,7 +1083,7 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
const std::vector<TextureIR>& extras) {
const std::vector<TextureIR>& extras, bool separate_dc = false) {
constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1089,9 +1096,12 @@ private:
std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
} else if (!meta->ptp.empty()) {
expr += "Offsets";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
(has_shadow && !separate_dc ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]).AsFloat();
@@ -1104,15 +1114,24 @@ private:
expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
expr += ", " + Visit(meta->depth_compare).AsFloat();
if (separate_dc) {
expr += "), " + Visit(meta->depth_compare).AsFloat();
} else {
expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
}
} else {
expr += ')';
}
expr += ')';
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
} else if (std::holds_alternative<TextureAoffi>(variant)) {
expr += GenerateTextureAoffi(meta->aoffi);
} else if (std::holds_alternative<TextureOffset>(variant)) {
if (!meta->aoffi.empty()) {
expr += GenerateTextureAoffi(meta->aoffi);
} else if (!meta->ptp.empty()) {
expr += GenerateTexturePtp(meta->ptp);
}
} else if (std::holds_alternative<TextureDerivates>(variant)) {
expr += GenerateTextureDerivates(meta->derivates);
} else {
@@ -1153,6 +1172,20 @@ private:
return expr;
}
std::string ReadTextureOffset(const Node& value) {
if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
return std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
return Visit(value).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
return "0";
}
}
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
if (aoffi.empty()) {
return {};
@@ -1163,18 +1196,7 @@ private:
expr += '(';
for (std::size_t index = 0; index < aoffi.size(); ++index) {
const auto operand{aoffi.at(index)};
if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
expr += Visit(operand).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
}
expr += ReadTextureOffset(aoffi.at(index));
if (index + 1 < aoffi.size()) {
expr += ", ";
}
@@ -1184,6 +1206,20 @@ private:
return expr;
}
std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
static constexpr std::size_t num_vectors = 4;
ASSERT(ptp.size() == num_vectors * 2);
std::string expr = ", ivec2[](";
for (std::size_t vector = 0; vector < num_vectors; ++vector) {
const bool has_next = vector + 1 < num_vectors;
expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
}
expr += ')';
return expr;
}
std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
if (derivates.empty()) {
return {};
@@ -1682,7 +1718,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1694,7 +1730,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1702,13 +1738,18 @@ private:
}
Expression TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
return {GenerateTexture(operation, "Gather",
{TextureAoffi{}, TextureArgument{type, meta->component}}) +
GetSwizzle(meta->element),
const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
const bool separate_dc = meta.sampler.IsShadow();
std::vector<TextureIR> ir;
if (meta.sampler.IsShadow()) {
ir = {TextureOffset{}};
} else {
ir = {TextureOffset{}, TextureArgument{type, meta.component}};
}
return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
Type::Float};
}
@@ -1780,7 +1821,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
std::string expr =
GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
}
@@ -1814,6 +1856,16 @@ private:
Type::Uint};
}
template <const std::string_view& opname, Type type>
Expression Atomic(Operation operation) {
ASSERT(stage == ShaderType::Compute);
auto& smem = std::get<SmemNode>(*operation[0]);
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
Visit(operation[1]).As(type)),
type};
}
Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1992,6 +2044,11 @@ private:
return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
Expression MemoryBarrierGL(Operation) {
code.AddLine("memoryBarrier();");
return {};
}
struct Func final {
Func() = delete;
~Func() = delete;
@@ -2147,6 +2204,8 @@ private:
&GLSLDecompiler::AtomicImage<Func::Xor>,
&GLSLDecompiler::AtomicImage<Func::Exchange>,
&GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack,
@@ -2173,6 +2232,8 @@ private:
&GLSLDecompiler::ThreadId,
&GLSLDecompiler::ShuffleIndexed,
&GLSLDecompiler::MemoryBarrierGL,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2264,7 +2325,7 @@ public:
explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
void operator()(const ExprAnd& expr) {
inner += "( ";
inner += '(';
std::visit(*this, *expr.operand1);
inner += " && ";
std::visit(*this, *expr.operand2);
@@ -2272,7 +2333,7 @@ public:
}
void operator()(const ExprOr& expr) {
inner += "( ";
inner += '(';
std::visit(*this, *expr.operand1);
inner += " || ";
std::visit(*this, *expr.operand2);
@@ -2290,28 +2351,7 @@ public:
}
void operator()(const ExprCondCode& expr) {
const Node cc = decomp.ir.GetConditionCode(expr.cc);
std::string target;
if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
const auto index = pred->GetIndex();
switch (index) {
case Tegra::Shader::Pred::NeverExecute:
target = "false";
break;
case Tegra::Shader::Pred::UnusedIndex:
target = "true";
break;
default:
target = decomp.GetPredicate(index);
break;
}
} else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
target = decomp.GetInternalFlag(flag->GetFlag());
} else {
UNREACHABLE();
}
inner += target;
inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
}
void operator()(const ExprVar& expr) {
@@ -2323,8 +2363,7 @@ public:
}
void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
inner +=
"( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
}
const std::string& GetResult() const {
@@ -2332,8 +2371,8 @@ public:
}
private:
std::string inner;
GLSLDecompiler& decomp;
std::string inner;
};
class ASTDecompiler {

View File

@@ -50,6 +50,10 @@ public:
current_state.geometry_shader = 0;
}
void UseTrivialFragmentShader() {
current_state.fragment_shader = 0;
}
private:
struct PipelineState {
bool operator==(const PipelineState& rhs) const {

View File

@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
}
void OpenGLState::ApplyPointSize() {
Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
@@ -182,6 +183,10 @@ void OpenGLState::ApplyCulling() {
}
}
void OpenGLState::ApplyRasterizerDiscard() {
Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
}
void OpenGLState::ApplyColorMask() {
if (!dirty.color_mask) {
return;
@@ -411,8 +416,9 @@ void OpenGLState::ApplyAlphaTest() {
}
void OpenGLState::ApplyClipControl() {
if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
std::tie(clip_control.origin, clip_control.depth_mode))) {
glClipControl(clip_control.origin, clip_control.depth_mode);
}
}
@@ -454,6 +460,7 @@ void OpenGLState::Apply() {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
ApplyRasterizerDiscard();
ApplyColorMask();
ApplyDepthClamp();
ApplyViewport();

View File

@@ -48,6 +48,8 @@ public:
GLuint index = 0;
} primitive_restart; // GL_PRIMITIVE_RESTART
bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
struct ColorMask {
GLboolean red_enabled = GL_TRUE;
GLboolean green_enabled = GL_TRUE;
@@ -56,6 +58,7 @@ public:
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
struct {
bool test_enabled = false; // GL_STENCIL_TEST
struct {
@@ -128,7 +131,8 @@ public:
std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
float size = 1.0f; // GL_POINT_SIZE
bool program_control = false; // GL_PROGRAM_POINT_SIZE
GLfloat size = 1.0f; // GL_POINT_SIZE
} point;
struct {
@@ -150,6 +154,7 @@ public:
struct {
GLenum origin = GL_LOWER_LEFT;
GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
} clip_control;
OpenGLState();
@@ -173,6 +178,7 @@ public:
void ApplyMultisample();
void ApplySRgb();
void ApplyCulling();
void ApplyRasterizerDiscard();
void ApplyColorMask();
void ApplyDepth();
void ApplyPrimitiveRestart();

View File

@@ -44,7 +44,7 @@ struct FormatTuple {
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
u8* const mip_data = staging_buffer.data() + mip_offset;
const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {
glGetCompressedTextureImage(texture.handle, level,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
glGetCompressedTextureImage(texture.handle, level, size, mip_data);
} else {
glGetTextureImage(texture.handle, level, format, type,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
glGetTextureImage(texture.handle, level, format, type, size, mip_data);
}
}
}

View File

@@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_POINTS;
case Maxwell::PrimitiveTopology::Lines:
return GL_LINES;
case Maxwell::PrimitiveTopology::LineLoop:
return GL_LINE_LOOP;
case Maxwell::PrimitiveTopology::LineStrip:
return GL_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
@@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::Quads:
return GL_QUADS;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
case Maxwell::PrimitiveTopology::QuadStrip:
return GL_QUAD_STRIP;
case Maxwell::PrimitiveTopology::Polygon:
return GL_POLYGON;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return GL_LINES_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return GL_LINE_STRIP_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return GL_TRIANGLES_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return GL_TRIANGLE_STRIP_ADJACENCY;
case Maxwell::PrimitiveTopology::Patches:
return GL_PATCHES;
}
UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
return GL_POINTS;
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,

View File

@@ -24,19 +24,21 @@
namespace OpenGL {
static const char vertex_shader[] = R"(
#version 150 core
namespace {
in vec2 vert_position;
in vec2 vert_tex_coord;
out vec2 frag_tex_coord;
constexpr char vertex_shader[] = R"(
#version 430 core
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord;
// This is a truncated 3x3 matrix for 2D transformations:
// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
// The third column performs translation.
// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
// implicitly be [0, 0, 1]
uniform mat3x2 modelview_matrix;
layout (location = 0) uniform mat3x2 modelview_matrix;
void main() {
// Multiply input position by the rotscale part of the matrix and then manually translate by
@@ -47,34 +49,29 @@ void main() {
}
)";
static const char fragment_shader[] = R"(
#version 150 core
constexpr char fragment_shader[] = R"(
#version 430 core
in vec2 frag_tex_coord;
out vec4 color;
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
uniform sampler2D color_texture;
layout (binding = 0) uniform sampler2D color_texture;
void main() {
// Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
// support more framebuffer pixel formats.
color = texture(color_texture, frag_tex_coord);
}
)";
/**
* Vertex structure that the drawn screen rectangles are composed of.
*/
struct ScreenRectVertex {
ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) {
position[0] = x;
position[1] = y;
tex_coord[0] = u;
tex_coord[1] = v;
}
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
GLfloat position[2];
GLfloat tex_coord[2];
struct ScreenRectVertex {
constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
: position{{x, y}}, tex_coord{{u, v}} {}
std::array<GLfloat, 2> position;
std::array<GLfloat, 2> tex_coord;
};
/**
@@ -84,18 +81,82 @@ struct ScreenRectVertex {
* The projection part of the matrix is trivial, hence these operations are represented
* by a 3x2 matrix.
*/
static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
// clang-format off
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
// Last matrix row is implicitly assumed to be [0, 0, 1].
// clang-format on
return matrix;
}
const char* GetSource(GLenum source) {
switch (source) {
case GL_DEBUG_SOURCE_API:
return "API";
case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
return "WINDOW_SYSTEM";
case GL_DEBUG_SOURCE_SHADER_COMPILER:
return "SHADER_COMPILER";
case GL_DEBUG_SOURCE_THIRD_PARTY:
return "THIRD_PARTY";
case GL_DEBUG_SOURCE_APPLICATION:
return "APPLICATION";
case GL_DEBUG_SOURCE_OTHER:
return "OTHER";
default:
UNREACHABLE();
return "Unknown source";
}
}
const char* GetType(GLenum type) {
switch (type) {
case GL_DEBUG_TYPE_ERROR:
return "ERROR";
case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
return "DEPRECATED_BEHAVIOR";
case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
return "UNDEFINED_BEHAVIOR";
case GL_DEBUG_TYPE_PORTABILITY:
return "PORTABILITY";
case GL_DEBUG_TYPE_PERFORMANCE:
return "PERFORMANCE";
case GL_DEBUG_TYPE_OTHER:
return "OTHER";
case GL_DEBUG_TYPE_MARKER:
return "MARKER";
default:
UNREACHABLE();
return "Unknown type";
}
}
void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
const GLchar* message, const void* user_param) {
const char format[] = "{} {} {}: {}";
const char* const str_source = GetSource(source);
const char* const str_type = GetType(type);
switch (severity) {
case GL_DEBUG_SEVERITY_HIGH:
LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_MEDIUM:
LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_NOTIFICATION:
case GL_DEBUG_SEVERITY_LOW:
LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
break;
}
}
} // Anonymous namespace
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
@@ -138,9 +199,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
prev_state.Apply();
}
/**
* Loads framebuffer from emulated memory into the active OpenGL texture.
*/
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
@@ -181,19 +239,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
/**
* Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can
* be 1x1 but will stretch across whatever it's rendered on.
*/
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
}
/**
* Initializes the OpenGL state and creates persistent objects.
*/
void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
@@ -203,10 +254,6 @@ void RendererOpenGL::InitOpenGLObjects() {
state.draw.shader_program = shader.handle;
state.AllDirty();
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
attrib_position = glGetAttribLocation(shader.handle, "vert_position");
attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -217,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() {
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
sizeof(ScreenRectVertex));
@@ -331,18 +378,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
static_cast<f32>(screen_info.texture.height);
}
std::array<ScreenRectVertex, 4> vertices = {{
const std::array vertices = {
ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
}};
};
state.textures[0] = screen_info.display_texture;
state.framebuffer_srgb.enabled = screen_info.display_srgb;
state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
@@ -351,9 +398,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
state.Apply();
}
/**
* Draws the emulated screens to the emulator window.
*/
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
@@ -367,21 +411,17 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
// Set projection matrix
std::array<GLfloat, 3 * 2> ortho_matrix =
MakeOrthographicMatrix((float)layout.width, (float)layout.height);
glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
// Bind texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glUniform1i(uniform_color_texture, 0);
DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top,
(float)screen.GetWidth(), (float)screen.GetHeight());
DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
static_cast<float>(screen.GetHeight()));
m_current_frame++;
}
/// Updates the framerate
void RendererOpenGL::UpdateFramerate() {}
void RendererOpenGL::CaptureScreenshot() {
@@ -418,63 +458,6 @@ void RendererOpenGL::CaptureScreenshot() {
renderer_settings.screenshot_requested = false;
}
static const char* GetSource(GLenum source) {
#define RET(s) \
case GL_DEBUG_SOURCE_##s: \
return #s
switch (source) {
RET(API);
RET(WINDOW_SYSTEM);
RET(SHADER_COMPILER);
RET(THIRD_PARTY);
RET(APPLICATION);
RET(OTHER);
default:
UNREACHABLE();
return "Unknown source";
}
#undef RET
}
static const char* GetType(GLenum type) {
#define RET(t) \
case GL_DEBUG_TYPE_##t: \
return #t
switch (type) {
RET(ERROR);
RET(DEPRECATED_BEHAVIOR);
RET(UNDEFINED_BEHAVIOR);
RET(PORTABILITY);
RET(PERFORMANCE);
RET(OTHER);
RET(MARKER);
default:
UNREACHABLE();
return "Unknown type";
}
#undef RET
}
static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
GLsizei length, const GLchar* message, const void* user_param) {
const char format[] = "{} {} {}: {}";
const char* const str_source = GetSource(source);
const char* const str_type = GetType(type);
switch (severity) {
case GL_DEBUG_SEVERITY_HIGH:
LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_MEDIUM:
LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_NOTIFICATION:
case GL_DEBUG_SEVERITY_LOW:
LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
break;
}
}
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
@@ -495,7 +478,6 @@ bool RendererOpenGL::Init() {
return true;
}
/// Shutdown the renderer
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL

View File

@@ -59,21 +59,31 @@ public:
void ShutDown() override;
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
/// Updates the framerate.
void UpdateFramerate();
void CaptureScreenshot();
// Loads framebuffer from emulated memory into the display information structure
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
// Fills active OpenGL texture with the given RGBA color.
/// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
/// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
@@ -94,14 +104,6 @@ private:
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
// Shader uniform location indices
GLuint uniform_modelview_matrix;
GLuint uniform_color_texture;
// Shader attribute input indices
GLuint attrib_position;
GLuint attrib_tex_coord;
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;

View File

@@ -6,16 +6,20 @@
#include <vector>
#include <fmt/format.h>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
struct VertexArrayPushBuffer::Entry {
GLuint binding_index{};
const GLuint* buffer{};
GLintptr offset{};
GLsizei stride{};
};
VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
}
}
struct BindBuffersRangePushBuffer::Entry {
GLuint binding;
const GLuint* buffer;
GLintptr offset;
GLsizeiptr size;
};
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;

View File

@@ -26,12 +26,7 @@ public:
void Bind();
private:
struct Entry {
GLuint binding_index{};
const GLuint* buffer{};
GLintptr offset{};
GLsizei stride{};
};
struct Entry;
GLuint vao{};
const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
void Bind();
private:
struct Entry {
GLuint binding;
const GLuint* buffer;
GLintptr offset;
GLsizeiptr size;
};
struct Entry;
GLenum target;
std::vector<Entry> entries;

View File

@@ -0,0 +1,302 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
namespace Vulkan {
namespace {
constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) {
const FixedPipelineState::StencilFace front_stencil(
regs.stencil_front_op_fail, regs.stencil_front_op_zfail, regs.stencil_front_op_zpass,
regs.stencil_front_func_func);
const FixedPipelineState::StencilFace back_stencil =
regs.stencil_two_side_enable
? FixedPipelineState::StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail,
regs.stencil_back_op_zpass,
regs.stencil_back_func_func)
: front_stencil;
return FixedPipelineState::DepthStencil(
regs.depth_test_enable == 1, regs.depth_write_enabled == 1, regs.depth_bounds_enable == 1,
regs.stencil_enable == 1, regs.depth_test_func, front_stencil, back_stencil);
}
constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) {
return FixedPipelineState::InputAssembly(
regs.draw.topology, regs.primitive_restart.enabled,
regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f);
}
constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState(
const Maxwell& regs, std::size_t render_target) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target];
const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0};
const FixedPipelineState::BlendingAttachment default_blending(
false, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One,
Maxwell::Blend::Factor::Zero, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One,
Maxwell::Blend::Factor::Zero, components);
if (render_target >= regs.rt_control.count) {
return default_blending;
}
if (!regs.independent_blend_enable) {
const auto& src = regs.blend;
if (!src.enable[render_target]) {
return default_blending;
}
return FixedPipelineState::BlendingAttachment(
true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a,
src.factor_source_a, src.factor_dest_a, components);
}
if (!regs.blend.enable[render_target]) {
return default_blending;
}
const auto& src = regs.independent_blend[render_target];
return FixedPipelineState::BlendingAttachment(
true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a,
src.factor_source_a, src.factor_dest_a, components);
}
constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) {
return FixedPipelineState::ColorBlending(
{regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a},
regs.rt_control.count,
{GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1),
GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3),
GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5),
GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)});
}
constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) {
return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim,
regs.tess_mode.spacing, regs.tess_mode.cw != 0);
}
constexpr std::size_t Point = 0;
constexpr std::size_t Line = 1;
constexpr std::size_t Polygon = 2;
constexpr std::array PolygonOffsetEnableLUT = {
Point, // Points
Line, // Lines
Line, // LineLoop
Line, // LineStrip
Polygon, // Triangles
Polygon, // TriangleStrip
Polygon, // TriangleFan
Polygon, // Quads
Polygon, // QuadStrip
Polygon, // Polygon
Line, // LinesAdjacency
Line, // LineStripAdjacency
Polygon, // TrianglesAdjacency
Polygon, // TriangleStripAdjacency
Polygon, // Patches
};
constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) {
const std::array enabled_lut = {regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable};
const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
if (front_face == Maxwell::Cull::FrontFace::CounterClockWise)
front_face = Maxwell::Cull::FrontFace::ClockWise;
else if (front_face == Maxwell::Cull::FrontFace::ClockWise)
front_face = Maxwell::Cull::FrontFace::CounterClockWise;
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
front_face);
}
} // Anonymous namespace
std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept {
return (index << stride) ^ divisor;
}
bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept {
return std::tie(index, stride, divisor) == std::tie(rhs.index, rhs.stride, rhs.divisor);
}
std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept {
return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(buffer) << 13) ^
(static_cast<std::size_t>(type) << 22) ^ (static_cast<std::size_t>(size) << 31) ^
(static_cast<std::size_t>(offset) << 36);
}
bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept {
return std::tie(index, buffer, type, size, offset) ==
std::tie(rhs.index, rhs.buffer, rhs.type, rhs.size, rhs.offset);
}
std::size_t FixedPipelineState::StencilFace::Hash() const noexcept {
return static_cast<std::size_t>(action_stencil_fail) ^
(static_cast<std::size_t>(action_depth_fail) << 4) ^
(static_cast<std::size_t>(action_depth_fail) << 20) ^
(static_cast<std::size_t>(action_depth_pass) << 36);
}
bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept {
return std::tie(action_stencil_fail, action_depth_fail, action_depth_pass, test_func) ==
std::tie(rhs.action_stencil_fail, rhs.action_depth_fail, rhs.action_depth_pass,
rhs.test_func);
}
std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept {
return static_cast<std::size_t>(enable) ^ (static_cast<std::size_t>(rgb_equation) << 5) ^
(static_cast<std::size_t>(src_rgb_func) << 10) ^
(static_cast<std::size_t>(dst_rgb_func) << 15) ^
(static_cast<std::size_t>(a_equation) << 20) ^
(static_cast<std::size_t>(src_a_func) << 25) ^
(static_cast<std::size_t>(dst_a_func) << 30) ^
(static_cast<std::size_t>(components[0]) << 35) ^
(static_cast<std::size_t>(components[1]) << 36) ^
(static_cast<std::size_t>(components[2]) << 37) ^
(static_cast<std::size_t>(components[3]) << 38);
}
bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const
noexcept {
return std::tie(enable, rgb_equation, src_rgb_func, dst_rgb_func, a_equation, src_a_func,
dst_a_func, components) ==
std::tie(rhs.enable, rhs.rgb_equation, rhs.src_rgb_func, rhs.dst_rgb_func,
rhs.a_equation, rhs.src_a_func, rhs.dst_a_func, rhs.components);
}
std::size_t FixedPipelineState::VertexInput::Hash() const noexcept {
std::size_t hash = num_bindings ^ (num_attributes << 32);
for (std::size_t i = 0; i < num_bindings; ++i) {
boost::hash_combine(hash, bindings[i].Hash());
}
for (std::size_t i = 0; i < num_attributes; ++i) {
boost::hash_combine(hash, attributes[i].Hash());
}
return hash;
}
bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept {
return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(),
rhs.bindings.begin() + rhs.num_bindings) &&
std::equal(attributes.begin(), attributes.begin() + num_attributes,
rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes);
}
std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept {
std::size_t point_size_int = 0;
std::memcpy(&point_size_int, &point_size, sizeof(point_size));
return (static_cast<std::size_t>(topology) << 24) ^ (point_size_int << 32) ^
static_cast<std::size_t>(primitive_restart_enable);
}
bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept {
return std::tie(topology, primitive_restart_enable, point_size) ==
std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size);
}
std::size_t FixedPipelineState::Tessellation::Hash() const noexcept {
return static_cast<std::size_t>(patch_control_points) ^
(static_cast<std::size_t>(primitive) << 6) ^ (static_cast<std::size_t>(spacing) << 8) ^
(static_cast<std::size_t>(clockwise) << 10);
}
bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept {
return std::tie(patch_control_points, primitive, spacing, clockwise) ==
std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise);
}
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
return static_cast<std::size_t>(cull_enable) ^
(static_cast<std::size_t>(depth_bias_enable) << 1) ^
(static_cast<std::size_t>(depth_clamp_enable) << 2) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
(static_cast<std::size_t>(cull_face) << 24) ^
(static_cast<std::size_t>(front_face) << 48);
}
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
std::size_t hash = static_cast<std::size_t>(depth_test_enable) ^
(static_cast<std::size_t>(depth_write_enable) << 1) ^
(static_cast<std::size_t>(depth_bounds_enable) << 2) ^
(static_cast<std::size_t>(stencil_enable) << 3) ^
(static_cast<std::size_t>(depth_test_function) << 4);
boost::hash_combine(hash, front_stencil.Hash());
boost::hash_combine(hash, back_stencil.Hash());
return hash;
}
bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept {
return std::tie(depth_test_enable, depth_write_enable, depth_bounds_enable, depth_test_function,
stencil_enable, front_stencil, back_stencil) ==
std::tie(rhs.depth_test_enable, rhs.depth_write_enable, rhs.depth_bounds_enable,
rhs.depth_test_function, rhs.stencil_enable, rhs.front_stencil,
rhs.back_stencil);
}
std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept {
std::size_t hash = attachments_count << 13;
for (std::size_t rt = 0; rt < static_cast<std::size_t>(attachments_count); ++rt) {
boost::hash_combine(hash, attachments[rt].Hash());
}
return hash;
}
bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept {
return std::equal(attachments.begin(), attachments.begin() + attachments_count,
rhs.attachments.begin(), rhs.attachments.begin() + rhs.attachments_count);
}
std::size_t FixedPipelineState::Hash() const noexcept {
std::size_t hash = 0;
boost::hash_combine(hash, vertex_input.Hash());
boost::hash_combine(hash, input_assembly.Hash());
boost::hash_combine(hash, tessellation.Hash());
boost::hash_combine(hash, rasterizer.Hash());
boost::hash_combine(hash, depth_stencil.Hash());
boost::hash_combine(hash, color_blending.Hash());
return hash;
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil,
color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly,
rhs.tessellation, rhs.rasterizer, rhs.depth_stencil,
rhs.color_blending);
}
FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
FixedPipelineState fixed_state;
fixed_state.input_assembly = GetInputAssemblyState(regs);
fixed_state.tessellation = GetTessellationState(regs);
fixed_state.rasterizer = GetRasterizerState(regs);
fixed_state.depth_stencil = GetDepthStencilState(regs);
fixed_state.color_blending = GetColorBlendingState(regs);
return fixed_state;
}
} // namespace Vulkan

View File

@@ -0,0 +1,284 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <type_traits>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
// TODO(Rodrigo): Optimize this structure.
struct FixedPipelineState {
using PixelFormat = VideoCore::Surface::PixelFormat;
struct VertexBinding {
constexpr VertexBinding(u32 index, u32 stride, u32 divisor)
: index{index}, stride{stride}, divisor{divisor} {}
VertexBinding() = default;
u32 index;
u32 stride;
u32 divisor;
std::size_t Hash() const noexcept;
bool operator==(const VertexBinding& rhs) const noexcept;
bool operator!=(const VertexBinding& rhs) const noexcept {
return !operator==(rhs);
}
};
struct VertexAttribute {
constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type,
Maxwell::VertexAttribute::Size size, u32 offset)
: index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {}
VertexAttribute() = default;
u32 index;
u32 buffer;
Maxwell::VertexAttribute::Type type;
Maxwell::VertexAttribute::Size size;
u32 offset;
std::size_t Hash() const noexcept;
bool operator==(const VertexAttribute& rhs) const noexcept;
bool operator!=(const VertexAttribute& rhs) const noexcept {
return !operator==(rhs);
}
};
struct StencilFace {
constexpr StencilFace(Maxwell::StencilOp action_stencil_fail,
Maxwell::StencilOp action_depth_fail,
Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func)
: action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail},
action_depth_pass{action_depth_pass}, test_func{test_func} {}
StencilFace() = default;
Maxwell::StencilOp action_stencil_fail;
Maxwell::StencilOp action_depth_fail;
Maxwell::StencilOp action_depth_pass;
Maxwell::ComparisonOp test_func;
std::size_t Hash() const noexcept;
bool operator==(const StencilFace& rhs) const noexcept;
bool operator!=(const StencilFace& rhs) const noexcept {
return !operator==(rhs);
}
};
struct BlendingAttachment {
constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation,
Maxwell::Blend::Factor src_rgb_func,
Maxwell::Blend::Factor dst_rgb_func,
Maxwell::Blend::Equation a_equation,
Maxwell::Blend::Factor src_a_func,
Maxwell::Blend::Factor dst_a_func,
std::array<bool, 4> components)
: enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func},
dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func},
dst_a_func{dst_a_func}, components{components} {}
BlendingAttachment() = default;
bool enable;
Maxwell::Blend::Equation rgb_equation;
Maxwell::Blend::Factor src_rgb_func;
Maxwell::Blend::Factor dst_rgb_func;
Maxwell::Blend::Equation a_equation;
Maxwell::Blend::Factor src_a_func;
Maxwell::Blend::Factor dst_a_func;
std::array<bool, 4> components;
std::size_t Hash() const noexcept;
bool operator==(const BlendingAttachment& rhs) const noexcept;
bool operator!=(const BlendingAttachment& rhs) const noexcept {
return !operator==(rhs);
}
};
struct VertexInput {
std::size_t num_bindings = 0;
std::size_t num_attributes = 0;
std::array<VertexBinding, Maxwell::NumVertexArrays> bindings;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::size_t Hash() const noexcept;
bool operator==(const VertexInput& rhs) const noexcept;
bool operator!=(const VertexInput& rhs) const noexcept {
return !operator==(rhs);
}
};
struct InputAssembly {
constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable,
float point_size)
: topology{topology}, primitive_restart_enable{primitive_restart_enable},
point_size{point_size} {}
InputAssembly() = default;
Maxwell::PrimitiveTopology topology;
bool primitive_restart_enable;
float point_size;
std::size_t Hash() const noexcept;
bool operator==(const InputAssembly& rhs) const noexcept;
bool operator!=(const InputAssembly& rhs) const noexcept {
return !operator==(rhs);
}
};
struct Tessellation {
constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive,
Maxwell::TessellationSpacing spacing, bool clockwise)
: patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing},
clockwise{clockwise} {}
Tessellation() = default;
u32 patch_control_points;
Maxwell::TessellationPrimitive primitive;
Maxwell::TessellationSpacing spacing;
bool clockwise;
std::size_t Hash() const noexcept;
bool operator==(const Tessellation& rhs) const noexcept;
bool operator!=(const Tessellation& rhs) const noexcept {
return !operator==(rhs);
}
};
struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
Maxwell::Cull::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {}
Rasterizer() = default;
bool cull_enable;
bool depth_bias_enable;
bool depth_clamp_enable;
bool ndc_minus_one_to_one;
Maxwell::Cull::CullFace cull_face;
Maxwell::Cull::FrontFace front_face;
std::size_t Hash() const noexcept;
bool operator==(const Rasterizer& rhs) const noexcept;
bool operator!=(const Rasterizer& rhs) const noexcept {
return !operator==(rhs);
}
};
struct DepthStencil {
constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable,
bool depth_bounds_enable, bool stencil_enable,
Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil,
StencilFace back_stencil)
: depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable},
depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable},
depth_test_function{depth_test_function}, front_stencil{front_stencil},
back_stencil{back_stencil} {}
DepthStencil() = default;
bool depth_test_enable;
bool depth_write_enable;
bool depth_bounds_enable;
bool stencil_enable;
Maxwell::ComparisonOp depth_test_function;
StencilFace front_stencil;
StencilFace back_stencil;
std::size_t Hash() const noexcept;
bool operator==(const DepthStencil& rhs) const noexcept;
bool operator!=(const DepthStencil& rhs) const noexcept {
return !operator==(rhs);
}
};
struct ColorBlending {
constexpr ColorBlending(
std::array<float, 4> blend_constants, std::size_t attachments_count,
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments)
: attachments_count{attachments_count}, attachments{attachments} {}
ColorBlending() = default;
std::size_t attachments_count;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::size_t Hash() const noexcept;
bool operator==(const ColorBlending& rhs) const noexcept;
bool operator!=(const ColorBlending& rhs) const noexcept {
return !operator==(rhs);
}
};
std::size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept;
bool operator!=(const FixedPipelineState& rhs) const noexcept {
return !operator==(rhs);
}
VertexInput vertex_input;
InputAssembly input_assembly;
Tessellation tessellation;
Rasterizer rasterizer;
DepthStencil depth_stencil;
ColorBlending color_blending;
};
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::FixedPipelineState> {
std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@@ -44,7 +44,8 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
return {};
}
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
return vk::SamplerAddressMode::eRepeat;
@@ -55,10 +56,20 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
case Tegra::Texture::WrapMode::Border:
return vk::SamplerAddressMode::eClampToBorder;
case Tegra::Texture::WrapMode::Clamp:
// TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
// eClampToBorder to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
return vk::SamplerAddressMode::eClampToBorder;
if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
// Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
// by sending an invalid enumeration.
return static_cast<vk::SamplerAddressMode>(0xcafe);
}
// TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
switch (filter) {
case Tegra::Texture::TextureFilter::Nearest:
return vk::SamplerAddressMode::eClampToEdge;
case Tegra::Texture::TextureFilter::Linear:
return vk::SamplerAddressMode::eClampToBorder;
}
UNREACHABLE();
return vk::SamplerAddressMode::eClampToEdge;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return vk::SamplerAddressMode::eMirrorClampToEdge;
case Tegra::Texture::WrapMode::MirrorOnceBorder:
@@ -96,106 +107,140 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar
} // namespace Sampler
namespace {
enum : u32 { Attachable = 1, Storage = 2 };
struct FormatTuple {
vk::Format format; ///< Vulkan format
bool attachable; ///< True when this format can be used as an attachment
};
static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{vk::Format::eA8B8G8R8UnormPack32, true}, // ABGR8U
{vk::Format::eUndefined, false}, // ABGR8S
{vk::Format::eUndefined, false}, // ABGR8UI
{vk::Format::eB5G6R5UnormPack16, false}, // B5G6R5U
{vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U
{vk::Format::eUndefined, false}, // A1B5G5R5U
{vk::Format::eR8Unorm, true}, // R8U
{vk::Format::eUndefined, false}, // R8UI
{vk::Format::eUndefined, false}, // RGBA16F
{vk::Format::eUndefined, false}, // RGBA16U
{vk::Format::eUndefined, false}, // RGBA16UI
{vk::Format::eUndefined, false}, // R11FG11FB10F
{vk::Format::eUndefined, false}, // RGBA32UI
{vk::Format::eBc1RgbaUnormBlock, false}, // DXT1
{vk::Format::eBc2UnormBlock, false}, // DXT23
{vk::Format::eBc3UnormBlock, false}, // DXT45
{vk::Format::eBc4UnormBlock, false}, // DXN1
{vk::Format::eUndefined, false}, // DXN2UNORM
{vk::Format::eUndefined, false}, // DXN2SNORM
{vk::Format::eUndefined, false}, // BC7U
{vk::Format::eUndefined, false}, // BC6H_UF16
{vk::Format::eUndefined, false}, // BC6H_SF16
{vk::Format::eUndefined, false}, // ASTC_2D_4X4
{vk::Format::eUndefined, false}, // BGRA8
{vk::Format::eUndefined, false}, // RGBA32F
{vk::Format::eUndefined, false}, // RG32F
{vk::Format::eUndefined, false}, // R32F
{vk::Format::eUndefined, false}, // R16F
{vk::Format::eUndefined, false}, // R16U
{vk::Format::eUndefined, false}, // R16S
{vk::Format::eUndefined, false}, // R16UI
{vk::Format::eUndefined, false}, // R16I
{vk::Format::eUndefined, false}, // RG16
{vk::Format::eUndefined, false}, // RG16F
{vk::Format::eUndefined, false}, // RG16UI
{vk::Format::eUndefined, false}, // RG16I
{vk::Format::eUndefined, false}, // RG16S
{vk::Format::eUndefined, false}, // RGB32F
{vk::Format::eA8B8G8R8SrgbPack32, true}, // RGBA8_SRGB
{vk::Format::eUndefined, false}, // RG8U
{vk::Format::eUndefined, false}, // RG8S
{vk::Format::eUndefined, false}, // RG32UI
{vk::Format::eUndefined, false}, // RGBX16F
{vk::Format::eUndefined, false}, // R32UI
{vk::Format::eUndefined, false}, // ASTC_2D_8X8
{vk::Format::eUndefined, false}, // ASTC_2D_8X5
{vk::Format::eUndefined, false}, // ASTC_2D_5X4
// Compressed sRGB formats
{vk::Format::eUndefined, false}, // BGRA8_SRGB
{vk::Format::eUndefined, false}, // DXT1_SRGB
{vk::Format::eUndefined, false}, // DXT23_SRGB
{vk::Format::eUndefined, false}, // DXT45_SRGB
{vk::Format::eUndefined, false}, // BC7U_SRGB
{vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB
{vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB
{vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB
{vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB
{vk::Format::eUndefined, false}, // ASTC_2D_5X5
{vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB
{vk::Format::eUndefined, false}, // ASTC_2D_10X8
{vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB
int usage; ///< Describes image format usage
} constexpr tex_format_tuples[] = {
{vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage}, // ABGR8U
{vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage}, // ABGR8S
{vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI
{vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U
{vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
{vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle)
{vk::Format::eR8Unorm, Attachable | Storage}, // R8U
{vk::Format::eR8Uint, Attachable | Storage}, // R8UI
{vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
{vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
{vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
{vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
{vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
{vk::Format::eBc1RgbaUnormBlock, {}}, // DXT1
{vk::Format::eBc2UnormBlock, {}}, // DXT23
{vk::Format::eBc3UnormBlock, {}}, // DXT45
{vk::Format::eBc4UnormBlock, {}}, // DXN1
{vk::Format::eBc5UnormBlock, {}}, // DXN2UNORM
{vk::Format::eBc5SnormBlock, {}}, // DXN2SNORM
{vk::Format::eBc7UnormBlock, {}}, // BC7U
{vk::Format::eBc6HUfloatBlock, {}}, // BC6H_UF16
{vk::Format::eBc6HSfloatBlock, {}}, // BC6H_SF16
{vk::Format::eAstc4x4UnormBlock, {}}, // ASTC_2D_4X4
{vk::Format::eB8G8R8A8Unorm, {}}, // BGRA8
{vk::Format::eR32G32B32A32Sfloat, Attachable | Storage}, // RGBA32F
{vk::Format::eR32G32Sfloat, Attachable | Storage}, // RG32F
{vk::Format::eR32Sfloat, Attachable | Storage}, // R32F
{vk::Format::eR16Sfloat, Attachable | Storage}, // R16F
{vk::Format::eR16Unorm, Attachable | Storage}, // R16U
{vk::Format::eUndefined, {}}, // R16S
{vk::Format::eUndefined, {}}, // R16UI
{vk::Format::eUndefined, {}}, // R16I
{vk::Format::eR16G16Unorm, Attachable | Storage}, // RG16
{vk::Format::eR16G16Sfloat, Attachable | Storage}, // RG16F
{vk::Format::eUndefined, {}}, // RG16UI
{vk::Format::eUndefined, {}}, // RG16I
{vk::Format::eR16G16Snorm, Attachable | Storage}, // RG16S
{vk::Format::eUndefined, {}}, // RGB32F
{vk::Format::eR8G8B8A8Srgb, Attachable}, // RGBA8_SRGB
{vk::Format::eR8G8Unorm, Attachable | Storage}, // RG8U
{vk::Format::eR8G8Snorm, Attachable | Storage}, // RG8S
{vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI
{vk::Format::eUndefined, {}}, // RGBX16F
{vk::Format::eR32Uint, Attachable | Storage}, // R32UI
{vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8
{vk::Format::eUndefined, {}}, // ASTC_2D_8X5
{vk::Format::eUndefined, {}}, // ASTC_2D_5X4
{vk::Format::eUndefined, {}}, // BGRA8_SRGB
{vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB
{vk::Format::eUndefined, {}}, // DXT23_SRGB
{vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB
{vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB
{vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U
{vk::Format::eAstc4x4SrgbBlock, {}}, // ASTC_2D_4X4_SRGB
{vk::Format::eAstc8x8SrgbBlock, {}}, // ASTC_2D_8X8_SRGB
{vk::Format::eAstc8x5SrgbBlock, {}}, // ASTC_2D_8X5_SRGB
{vk::Format::eAstc5x4SrgbBlock, {}}, // ASTC_2D_5X4_SRGB
{vk::Format::eAstc5x5UnormBlock, {}}, // ASTC_2D_5X5
{vk::Format::eAstc5x5SrgbBlock, {}}, // ASTC_2D_5X5_SRGB
{vk::Format::eAstc10x8UnormBlock, {}}, // ASTC_2D_10X8
{vk::Format::eAstc10x8SrgbBlock, {}}, // ASTC_2D_10X8_SRGB
{vk::Format::eAstc6x6UnormBlock, {}}, // ASTC_2D_6X6
{vk::Format::eAstc6x6SrgbBlock, {}}, // ASTC_2D_6X6_SRGB
{vk::Format::eAstc10x10UnormBlock, {}}, // ASTC_2D_10X10
{vk::Format::eAstc10x10SrgbBlock, {}}, // ASTC_2D_10X10_SRGB
{vk::Format::eAstc12x12UnormBlock, {}}, // ASTC_2D_12X12
{vk::Format::eAstc12x12SrgbBlock, {}}, // ASTC_2D_12X12_SRGB
{vk::Format::eAstc8x6UnormBlock, {}}, // ASTC_2D_8X6
{vk::Format::eAstc8x6SrgbBlock, {}}, // ASTC_2D_8X6_SRGB
{vk::Format::eAstc6x5UnormBlock, {}}, // ASTC_2D_6X5
{vk::Format::eAstc6x5SrgbBlock, {}}, // ASTC_2D_6X5_SRGB
{vk::Format::eE5B9G9R9UfloatPack32, {}}, // E5B9G9R9F
// Depth formats
{vk::Format::eD32Sfloat, true}, // Z32F
{vk::Format::eD16Unorm, true}, // Z16
{vk::Format::eD32Sfloat, Attachable}, // Z32F
{vk::Format::eD16Unorm, Attachable}, // Z16
// DepthStencil formats
{vk::Format::eD24UnormS8Uint, true}, // Z24S8
{vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated)
{vk::Format::eUndefined, false}, // Z32FS8
}};
{vk::Format::eD24UnormS8Uint, Attachable}, // Z24S8
{vk::Format::eD24UnormS8Uint, Attachable}, // S8Z24 (emulated)
{vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8
};
static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);
static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
constexpr bool IsZetaFormat(PixelFormat pixel_format) {
return pixel_format >= PixelFormat::MaxColorFormat &&
pixel_format < PixelFormat::MaxDepthStencilFormat;
}
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
} // Anonymous namespace
const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
"Unimplemented texture format with pixel format={}",
static_cast<u32>(pixel_format));
FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
if (tuple.attachable) {
usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
: vk::FormatFeatureFlagBits::eColorAttachment;
auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
if (tuple.format == vk::Format::eUndefined) {
UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}",
static_cast<u32>(pixel_format));
return {vk::Format::eA8B8G8R8UnormPack32, true, true};
}
return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
// Use ABGR8 on hardware that doesn't support ASTC natively
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
? vk::Format::eA8B8G8R8SrgbPack32
: vk::Format::eA8B8G8R8UnormPack32;
}
const bool attachable = tuple.usage & Attachable;
const bool storage = tuple.usage & Storage;
vk::FormatFeatureFlags usage;
if (format_type == FormatType::Buffer) {
usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer |
vk::FormatFeatureFlagBits::eUniformTexelBuffer;
} else {
usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst |
vk::FormatFeatureFlagBits::eTransferSrc;
if (attachable) {
usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
: vk::FormatFeatureFlagBits::eColorAttachment;
}
if (storage) {
usage |= vk::FormatFeatureFlagBits::eStorageImage;
}
}
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
}
vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
@@ -215,7 +260,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
return {};
}
vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return vk::PrimitiveTopology::ePointList;
@@ -227,6 +273,13 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return vk::PrimitiveTopology::eTriangleList;
case Maxwell::PrimitiveTopology::TriangleStrip:
return vk::PrimitiveTopology::eTriangleStrip;
case Maxwell::PrimitiveTopology::TriangleFan:
return vk::PrimitiveTopology::eTriangleFan;
case Maxwell::PrimitiveTopology::Quads:
// TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
return vk::PrimitiveTopology::eTriangleList;
case Maxwell::PrimitiveTopology::Patches:
return vk::PrimitiveTopology::ePatchList;
default:
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
return {};
@@ -236,37 +289,111 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
switch (type) {
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return vk::Format::eR8Snorm;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Snorm;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return vk::Format::eR8G8B8Snorm;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return vk::Format::eR8G8B8A8Snorm;
case Maxwell::VertexAttribute::Size::Size_16:
return vk::Format::eR16Snorm;
case Maxwell::VertexAttribute::Size::Size_16_16:
return vk::Format::eR16G16Snorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return vk::Format::eR16G16B16Snorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Snorm;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return vk::Format::eA2B10G10R10SnormPack32;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return vk::Format::eR8Unorm;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Unorm;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return vk::Format::eR8G8B8Unorm;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return vk::Format::eR8G8B8A8Unorm;
case Maxwell::VertexAttribute::Size::Size_16:
return vk::Format::eR16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16:
return vk::Format::eR16G16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return vk::Format::eR16G16B16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Unorm;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::SignedInt:
break;
switch (size) {
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Sint;
case Maxwell::VertexAttribute::Size::Size_8:
return vk::Format::eR8Sint;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Sint;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return vk::Format::eR8G8B8Sint;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return vk::Format::eR8G8B8A8Sint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Sint;
default:
break;
}
case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
return vk::Format::eR8Uint;
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Uint;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
return vk::Format::eR8G8B8Uint;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return vk::Format::eR8G8B8A8Uint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Uint;
default:
break;
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8_8:
return vk::Format::eR8G8Uscaled;
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::SignedScaled:
break;
case Maxwell::VertexAttribute::Type::Float:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return vk::Format::eR32G32B32A32Sfloat;
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return vk::Format::eR32G32B32Sfloat;
case Maxwell::VertexAttribute::Size::Size_32_32:
return vk::Format::eR32G32Sfloat;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Sfloat;
case Maxwell::VertexAttribute::Size::Size_32_32:
return vk::Format::eR32G32Sfloat;
case Maxwell::VertexAttribute::Size::Size_32_32_32:
return vk::Format::eR32G32B32Sfloat;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return vk::Format::eR32G32B32A32Sfloat;
case Maxwell::VertexAttribute::Size::Size_16:
return vk::Format::eR16Sfloat;
case Maxwell::VertexAttribute::Size::Size_16_16:
return vk::Format::eR16G16Sfloat;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return vk::Format::eR16G16B16Sfloat;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Sfloat;
default:
break;
}
@@ -308,11 +435,14 @@ vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
return {};
}
vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
return vk::IndexType::eUint16;
if (!device.IsExtIndexTypeUint8Supported()) {
UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
return vk::IndexType::eUint16;
}
return vk::IndexType::eUint8EXT;
case Maxwell::IndexFormat::UnsignedShort:
return vk::IndexType::eUint16;
case Maxwell::IndexFormat::UnsignedInt:

View File

@@ -4,7 +4,6 @@
#pragma once
#include <utility>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -23,24 +22,31 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);
vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter);
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
} // namespace Sampler
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
PixelFormat pixel_format);
struct FormatInfo {
vk::Format format;
bool attachable;
bool storage;
};
FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);
vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device,
Maxwell::PrimitiveTopology topology);
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);
vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);

View File

@@ -0,0 +1,72 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <vector>
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Core {
class System;
}
namespace Vulkan {
class VKBlitScreen;
class VKDevice;
class VKFence;
class VKMemoryManager;
class VKResourceManager;
class VKSwapchain;
class VKScheduler;
class VKImage;
struct VKScreenInfo {
VKImage* image{};
u32 width{};
u32 height{};
bool is_srgb{};
};
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
~RendererVulkan() override;
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
/// Initialize the renderer
bool Init() override;
/// Shutdown the renderer
void ShutDown() override;
private:
std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
const vk::DispatchLoaderDynamic& dldi);
bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
void Report() const;
Core::System& system;
vk::Instance instance;
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
UniqueDebugUtilsMessengerEXT debug_callback;
std::unique_ptr<VKDevice> device;
std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKMemoryManager> memory_manager;
std::unique_ptr<VKResourceManager> resource_manager;
std::unique_ptr<VKScheduler> scheduler;
std::unique_ptr<VKBlitScreen> blit_screen;
};
} // namespace Vulkan

View File

@@ -0,0 +1,24 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (binding = 1) uniform sampler2D color_texture;
void main() {
color = texture(color_texture, frag_tex_coord);
}

View File

@@ -0,0 +1,28 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord;
layout (set = 0, binding = 0) uniform MatrixBlock {
mat4 modelview_matrix;
};
void main() {
gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0);
frag_tex_coord = vert_tex_coord;
}

View File

@@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
layout (local_size_x = 1024) in;
layout (std430, set = 0, binding = 0) buffer OutputBuffer {
uint output_indexes[];
};
layout (push_constant) uniform PushConstants {
uint first;
};
void main() {
uint primitive = gl_GlobalInvocationID.x;
if (primitive * 6 >= output_indexes.length()) {
return;
}
const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
for (uint vertex = 0; vertex < 6; ++vertex) {
uint index = first + primitive * 4 + quad_map[vertex];
output_indexes[primitive * 6 + vertex] = index;
}
}

View File

@@ -0,0 +1,33 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
/*
* Build instructions:
* $ glslangValidator -V $THIS_FILE -o output.spv
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
* $ xxd -i optimized.spv
*
* Then copy that bytecode to the C++ file
*/
#version 460 core
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
layout (local_size_x = 1024) in;
layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
uint8_t input_indexes[];
};
layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint16_t output_indexes[];
};
void main() {
uint id = gl_GlobalInvocationID.x;
if (id < input_indexes.length()) {
output_indexes[id] = uint16_t(input_indexes[id]);
}
}

View File

@@ -0,0 +1,627 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <tuple>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/morton.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/surface.h"
namespace Vulkan {
namespace {
// Generated from the "shaders/" directory, read the instructions there.
constexpr u8 blit_vertex_code[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
0x38, 0x00, 0x01, 0x00};
constexpr u8 blit_fragment_code[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
struct ScreenRectVertex {
ScreenRectVertex() = default;
explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
std::array<f32, 2> position;
std::array<f32, 2> tex_coord;
static vk::VertexInputBindingDescription GetDescription() {
return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex),
vk::VertexInputRate::eVertex);
}
static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() {
return {vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat,
offsetof(ScreenRectVertex, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32Sfloat,
offsetof(ScreenRectVertex, tex_coord))};
}
};
constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
// clang-format off
return { 2.f / width, 0.f, 0.f, 0.f,
0.f, 2.f / height, 0.f, 0.f,
0.f, 0.f, 1.f, 0.f,
-1.f, -1.f, 0.f, 1.f};
// clang-format on
}
std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
using namespace VideoCore::Surface;
return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
}
std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
return static_cast<std::size_t>(framebuffer.stride) *
static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer);
}
vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) {
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return vk::Format::eA8B8G8R8UnormPack32;
case Tegra::FramebufferConfig::PixelFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
static_cast<u32>(framebuffer.pixel_format));
return vk::Format::eA8B8G8R8UnormPack32;
}
}
} // Anonymous namespace
struct VKBlitScreen::BufferData {
struct {
std::array<f32, 4 * 4> modelview_matrix;
} uniform;
std::array<ScreenRectVertex, 4> vertices;
// Unaligned image data goes here
};
VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKSwapchain& swapchain, VKScheduler& scheduler,
const VKScreenInfo& screen_info)
: system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
watches.resize(image_count);
std::generate(watches.begin(), watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
CreateStaticResources();
CreateDynamicResources();
}
VKBlitScreen::~VKBlitScreen() = default;
void VKBlitScreen::Recreate() {
CreateDynamicResources();
}
std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
const std::size_t image_index = swapchain.GetImageIndex();
watches[image_index]->Watch(scheduler.GetFence());
VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
UpdateDescriptorSet(image_index, blit_image->GetPresentView());
BufferData data;
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
auto map = buffer_commit->Map();
std::memcpy(map.GetAddress(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const auto pixel_format =
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
map.GetAddress() + image_offset, host_ptr);
blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer,
vk::AccessFlagBits::eTransferWrite,
vk::ImageLayout::eTransferDstOptimal);
const vk::BufferImageCopy copy(image_offset, 0, 0,
{vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0},
{framebuffer.width, framebuffer.height, 1});
scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(),
copy](auto cmdbuf, auto& dld) {
cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal,
{copy}, dld);
});
}
map.Release();
blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader,
vk::AccessFlagBits::eShaderRead,
vk::ImageLayout::eShaderReadOnlyOptimal);
scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
size = swapchain.GetSize(), pipeline = *pipeline,
layout = *pipeline_layout](auto cmdbuf, auto& dld) {
const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}};
const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1,
&clear_color);
cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
cmdbuf.setViewport(
0,
{{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}},
dld);
cmdbuf.setScissor(0, {{{0, 0}, size}}, dld);
cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {},
dld);
cmdbuf.draw(4, 1, 0, 0, dld);
cmdbuf.endRenderPass(dld);
});
return {scheduler.GetFence(), *semaphores[image_index]};
}
void VKBlitScreen::CreateStaticResources() {
CreateShaders();
CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreateSampler();
}
void VKBlitScreen::CreateDynamicResources() {
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
}
void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) {
return;
}
raw_width = framebuffer.width;
raw_height = framebuffer.height;
ReleaseRawImages();
CreateStagingBuffer(framebuffer);
CreateRawImages(framebuffer);
}
void VKBlitScreen::CreateShaders() {
vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code);
fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code);
}
void VKBlitScreen::CreateSemaphores() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
semaphores.resize(image_count);
for (std::size_t i = 0; i < image_count; ++i) {
semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
}
}
void VKBlitScreen::CreateDescriptorPool() {
const std::array<vk::DescriptorPoolSize, 2> pool_sizes{
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, static_cast<u32>(image_count)},
vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler,
static_cast<u32>(image_count)}};
const vk::DescriptorPoolCreateInfo pool_ci(
{}, static_cast<u32>(image_count), static_cast<u32>(pool_sizes.size()), pool_sizes.data());
const auto dev = device.GetLogical();
descriptor_pool = dev.createDescriptorPoolUnique(pool_ci, nullptr, device.GetDispatchLoader());
}
void VKBlitScreen::CreateRenderPass() {
const vk::AttachmentDescription color_attachment(
{}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined,
vk::ImageLayout::ePresentSrcKHR);
const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal);
const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0,
nullptr, 1, &color_attachment_ref, nullptr,
nullptr, 0, nullptr);
const vk::SubpassDependency dependency(
VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eColorAttachmentOutput, {},
vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {});
const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description,
1, &dependency);
const auto dev = device.GetLogical();
renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader());
}
void VKBlitScreen::CreateDescriptorSetLayout() {
const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings{
vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1,
vk::ShaderStageFlagBits::eVertex, nullptr),
vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eCombinedImageSampler, 1,
vk::ShaderStageFlagBits::eFragment, nullptr)};
const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
{}, static_cast<u32>(layout_bindings.size()), layout_bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
}
void VKBlitScreen::CreateDescriptorSets() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
descriptor_sets.resize(image_count);
for (std::size_t i = 0; i < image_count; ++i) {
const vk::DescriptorSetLayout layout = *descriptor_set_layout;
const vk::DescriptorSetAllocateInfo descriptor_set_ai(*descriptor_pool, 1, &layout);
const vk::Result result =
dev.allocateDescriptorSets(&descriptor_set_ai, &descriptor_sets[i], dld);
ASSERT(result == vk::Result::eSuccess);
}
}
void VKBlitScreen::CreatePipelineLayout() {
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &descriptor_set_layout.get(), 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
pipeline_layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
}
void VKBlitScreen::CreateGraphicsPipeline() {
const std::array shader_stages = {
vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader,
"main", nullptr),
vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader,
"main", nullptr)};
const auto vertex_binding_description = ScreenRectVertex::GetDescription();
const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
const vk::PipelineVertexInputStateCreateInfo vertex_input(
{}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()),
vertex_attrs_description.data());
const vk::PipelineInputAssemblyStateCreateInfo input_assembly(
{}, vk::PrimitiveTopology::eTriangleStrip, false);
// Set a dummy viewport, it's going to be replaced by dynamic states.
const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
const vk::Rect2D scissor({0, 0}, {1, 1});
const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor);
const vk::PipelineRasterizationStateCreateInfo rasterizer(
{}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone,
vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f);
const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1,
false, 0.0f, nullptr, false, false);
const vk::PipelineColorBlendAttachmentState color_blend_attachment(
false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd,
vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd,
vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
const vk::PipelineColorBlendStateCreateInfo color_blending(
{}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f});
const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport,
vk::DynamicState::eScissor};
const vk::PipelineDynamicStateCreateInfo dynamic_state(
{}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
const vk::GraphicsPipelineCreateInfo pipeline_ci(
{}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input,
&input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr,
&color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld);
}
void VKBlitScreen::CreateSampler() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::SamplerCreateInfo sampler_ci(
{}, vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerMipmapMode::eLinear,
vk::SamplerAddressMode::eClampToBorder, vk::SamplerAddressMode::eClampToBorder,
vk::SamplerAddressMode::eClampToBorder, 0.0f, false, 0.0f, false, vk::CompareOp::eNever,
0.0f, 0.0f, vk::BorderColor::eFloatOpaqueBlack, false);
sampler = dev.createSamplerUnique(sampler_ci, nullptr, dld);
}
void VKBlitScreen::CreateFramebuffers() {
const vk::Extent2D size{swapchain.GetSize()};
framebuffers.clear();
framebuffers.resize(image_count);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
for (std::size_t i = 0; i < image_count; ++i) {
const vk::ImageView image_view{swapchain.GetImageViewIndex(i)};
const vk::FramebufferCreateInfo framebuffer_ci({}, *renderpass, 1, &image_view, size.width,
size.height, 1);
framebuffers[i] = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
}
}
void VKBlitScreen::ReleaseRawImages() {
for (std::size_t i = 0; i < raw_images.size(); ++i) {
watches[i]->Wait();
}
raw_images.clear();
raw_buffer_commits.clear();
buffer.reset();
buffer_commit.reset();
}
void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::BufferCreateInfo buffer_ci({}, CalculateBufferSize(framebuffer),
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer,
vk::SharingMode::eExclusive, 0, nullptr);
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
buffer_commit = memory_manager.Commit(*buffer, true);
}
void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_buffer_commits.resize(image_count);
const auto format = GetFormat(framebuffer);
for (std::size_t i = 0; i < image_count; ++i) {
const vk::ImageCreateInfo image_ci(
{}, vk::ImageType::e2D, format, {framebuffer.width, framebuffer.height, 1}, 1, 1,
vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal,
vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled,
vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined);
raw_images[i] =
std::make_unique<VKImage>(device, scheduler, image_ci, vk::ImageAspectFlagBits::eColor);
raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false);
}
}
void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const {
const vk::DescriptorSet descriptor_set = descriptor_sets[image_index];
const vk::DescriptorBufferInfo buffer_info(*buffer, offsetof(BufferData, uniform),
sizeof(BufferData::uniform));
const vk::WriteDescriptorSet ubo_write(descriptor_set, 0, 0, 1,
vk::DescriptorType::eUniformBuffer, nullptr,
&buffer_info, nullptr);
const vk::DescriptorImageInfo image_info(*sampler, image_view,
vk::ImageLayout::eShaderReadOnlyOptimal);
const vk::WriteDescriptorSet sampler_write(descriptor_set, 1, 0, 1,
vk::DescriptorType::eCombinedImageSampler,
&image_info, nullptr, nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
dev.updateDescriptorSets({ubo_write, sampler_write}, {}, dld);
}
void VKBlitScreen::SetUniformData(BufferData& data,
const Tegra::FramebufferConfig& framebuffer) const {
const auto& layout = render_window.GetFramebufferLayout();
data.uniform.modelview_matrix =
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
}
void VKBlitScreen::SetVertexData(BufferData& data,
const Tegra::FramebufferConfig& framebuffer) const {
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
auto left = texcoords.left;
auto right = texcoords.right;
switch (framebuffer_transform_flags) {
case Tegra::FramebufferConfig::TransformFlags::Unset:
break;
case Tegra::FramebufferConfig::TransformFlags::FlipV:
// Flip the framebuffer vertically
left = texcoords.right;
right = texcoords.left;
break;
default:
UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
static_cast<u32>(framebuffer_transform_flags));
break;
}
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.0f;
f32 scale_v = 1.0f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
static_cast<f32>(screen_info.height);
}
const auto& screen = render_window.GetFramebufferLayout().screen;
const auto x = static_cast<f32>(screen.left);
const auto y = static_cast<f32>(screen.top);
const auto w = static_cast<f32>(screen.GetWidth());
const auto h = static_cast<f32>(screen.GetHeight());
data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v);
data.vertices[1] = ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v);
data.vertices[2] = ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v);
data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
}
u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
u64 VKBlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}
} // namespace Vulkan

View File

@@ -0,0 +1,119 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <tuple>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
struct FramebufferConfig;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
struct ScreenInfo;
class RasterizerVulkan;
class VKDevice;
class VKFence;
class VKImage;
class VKScheduler;
class VKSwapchain;
class VKBlitScreen final {
public:
explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKSwapchain& swapchain, VKScheduler& scheduler,
const VKScreenInfo& screen_info);
~VKBlitScreen();
void Recreate();
std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
private:
struct BufferData;
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
void CreateGraphicsPipeline();
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const;
void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
Core::System& system;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const VKDevice& device;
VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKSwapchain& swapchain;
VKScheduler& scheduler;
const std::size_t image_count;
const VKScreenInfo& screen_info;
UniqueShaderModule vertex_shader;
UniqueShaderModule fragment_shader;
UniqueDescriptorPool descriptor_pool;
UniqueDescriptorSetLayout descriptor_set_layout;
UniquePipelineLayout pipeline_layout;
UniquePipeline pipeline;
UniqueRenderPass renderpass;
std::vector<UniqueFramebuffer> framebuffers;
std::vector<vk::DescriptorSet> descriptor_sets;
UniqueSampler sampler;
UniqueBuffer buffer;
VKMemoryCommit buffer_commit;
std::vector<std::unique_ptr<VKFenceWatch>> watches;
std::vector<UniqueSemaphore> semaphores;
std::vector<std::unique_ptr<VKImage>> raw_images;
std::vector<VKMemoryCommit> raw_buffer_commits;
u32 raw_width = 0;
u32 raw_height = 0;
};
} // namespace Vulkan

View File

@@ -2,124 +2,145 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <memory>
#include <optional>
#include <tuple>
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "common/bit_util.h"
#include "core/core.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
std::size_t alignment, u8* host_ptr)
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
alignment{alignment} {}
namespace {
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
cpu_memory_} {
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer;
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
vk::AccessFlagBits::eUniformRead;
stream_buffer =
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
vk::PipelineStageFlagBits::eAllCommands);
buffer_handle = stream_buffer->GetBuffer();
const auto BufferUsage =
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
const auto UploadPipelineStage =
vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader;
const auto UploadAccessBarriers =
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
vk::AccessFlagBits::eIndexRead;
auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
}
} // Anonymous namespace
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst,
vk::SharingMode::eExclusive, 0, nullptr);
const auto& dld{device.GetDispatchLoader()};
const auto dev{device.GetLogical()};
buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
buffer.commit = memory_manager.Commit(*buffer.handle, false);
}
CachedBufferBlock::~CachedBufferBlock() = default;
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
: VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
CreateStreamBuffer(device,
scheduler)},
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
staging_pool} {}
VKBufferCache::~VKBufferCache() = default;
u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
if (cache) {
const auto entry = TryGet(host_ptr);
if (entry) {
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
return entry->GetOffset();
}
Unregister(entry);
}
}
AlignBuffer(alignment);
const u64 uploaded_offset = buffer_offset;
if (host_ptr == nullptr) {
return uploaded_offset;
}
std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
alignment, host_ptr);
Register(entry);
}
return uploaded_offset;
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
}
u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return uploaded_offset;
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return {uploaded_ptr, uploaded_offset};
const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
});
return &*empty.handle;
}
void VKBufferCache::Reserve(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
buffer_offset = buffer_offset_base;
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size);
if (invalidate) {
InvalidateAll();
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
size](auto cmdbuf, auto& dld) {
cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
offset, size)},
{}, dld);
});
}
void VKBufferCache::Send() {
stream_buffer->Send(buffer_offset - buffer_offset_base);
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
size](auto cmdbuf, auto& dld) {
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
{}, dld);
cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
});
scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size);
}
void VKBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
dst_offset, size](auto cmdbuf, auto& dld) {
cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
dst_offset, size)},
{}, dld);
});
}
} // namespace Vulkan

View File

@@ -5,105 +5,74 @@
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Memory {
class Memory;
}
namespace Tegra {
class MemoryManager;
namespace Core {
class System;
}
namespace Vulkan {
class VKDevice;
class VKFence;
class VKMemoryManager;
class VKStreamBuffer;
class VKScheduler;
class CachedBufferEntry final : public RasterizerCacheObject {
class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
u8* host_ptr);
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size);
~CachedBufferBlock();
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
std::size_t GetSize() const {
return size;
}
u64 GetOffset() const {
return offset;
}
std::size_t GetAlignment() const {
return alignment;
const vk::Buffer* GetHandle() const {
return &*buffer.handle;
}
private:
VAddr cpu_addr{};
std::size_t size{};
u64 offset{};
std::size_t alignment{};
VKBuffer buffer;
};
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
using Buffer = std::shared_ptr<CachedBufferBlock>;
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
public:
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
/// Reserves a region of memory to be used in subsequent upload/reserve operations.
void Reserve(std::size_t max_size);
/// Ensures that the set data is sent to the device.
void Send();
/// Returns the buffer cache handle.
vk::Buffer GetBuffer() const {
return buffer_handle;
}
const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
void WriteBarrier() override {}
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
const vk::Buffer* ToHandle(const Buffer& buffer) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) override;
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
private:
void AlignBuffer(std::size_t alignment);
Tegra::MemoryManager& tegra_memory_manager;
Memory::Memory& cpu_memory;
std::unique_ptr<VKStreamBuffer> stream_buffer;
vk::Buffer buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
};
} // namespace Vulkan

View File

@@ -0,0 +1,339 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <optional>
#include <utility>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
namespace {
// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
constexpr u8 quad_array[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
// Uint8 SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 uint8_pass[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
} // Anonymous namespace
VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
const std::vector<vk::PushConstantRange> push_constants,
std::size_t code_size, const u8* code) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout,
static_cast<u32>(push_constants.size()),
push_constants.data());
layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
if (!templates.empty()) {
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(templates.size()), templates.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, 0);
descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
std::memcpy(code_copy.get(), code, code_size);
const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get());
module = dev.createShaderModuleUnique(module_ci, nullptr, dld);
const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module,
"main", nullptr);
const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0);
pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld);
}
VKComputePass::~VKComputePass() = default;
vk::DescriptorSet VKComputePass::CommitDescriptorSet(
VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator->Commit(fence);
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool,
{vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
vk::ShaderStageFlagBits::eCompute, nullptr)},
{vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer,
0, sizeof(DescriptorUpdateEntry))},
{vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))},
std::size(quad_array), quad_array),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
QuadArrayPass::~QuadArrayPass() = default;
std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = num_vertices * 6 / 4;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
ASSERT(num_vertices % 4 == 0);
const u32 num_quads = num_vertices / 4;
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
first, set](auto cmdbuf, auto& dld) {
constexpr u32 dispatch_size = 1024;
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first,
dld);
cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld);
const vk::BufferMemoryBarrier barrier(
vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32));
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
});
return {*buffer.handle, 0};
}
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool,
{vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
vk::ShaderStageFlagBits::eCompute, nullptr),
vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1,
vk::ShaderStageFlagBits::eCompute, nullptr)},
{vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer,
0, sizeof(DescriptorUpdateEntry))},
{}, std::size(uint8_pass), uint8_pass),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
Uint8Pass::~Uint8Pass() = default;
std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer,
u64 src_offset) {
const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
num_vertices](auto cmdbuf, auto& dld) {
constexpr u32 dispatch_size = 1024;
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld);
const vk::BufferMemoryBarrier barrier(
vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16));
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
});
return {&*buffer.handle, 0};
}
} // namespace Vulkan

View File

@@ -0,0 +1,77 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKScheduler;
class VKStagingBufferPool;
class VKUpdateDescriptorQueue;
class VKComputePass {
public:
explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
const std::vector<vk::PushConstantRange> push_constants,
std::size_t code_size, const u8* code);
~VKComputePass();
protected:
vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
VKFence& fence);
UniqueDescriptorUpdateTemplate descriptor_template;
UniquePipelineLayout layout;
UniquePipeline pipeline;
private:
UniqueDescriptorSetLayout descriptor_set_layout;
std::optional<DescriptorAllocator> descriptor_allocator;
UniqueShaderModule module;
};
class QuadArrayPass final : public VKComputePass {
public:
explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~QuadArrayPass();
std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~Uint8Pass();
std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer,
u64 src_offset);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
} // namespace Vulkan

View File

@@ -0,0 +1,112 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader)
: device{device}, scheduler{scheduler}, entries{shader.entries},
descriptor_set_layout{CreateDescriptorSetLayout()},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate()},
shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
VKComputePipeline::~VKComputePipeline() = default;
vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
std::vector<vk::DescriptorSetLayoutBinding> bindings;
u32 binding = 0;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
const auto dev = device.GetLogical();
return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
}
UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
const auto dev = device.GetLogical();
return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
}
UniquePipeline VKComputePipeline::CreatePipeline() const {
vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
*shader_module, "main", nullptr);
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
shader_stage_ci.pNext = &subgroup_size_ci;
}
const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
const auto dev = device.GetLogical();
return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
}
} // namespace Vulkan

View File

@@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
class VKComputePipeline final {
public:
explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader);
~VKComputePipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
const ShaderEntries& GetEntries() {
return entries;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
UniquePipeline CreatePipeline() const;
const VKDevice& device;
VKScheduler& scheduler;
ShaderEntries entries;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
UniqueShaderModule shader_module;
UniquePipeline pipeline;
};
} // namespace Vulkan

View File

@@ -0,0 +1,89 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
vk::DescriptorSetLayout layout)
: VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
DescriptorAllocator::~DescriptorAllocator() = default;
vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
return *descriptors[CommitResource(fence)];
}
void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin);
descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
std::make_move_iterator(new_sets.end()));
}
VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
: device{device}, active_pool{AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
static constexpr u32 num_sets = 0x20000;
static constexpr vk::DescriptorPoolSize pool_sizes[] = {
{vk::DescriptorType::eUniformBuffer, num_sets * 90},
{vk::DescriptorType::eStorageBuffer, num_sets * 60},
{vk::DescriptorType::eUniformTexelBuffer, num_sets * 64},
{vk::DescriptorType::eCombinedImageSampler, num_sets * 64},
{vk::DescriptorType::eStorageImage, num_sets * 40}};
const vk::DescriptorPoolCreateInfo create_info(
vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets,
static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes));
const auto dev = device.GetLogical();
return *pools.emplace_back(
dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader()));
}
std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
vk::DescriptorSetLayout layout, std::size_t count) {
std::vector layout_copies(count, layout);
vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
layout_copies.data());
std::vector<vk::DescriptorSet> sets(count);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) {
case vk::Result::eSuccess:
break;
case vk::Result::eErrorOutOfPoolMemory:
active_pool = AllocateNewPool();
allocate_info.descriptorPool = active_pool;
if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
break;
}
[[fallthrough]];
default:
vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
}
vk::PoolFree deleter(dev, active_pool, dld);
std::vector<UniqueDescriptorSet> unique_sets;
unique_sets.reserve(count);
for (const auto set : sets) {
unique_sets.push_back(UniqueDescriptorSet{set, deleter});
}
return unique_sets;
}
} // namespace Vulkan

View File

@@ -0,0 +1,56 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
class VKDescriptorPool;
class DescriptorAllocator final : public VKFencedPool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
~DescriptorAllocator() override;
DescriptorAllocator(const DescriptorAllocator&) = delete;
vk::DescriptorSet Commit(VKFence& fence);
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
VKDescriptorPool& descriptor_pool;
const vk::DescriptorSetLayout layout;
std::vector<UniqueDescriptorSet> descriptors;
};
class VKDescriptorPool final {
friend DescriptorAllocator;
public:
explicit VKDescriptorPool(const VKDevice& device);
~VKDescriptorPool();
private:
vk::DescriptorPool AllocateNewPool();
std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
std::size_t count);
const VKDevice& device;
std::vector<UniqueDescriptorPool> pools;
vk::DescriptorPool active_pool;
};
} // namespace Vulkan

View File

@@ -3,12 +3,15 @@
// Refer to the license.txt file included.
#include <bitset>
#include <chrono>
#include <cstdlib>
#include <optional>
#include <set>
#include <string_view>
#include <thread>
#include <vector>
#include "common/assert.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
@@ -201,6 +204,22 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
return wanted_format;
}
void VKDevice::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
// Wait some time to let the log flush
std::this_thread::sleep_for(std::chrono::seconds{1});
if (!nv_device_diagnostic_checkpoints) {
return;
}
[[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld);
// Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be
// executed. It can be done on a debugger by evaluating the expression:
// *(VKGraphicsPipeline*)data[0]
}
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const {
// Disable for now to avoid converting ASTC twice.
@@ -381,6 +400,8 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
Test(extension, nv_device_diagnostic_checkpoints,
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
}
if (khr_shader_float16_int8) {
@@ -464,6 +485,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8UintPack32,
vk::Format::eA8B8G8R8SnormPack32,
vk::Format::eA8B8G8R8SrgbPack32,
vk::Format::eB5G6R5UnormPack16,

View File

@@ -39,6 +39,9 @@ public:
vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
/// Reports a device loss.
void ReportLoss() const;
/// Returns the dispatch loader with direct function pointers of the device.
const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
return dld;
@@ -159,6 +162,11 @@ public:
return ext_shader_viewport_index_layer;
}
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -218,6 +226,7 @@ private:
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.

View File

@@ -0,0 +1,271 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
MaxwellToVK::StencilOp(face.action_depth_pass),
MaxwellToVK::StencilOp(face.action_depth_fail),
MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
}
bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
static constexpr std::array unsupported_topologies = {
vk::PrimitiveTopology::ePointList,
vk::PrimitiveTopology::eLineList,
vk::PrimitiveTopology::eTriangleList,
vk::PrimitiveTopology::eLineListWithAdjacency,
vk::PrimitiveTopology::eTriangleListWithAdjacency,
vk::PrimitiveTopology::ePatchList};
return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
topology) == std::end(unsupported_topologies);
}
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program)
: device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
key.renderpass_params,
program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
}
UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
for (const auto& stage : program) {
if (stage) {
FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
template_entries);
}
}
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
std::vector<UniqueShaderModule> modules;
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
const auto& stage = program[i];
if (!stage) {
continue;
}
const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
stage->code.data());
modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
}
return modules;
}
UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
const auto& vi = fixed_state.vertex_input;
const auto& ia = fixed_state.input_assembly;
const auto& ds = fixed_state.depth_stencil;
const auto& cd = fixed_state.color_blending;
const auto& ts = fixed_state.tessellation;
const auto& rs = fixed_state.rasterizer;
std::vector<vk::VertexInputBindingDescription> vertex_bindings;
std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t i = 0; i < vi.num_bindings; ++i) {
const auto& binding = vi.bindings[i];
const bool instanced = binding.divisor != 0;
const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
vertex_bindings.emplace_back(binding.index, binding.stride, rate);
if (instanced) {
vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
}
}
std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t i = 0; i < vi.num_attributes; ++i) {
const auto& attribute = vi.attributes[i];
if (input_attributes.find(attribute.index) == input_attributes.end()) {
// Skip attributes not used by the vertex shaders.
continue;
}
vertex_attributes.emplace_back(attribute.index, attribute.buffer,
MaxwellToVK::VertexFormat(attribute.type, attribute.size),
attribute.offset);
}
vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
{}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &vertex_input_divisor_ci;
}
const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
{}, primitive_topology,
ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
Maxwell::NumViewports, nullptr);
// TODO(Rodrigo): Find out what's the default register value for front face
const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
{}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
{}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
const vk::CompareOp depth_test_compare = ds.depth_test_enable
? MaxwellToVK::ComparisonOp(ds.depth_test_function)
: vk::CompareOp::eAlways;
const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
{}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const std::size_t num_attachments =
std::min(cd.attachments_count, renderpass_params.color_attachments.size());
for (std::size_t i = 0; i < num_attachments; ++i) {
constexpr std::array component_table{
vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
const auto& blend = cd.attachments[i];
vk::ColorComponentFlags color_components{};
for (std::size_t j = 0; j < component_table.size(); ++j) {
if (blend.components[j])
color_components |= component_table[j];
}
cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
MaxwellToVK::BlendFactor(blend.dst_rgb_func),
MaxwellToVK::BlendEquation(blend.rgb_equation),
MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
MaxwellToVK::BlendEquation(blend.a_equation), color_components);
}
const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
static_cast<u32>(num_attachments),
cb_attachments.data(), {});
constexpr std::array dynamic_states = {
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
{}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
if (!program[stage]) {
continue;
}
const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
*modules[module_index++], "main", nullptr);
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
}
const vk::GraphicsPipelineCreateInfo create_info(
{}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
&input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
&depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
}
} // namespace Vulkan

View File

@@ -0,0 +1,90 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <unordered_map>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey;
class VKDescriptorPool;
class VKDevice;
class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
class VKGraphicsPipeline final {
public:
explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program);
~VKGraphicsPipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
vk::RenderPass GetRenderPass() const {
return renderpass;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const;
std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const;
const VKDevice& device;
VKScheduler& scheduler;
const FixedPipelineState fixed_state;
const u64 hash;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
std::vector<UniqueShaderModule> modules;
vk::RenderPass renderpass;
UniquePipeline pipeline;
};
} // namespace Vulkan

View File

@@ -0,0 +1,106 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler,
const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask)
: device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
"Queue family tracking is not implemented");
const auto dev = device.GetLogical();
image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader());
const u32 num_ranges = image_num_layers * image_num_levels;
barriers.resize(num_ranges);
subrange_states.resize(num_ranges, {{}, image_ci.initialLayout});
}
VKImage::~VKImage() = default;
void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
vk::ImageLayout new_layout) {
if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
return;
}
std::size_t cursor = 0;
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
const u32 layer = base_layer + layer_it;
const u32 level = base_level + level_it;
auto& state = GetSubrangeState(layer, level);
barriers[cursor] = vk::ImageMemoryBarrier(
state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1});
state.access = new_access;
state.layout = new_layout;
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) {
// TODO(Rodrigo): Implement a way to use the latest stage across subresources.
constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands;
cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr,
static_cast<u32>(cursor), barriers.data(), dld);
});
}
bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept {
const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
base_level == 0 && num_levels == image_num_levels;
if (!is_full_range) {
state_diverged = true;
}
if (!state_diverged) {
auto& state = GetSubrangeState(0, 0);
if (state.access != new_access || state.layout != new_layout) {
return true;
}
}
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
for (u32 level_it = 0; level_it < num_levels; ++level_it) {
const u32 layer = base_layer + layer_it;
const u32 level = base_level + level_it;
auto& state = GetSubrangeState(layer, level);
if (state.access != new_access || state.layout != new_layout) {
return true;
}
}
}
return false;
}
void VKImage::CreatePresentView() {
// Image type has to be 2D to be presented.
const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {},
{aspect_mask, 0, 1, 0, 1});
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld);
}
VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
static_cast<std::size_t>(level)];
}
} // namespace Vulkan

View File

@@ -0,0 +1,84 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKImage {
public:
explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask);
~VKImage();
/// Records in the passed command buffer an image transition and updates the state of the image.
void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
vk::ImageLayout new_layout);
/// Returns a view compatible with presentation, the image has to be 2D.
vk::ImageView GetPresentView() {
if (!present_view) {
CreatePresentView();
}
return *present_view;
}
/// Returns the Vulkan image handler.
vk::Image GetHandle() const {
return *image;
}
/// Returns the Vulkan format for this image.
vk::Format GetFormat() const {
return format;
}
/// Returns the Vulkan aspect mask.
vk::ImageAspectFlags GetAspectMask() const {
return aspect_mask;
}
private:
struct SubrangeState final {
vk::AccessFlags access{}; ///< Current access bits.
vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout.
};
bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept;
/// Creates a presentation view.
void CreatePresentView();
/// Returns the subrange state for a layer and layer.
SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
const VKDevice& device; ///< Device handler.
VKScheduler& scheduler; ///< Device scheduler.
const vk::Format format; ///< Vulkan format.
const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
const u32 image_num_layers; ///< Number of layers.
const u32 image_num_levels; ///< Number of mipmap levels.
UniqueImage image; ///< Image handle.
UniqueImageView present_view; ///< Image view compatible with presentation.
std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers.
std::vector<SubrangeState> subrange_states; ///< Current subrange state.
bool state_diverged = false; ///< True when subresources mismatch in layout.
};
} // namespace Vulkan

View File

@@ -6,6 +6,7 @@
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -16,34 +17,32 @@
namespace Vulkan {
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
namespace {
u64 GetAllocationChunkSize(u64 required_size) {
static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
}
} // Anonymous namespace
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
shifted_type{ShiftType(type)}, is_mappable{properties &
vk::MemoryPropertyFlagBits::eHostVisible} {
if (is_mappable) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
}
}
vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
: device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
shifted_type{ShiftType(type)} {}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (is_mappable)
dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
auto found = TryFindFreeSection(free_iterator, allocation_size,
static_cast<u64>(commit_size), static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
return nullptr;
}
}
u8* address = is_mappable ? base_address + *found : nullptr;
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
*found + commit_size);
commits.push_back(commit.get());
@@ -65,12 +63,10 @@ public:
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it =
std::find_if(commits.begin(), commits.end(),
[&](const auto& stored_commit) { return stored_commit == commit; });
const auto it = std::find(std::begin(commits), std::end(commits), commit);
if (it == commits.end()) {
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
UNREACHABLE();
UNREACHABLE_MSG("Freeing unallocated commit!");
return;
}
commits.erase(it);
@@ -88,11 +84,11 @@ private:
}
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
/// requeriments.
/// requirements.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = start;
while (iterator + size < end) {
const u64 try_left = Common::AlignUp(iterator, alignment);
u64 iterator = Common::AlignUp(start, alignment);
while (iterator + size <= end) {
const u64 try_left = iterator;
const u64 try_right = try_left + size;
bool overlap = false;
@@ -100,7 +96,7 @@ private:
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = commit_right;
iterator = Common::AlignUp(commit_right, alignment);
overlap = true;
break;
}
@@ -110,6 +106,7 @@ private:
return try_left;
}
}
// No free regions where found, return an empty optional.
return std::nullopt;
}
@@ -117,12 +114,8 @@ private:
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
const u64 alloc_size; ///< Size of this allocation.
const u64 allocation_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
const bool is_mappable; ///< Whether the allocation is mappable.
/// Base address of the mapped pointer.
u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(props)} {}
: device{device}, properties{device.GetPhysical().getMemoryProperties(
device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(properties)} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
bool host_visible) {
const u64 chunk_size = GetAllocationChunkSize(requirements.size);
// When a host visible commit is asked, search for host visible and coherent, otherwise search
// for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
const auto TryCommit = [&]() -> VKMemoryCommit {
for (auto& alloc : allocs) {
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
continue;
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
return commit;
}
}
return {};
};
if (auto commit = TryCommit(); commit) {
if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
// TODO(Rodrigo): Try to use host memory.
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
UNREACHABLE();
if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
// TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
// Allocation has failed, panic.
UNREACHABLE_MSG("Ran out of VRAM!");
return {};
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryCommit();
auto commit = TryAllocCommit(requirements, wanted_properties);
ASSERT(commit);
return commit;
}
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
auto commit = Commit(requeriments, host_visible);
auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
auto commit = Commit(requeriments, host_visible);
auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&]() {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto flags = props.memoryTypes[type_index].propertyFlags;
const u32 type = [&] {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const auto flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
UNREACHABLE();
return 0u;
UNREACHABLE_MSG("Couldn't find a compatible memory type!");
return 0U;
}();
const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
allocs.push_back(
allocations.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
vk::MemoryPropertyFlags wanted_properties) {
for (auto& allocation : allocations) {
if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
return commit;
}
}
return {};
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(
const vk::PhysicalDeviceMemoryProperties& properties) {
for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified when heaps are device local only.
return false;
}
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
return true;
}
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
vk::DeviceMemory memory, u64 begin, u64 end)
: device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
u8* VKMemoryCommitImpl::GetData() const {
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
return data;
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
const auto dev = device.GetLogical();
const auto address = reinterpret_cast<u8*>(
dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
return MemoryMap{this, address};
}
void VKMemoryCommitImpl::Unmap() const {
const auto dev = device.GetLogical();
dev.unmapMemory(memory, device.GetDispatchLoader());
}
MemoryMap VKMemoryCommitImpl::Map() const {
return Map(interval.second - interval.first);
}
} // namespace Vulkan

View File

@@ -12,6 +12,7 @@
namespace Vulkan {
class MemoryMap;
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
VKMemoryManager(const VKMemoryManager&) = delete;
~VKMemoryManager();
/**
* Commits a memory with the specified requeriments.
* @param reqs Requeriments returned from a Vulkan call.
* @param requirements Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Returns true if the device uses an unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
/// Tries to allocate a memory commit.
VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
vk::MemoryPropertyFlags wanted_properties);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
/// Returns true if the device uses an unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
friend MemoryMap;
public:
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
u64 begin, u64 end);
explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
vk::DeviceMemory memory, u64 begin, u64 end);
~VKMemoryCommitImpl();
/// Returns the writeable memory map. The commit has to be mappable.
u8* GetData() const;
/// Maps a memory region and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map(u64 size, u64 offset = 0) const;
/// Maps the whole commit and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map() const;
/// Returns the Vulkan memory handler.
vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
}
private:
/// Unmaps memory.
void Unmap() const;
const VKDevice& device; ///< Vulkan device.
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
/// Holds ownership of a memory map.
class MemoryMap final {
public:
explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
: commit{commit}, address{address} {}
~MemoryMap() {
if (commit) {
commit->Unmap();
}
}
/// Prematurely releases the memory map.
void Release() {
commit->Unmap();
commit = nullptr;
}
/// Returns the address of the memory map.
u8* GetAddress() const {
return address;
}
/// Returns the address of the memory map;
operator u8*() const {
return address;
}
private:
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
u8* address{}; ///< Address to the mapped memory.
};
} // namespace Vulkan

View File

@@ -0,0 +1,395 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/compiler_settings.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
using Tegra::Engines::ShaderType;
namespace {
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
const auto& gpu{system.GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
const std::size_t start_offset = is_compute ? 0 : 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
++offset;
}
// The last instruction is included in the program size
return std::min(offset + 1, program.size());
}
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr, bool is_compute) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
program_code.resize(CalculateProgramSize(program_code, is_compute));
return program_code;
}
constexpr std::size_t GetStageFromProgram(std::size_t program) {
return program == 0 ? 0 : program - 1;
}
constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}
ShaderType GetShaderType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
return ShaderType::Vertex;
}
}
u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type);
const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
for (std::size_t i = 0; i < num_entries; ++i) {
bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
return binding;
}
} // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, locker},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
Core::System& system, Tegra::Engines::ShaderType stage) {
if (stage == Tegra::Engines::ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
}
}
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
renderpass_cache(device) {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
if (!dirty) {
return last_shaders;
}
dirty = false;
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
}
return last_shaders = shaders;
}
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
return *last_graphics_pipeline;
}
last_graphics_key = key;
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
}
return *(last_graphics_pipeline = entry.get());
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
auto& entry = pair->second;
if (!is_cache_miss) {
return *entry;
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code),
kernel_main_offset);
Register(shader);
}
Specialization specialization;
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
const SPIRVShader spirv_shader{
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
}
void VKPipelineCache::Unregister(const Shader& shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
// flush.
if (finished) {
return;
}
finished = true;
scheduler.Finish();
};
const GPUVAddr invalidated_addr = shader->GetGpuAddr();
for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
auto& entry = it->first;
if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
entry.shaders.end()) {
++it;
continue;
}
Finish();
it = graphics_cache.erase(it);
}
for (auto it = compute_cache.begin(); it != compute_cache.end();) {
auto& entry = it->first;
if (entry.shader != invalidated_addr) {
++it;
continue;
}
Finish();
it = compute_cache.erase(it);
}
RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& fixed_state = key.fixed_state;
auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
specialization.primitive_topology = fixed_state.input_assembly.topology;
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
for (const auto& rt : key.renderpass_params.color_attachments) {
specialization.enabled_rendertargets.set(rt.index);
}
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const auto shader = TryGet(host_ptr);
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
++index;
}
const u32 old_binding = specialization.base_binding;
specialization.base_binding =
FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
}
return {std::move(program), std::move(bindings)};
}
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
const u32 count = static_cast<u32>(count_);
if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
// crash.
for (u32 i = 0; i < count; ++i) {
template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
offset + i * entry_size, entry_size);
}
} else if (count != 0) {
template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
}
offset += count * entry_size;
binding += count;
};
AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
}
} // namespace Vulkan

View File

@@ -0,0 +1,200 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
namespace Core {
class System;
}
namespace Vulkan {
class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
class CachedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramCode = std::vector<u64>;
struct GraphicsPipelineCacheKey {
FixedPipelineState fixed_state;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
RenderPassParams renderpass_params;
std::size_t Hash() const noexcept {
std::size_t hash = fixed_state.Hash();
for (const auto& shader : shaders) {
boost::hash_combine(hash, shader);
}
boost::hash_combine(hash, renderpass_params.Hash());
return hash;
}
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::tie(fixed_state, shaders, renderpass_params) ==
std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
}
};
struct ComputePipelineCacheKey {
GPUVAddr shader{};
u32 shared_memory_size{};
std::array<u32, 3> workgroup_size{};
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(shader) ^
((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
static_cast<std::size_t>(workgroup_size[0]) ^
(static_cast<std::size_t>(workgroup_size[1]) << 16) ^
(static_cast<std::size_t>(workgroup_size[2]) << 24);
}
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
return std::tie(shader, shared_memory_size, workgroup_size) ==
std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
~CachedShader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64);
}
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
const ShaderEntries& GetEntries() const {
return entries;
}
private:
static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::ConstBufferLocker locker;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
class VKPipelineCache final : public RasterizerCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
void Unregister(const Shader& shader) override;
void FlushObjectInner(const Shader& object) override {}
private:
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
const GraphicsPipelineCacheKey& key);
Core::System& system;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
};
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
} // namespace Vulkan

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,263 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <bitset>
#include <memory>
#include <utility>
#include <vector>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra::Engines {
class Maxwell3D;
}
namespace Vulkan {
struct VKScreenInfo;
using ImageViewsPack =
boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
struct FramebufferCacheKey {
vk::RenderPass renderpass{};
u32 width = 0;
u32 height = 0;
ImageViewsPack views;
std::size_t Hash() const noexcept {
std::size_t hash = 0;
boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
for (const auto& view : views) {
boost::hash_combine(hash, static_cast<VkImageView>(view));
}
boost::hash_combine(hash, width);
boost::hash_combine(hash, height);
return hash;
}
bool operator==(const FramebufferCacheKey& rhs) const noexcept {
return std::tie(renderpass, views, width, height) ==
std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::FramebufferCacheKey> {
std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class BufferBindings;
struct ImageView {
View view;
vk::ImageLayout* layout = nullptr;
};
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKScheduler& scheduler);
~RasterizerVulkan() override;
bool DrawBatch(bool is_indexed) override;
bool DrawMultiBatch(bool is_indexed) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
private:
struct DrawParameters {
void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
u32 base_instance = 0;
u32 num_instances = 0;
u32 base_vertex = 0;
u32 num_vertices = 0;
bool is_indexed = 0;
};
using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
void Draw(bool is_indexed, bool is_instanced);
void FlushWork();
Texceptions UpdateAttachments();
std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
/// Setups geometry buffers and state.
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
void SetupImageTransitions(Texceptions texceptions,
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
const View& zeta_attachment);
void UpdateDynamicStates();
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
BufferBindings& buffer_bindings);
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
/// Setup constant buffers in the graphics pipeline.
void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup texel buffers in the graphics pipeline.
void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
/// Setup constant buffers in the compute pipeline.
void SetupComputeConstBuffers(const ShaderEntries& entries);
/// Setup global buffers in the compute pipeline.
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup texel buffers in the compute pipeline.
void SetupComputeTexelBuffers(const ShaderEntries& entries);
/// Setup textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries);
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
void SetupConstBuffer(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer);
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
std::size_t CalculateComputeStreamBufferSize() const;
std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) const;
RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
Core::System& system;
Core::Frontend::EmuWindow& render_window;
VKScreenInfo& screen_info;
const VKDevice& device;
VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
QuadArrayPass quad_array_pass;
Uint8Pass uint8_pass;
VKTextureCache texture_cache;
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment;
std::vector<ImageView> sampled_views;
std::vector<ImageView> image_views;
u32 draw_counter = 0;
// TODO(Rodrigo): Invalidate on image destruction
std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache;
};
} // namespace Vulkan

View File

@@ -0,0 +1,100 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
namespace Vulkan {
VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
VKRenderPassCache::~VKRenderPassCache() = default;
vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
const auto [pair, is_cache_miss] = cache.try_emplace(params);
auto& entry = pair->second;
if (is_cache_miss) {
entry = CreateRenderPass(params);
}
return *entry;
}
UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
std::vector<vk::AttachmentDescription> descriptors;
std::vector<vk::AttachmentReference> color_references;
for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
const auto attachment = params.color_attachments[rt];
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(attachment.pixel_format));
// TODO(Rodrigo): Add eMayAlias when it's needed.
const auto color_layout = attachment.is_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eColorAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
color_references.emplace_back(static_cast<u32>(rt), color_layout);
}
vk::AttachmentReference zeta_attachment_ref;
if (params.has_zeta) {
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(params.zeta_pixel_format));
const auto zeta_layout = params.zeta_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eDepthStencilAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
zeta_attachment_ref =
vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
}
const vk::SubpassDescription subpass_description(
{}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
nullptr);
vk::AccessFlags access;
vk::PipelineStageFlags stage;
if (!color_references.empty()) {
access |=
vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
}
if (params.has_zeta) {
access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
}
const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
{});
const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
descriptors.data(), 1, &subpass_description, 1,
&subpass_dependency);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createRenderPassUnique(create_info, nullptr, dld);
}
} // namespace Vulkan

View File

@@ -0,0 +1,97 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/surface.h"
namespace Vulkan {
class VKDevice;
// TODO(Rodrigo): Optimize this structure for faster hashing
struct RenderPassParams {
struct ColorAttachment {
u32 index = 0;
VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool is_texception = false;
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(pixel_format) |
static_cast<std::size_t>(is_texception) << 6 |
static_cast<std::size_t>(index) << 7;
}
bool operator==(const ColorAttachment& rhs) const noexcept {
return std::tie(index, pixel_format, is_texception) ==
std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
}
};
boost::container::static_vector<ColorAttachment,
Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_attachments{};
// TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool has_zeta = false;
bool zeta_texception = false;
std::size_t Hash() const noexcept {
std::size_t hash = 0;
for (const auto& rt : color_attachments) {
boost::hash_combine(hash, rt.Hash());
}
boost::hash_combine(hash, zeta_pixel_format);
boost::hash_combine(hash, has_zeta);
boost::hash_combine(hash, zeta_texception);
return hash;
}
bool operator==(const RenderPassParams& rhs) const {
return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
rhs.zeta_texception);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::RenderPassParams> {
std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class VKRenderPassCache final {
public:
explicit VKRenderPassCache(const VKDevice& device);
~VKRenderPassCache();
vk::RenderPass GetRenderPass(const RenderPassParams& params);
private:
UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
const VKDevice& device;
std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
};
} // namespace Vulkan

View File

@@ -72,12 +72,22 @@ VKFence::VKFence(const VKDevice& device, UniqueFence handle)
VKFence::~VKFence() = default;
void VKFence::Wait() {
static constexpr u64 timeout = std::numeric_limits<u64>::max();
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) {
case vk::Result::eSuccess:
return;
case vk::Result::eErrorDeviceLost:
device.ReportLoss();
[[fallthrough]];
default:
vk::throwResultException(result, "vk::waitForFences");
}
}
void VKFence::Release() {
ASSERT(is_owned);
is_owned = false;
}
@@ -133,8 +143,32 @@ void VKFence::Unprotect(VKResource* resource) {
protected_resources.erase(it);
}
void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource,
new_resource);
}
VKFenceWatch::VKFenceWatch() = default;
VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
Watch(initial_fence);
}
VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept {
fence = std::exchange(rhs.fence, nullptr);
if (fence) {
fence->RedirectProtection(&rhs, this);
}
}
VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
fence = std::exchange(rhs.fence, nullptr);
if (fence) {
fence->RedirectProtection(&rhs, this);
}
return *this;
}
VKFenceWatch::~VKFenceWatch() {
if (fence) {
fence->Unprotect(this);

View File

@@ -65,6 +65,9 @@ public:
/// Removes protection for a resource.
void Unprotect(VKResource* resource);
/// Redirects one protected resource to a new address.
void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
/// Retreives the fence.
operator vk::Fence() const {
return *handle;
@@ -97,8 +100,13 @@ private:
class VKFenceWatch final : public VKResource {
public:
explicit VKFenceWatch();
VKFenceWatch(VKFence& initial_fence);
VKFenceWatch(VKFenceWatch&&) noexcept;
VKFenceWatch(const VKFenceWatch&) = delete;
~VKFenceWatch() override;
VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
/// Waits for the fence to be released.
void Wait();
@@ -116,6 +124,14 @@ public:
void OnFenceRemoval(VKFence* signaling_fence) override;
/**
* Do not use it paired with Watch. Use TryWatch instead.
* Returns true when the watch is free.
*/
bool IsUsed() const {
return fence != nullptr;
}
private:
VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
};

View File

@@ -46,9 +46,10 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
{}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
MaxwellToVK::Sampler::Filter(tsc.min_filter),
MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
max_anisotropy, tsc.depth_compare_enabled,
MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
unnormalized_coords);

View File

@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@
namespace Vulkan {
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
: device{device}, resource_manager{resource_manager} {
next_fence = &resource_manager.CommitFence();
AllocateNewContext();
MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
const vk::DispatchLoaderDynamic& dld) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(cmdbuf, dld);
command->~Command();
command = next;
}
command_offset = 0;
first = nullptr;
last = nullptr;
}
VKScheduler::~VKScheduler() = default;
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
: device{device}, resource_manager{resource_manager}, next_fence{
&resource_manager.CommitFence()} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
VKScheduler::~VKScheduler() {
quit = true;
cv.notify_all();
worker_thread.join();
}
void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
if (release_fence)
if (release_fence) {
current_fence->Release();
}
AllocateNewContext();
}
void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
if (release_fence)
if (release_fence) {
current_fence->Release();
}
AllocateNewContext();
}
void VKScheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
bool finished = false;
do {
cv.notify_all();
std::unique_lock lock{mutex};
finished = chunk_queue.Empty();
} while (!finished);
}
void VKScheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
chunk_queue.Push(std::move(chunk));
cv.notify_all();
AcquireNewChunk();
}
void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
if (state.renderpass && renderpass_bi == *state.renderpass) {
return;
}
const bool end_renderpass = state.renderpass.has_value();
state.renderpass = renderpass_bi;
Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
if (end_renderpass) {
cmdbuf.endRenderPass(dld);
}
cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
});
}
void VKScheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
if (state.graphics_pipeline == pipeline) {
return;
}
state.graphics_pipeline = pipeline;
Record([pipeline](auto cmdbuf, auto& dld) {
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
});
}
void VKScheduler::WorkerThread() {
std::unique_lock lock{mutex};
do {
cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
if (quit) {
continue;
}
auto extracted_chunk = std::move(chunk_queue.Front());
chunk_queue.Pop();
extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
chunk_reserve.Push(std::move(extracted_chunk));
} while (!quit);
}
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
EndPendingOperations();
InvalidateState();
WaitWorker();
std::unique_lock lock{mutex};
const auto queue = device.GetGraphicsQueue();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld);
const auto queue = device.GetGraphicsQueue();
const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
&semaphore);
queue.submit({submit_info}, *current_fence, dld);
queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
}
void VKScheduler::AllocateNewContext() {
std::unique_lock lock{mutex};
current_fence = next_fence;
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
device.GetDispatchLoader());
}
void VKScheduler::InvalidateState() {
state.graphics_pipeline = nullptr;
state.viewports = false;
state.scissors = false;
state.depth_bias = false;
state.blend_constants = false;
state.depth_bounds = false;
state.stencil_values = false;
}
void VKScheduler::EndPendingOperations() {
EndRenderPass();
}
void VKScheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
state.renderpass = std::nullopt;
Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
}
void VKScheduler::AcquireNewChunk() {
if (chunk_reserve.Empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
chunk = std::move(chunk_reserve.Front());
chunk_reserve.Pop();
}
} // namespace Vulkan

View File

@@ -4,7 +4,14 @@
#pragma once
#include <condition_variable>
#include <memory>
#include <optional>
#include <stack>
#include <thread>
#include <utility>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
@@ -30,23 +37,6 @@ private:
VKFence* const& fence;
};
class VKCommandBufferView {
public:
VKCommandBufferView() = default;
VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
const vk::CommandBuffer* operator->() const noexcept {
return &cmdbuf;
}
operator vk::CommandBuffer() const noexcept {
return cmdbuf;
}
private:
const vk::CommandBuffer& cmdbuf;
};
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
@@ -54,32 +44,190 @@ public:
explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
~VKScheduler();
/// Gets a reference to the current fence.
VKFenceView GetFence() const {
return current_fence;
}
/// Gets a reference to the current command buffer.
VKCommandBufferView GetCommandBuffer() const {
return current_cmdbuf;
}
/// Sends the current execution context to the GPU.
void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
/// Requests the current executino context to be able to execute operations only allowed outside
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(vk::Pipeline pipeline);
/// Returns true when viewports have been set in the current command buffer.
bool TouchViewports() {
return std::exchange(state.viewports, true);
}
/// Returns true when scissors have been set in the current command buffer.
bool TouchScissors() {
return std::exchange(state.scissors, true);
}
/// Returns true when depth bias have been set in the current command buffer.
bool TouchDepthBias() {
return std::exchange(state.depth_bias, true);
}
/// Returns true when blend constants have been set in the current command buffer.
bool TouchBlendConstants() {
return std::exchange(state.blend_constants, true);
}
/// Returns true when depth bounds have been set in the current command buffer.
bool TouchDepthBounds() {
return std::exchange(state.depth_bounds, true);
}
/// Returns true when stencil values have been set in the current command buffer.
bool TouchStencilValues() {
return std::exchange(state.stencil_values, true);
}
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
if (chunk->Record(command)) {
return;
}
DispatchWork();
(void)chunk->Record(command);
}
/// Gets a reference to the current fence.
VKFenceView GetFence() const {
return current_fence;
}
private:
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf,
const vk::DispatchLoaderDynamic& dld) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command) : command{std::move(command)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf,
const vk::DispatchLoaderDynamic& dld) const override {
command(cmdbuf, dld);
}
private:
T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
bool Empty() const {
return command_offset == 0;
}
private:
Command* first = nullptr;
Command* last = nullptr;
std::size_t command_offset = 0;
std::array<u8, 0x8000> data{};
};
void WorkerThread();
void SubmitExecution(vk::Semaphore semaphore);
void AllocateNewContext();
void InvalidateState();
void EndPendingOperations();
void EndRenderPass();
void AcquireNewChunk();
const VKDevice& device;
VKResourceManager& resource_manager;
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
struct State {
std::optional<vk::RenderPassBeginInfo> renderpass;
vk::Pipeline graphics_pipeline;
bool viewports = false;
bool scissors = false;
bool depth_bias = false;
bool blend_constants = false;
bool depth_bounds = false;
bool stencil_values = false;
} state;
std::unique_ptr<CommandChunk> chunk;
std::thread worker_thread;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
bool quit = false;
};
} // namespace Vulkan

View File

@@ -543,7 +543,7 @@ private:
}
for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
if (!IsRenderTargetUsed(rt)) {
if (!specialization.enabled_rendertargets[rt]) {
continue;
}
@@ -954,6 +954,10 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (const auto amend_index = operation->GetAmendIndex()) {
[[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
ASSERT(type == Type::Void);
}
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
if (const auto amend_index = conditional->GetAmendIndex()) {
[[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
ASSERT(type == Type::Void);
}
// It's invalid to call conditional on nested nodes, use an operation instead
const Id true_label = OpLabel();
const Id skip_label = OpLabel();
@@ -1555,26 +1563,11 @@ private:
Expression Texture(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
UNIMPLEMENTED_IF(!meta.aoffi.empty());
const bool can_implicit = stage == ShaderType::Fragment;
const Id sampler = GetTextureSampler(operation);
const Id coords = GetCoordinates(operation, Type::Float);
if (meta.depth_compare) {
// Depth sampling
UNIMPLEMENTED_IF(meta.bias);
const Id dref = AsFloat(Visit(meta.depth_compare));
if (can_implicit) {
return {OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, {}),
Type::Float};
} else {
return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref,
spv::ImageOperandsMask::Lod, v_float_zero),
Type::Float};
}
}
std::vector<Id> operands;
spv::ImageOperandsMask mask{};
if (meta.bias) {
@@ -1582,13 +1575,36 @@ private:
operands.push_back(AsFloat(Visit(meta.bias)));
}
if (!can_implicit) {
mask = mask | spv::ImageOperandsMask::Lod;
operands.push_back(v_float_zero);
}
if (!meta.aoffi.empty()) {
mask = mask | spv::ImageOperandsMask::Offset;
operands.push_back(GetOffsetCoordinates(operation));
}
if (meta.depth_compare) {
// Depth sampling
UNIMPLEMENTED_IF(meta.bias);
const Id dref = AsFloat(Visit(meta.depth_compare));
if (can_implicit) {
return {
OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
Type::Float};
} else {
return {
OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
Type::Float};
}
}
Id texture;
if (can_implicit) {
texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
} else {
texture = OpImageSampleExplicitLod(t_float4, sampler, coords,
mask | spv::ImageOperandsMask::Lod, v_float_zero,
operands);
texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
}
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1601,7 +1617,8 @@ private:
const Id lod = AsFloat(Visit(meta.lod));
spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
std::vector<Id> operands;
std::vector<Id> operands{lod};
if (!meta.aoffi.empty()) {
mask = mask | spv::ImageOperandsMask::Offset;
operands.push_back(GetOffsetCoordinates(operation));
@@ -1609,11 +1626,10 @@ private:
if (meta.sampler.IsShadow()) {
const Id dref = AsFloat(Visit(meta.depth_compare));
return {
OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, lod, operands),
Type::Float};
return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
Type::Float};
}
const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, lod, operands);
const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1722,7 +1738,7 @@ private:
const std::vector grad = {dx, dy};
static constexpr auto mask = spv::ImageOperandsMask::Grad;
const Id texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, grad);
const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1780,6 +1796,11 @@ private:
return {};
}
Expression UAtomicAdd(Operation) {
UNIMPLEMENTED();
return {};
}
Expression Branch(Operation operation) {
const auto& target = std::get<ImmediateNode>(*operation[0]);
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -1833,7 +1854,7 @@ private:
}
void PreExit() {
if (stage == ShaderType::Vertex) {
if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
const u32 position_index = out_indices.position.value();
const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
@@ -1860,12 +1881,18 @@ private:
// rendertargets/components are skipped in the register assignment.
u32 current_reg = 0;
for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (!specialization.enabled_rendertargets[rt]) {
// Skip rendertargets that are not enabled
continue;
}
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component);
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
SafeGetRegister(current_reg));
OpStore(pointer, SafeGetRegister(current_reg));
++current_reg;
} else {
OpStore(pointer, component == 3 ? v_float_one : v_float_zero);
}
}
}
@@ -1971,6 +1998,18 @@ private:
return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
}
Expression MemoryBarrierGL(Operation) {
const auto scope = spv::Scope::Device;
const auto semantics =
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
spv::MemorySemanticsMask::WorkgroupMemory |
spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
Constant(t_uint, static_cast<u32>(semantics)));
return {};
}
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
const Id id = OpVariable(type, storage);
Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
@@ -1983,15 +2022,6 @@ private:
return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
}
bool IsRenderTargetUsed(u32 rt) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
return true;
}
}
return false;
}
template <typename... Args>
Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
std::vector<Id> members;
@@ -2348,6 +2378,8 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::UAtomicAdd,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
&SPIRVDecompiler::PushFlowStack,
@@ -2374,6 +2406,8 @@ private:
&SPIRVDecompiler::ThreadId,
&SPIRVDecompiler::ShuffleIndexed,
&SPIRVDecompiler::MemoryBarrierGL,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2538,29 +2572,7 @@ public:
}
Id operator()(const ExprCondCode& expr) {
const Node cc = decomp.ir.GetConditionCode(expr.cc);
Id target;
if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
const auto index = pred->GetIndex();
switch (index) {
case Tegra::Shader::Pred::NeverExecute:
target = decomp.v_false;
break;
case Tegra::Shader::Pred::UnusedIndex:
target = decomp.v_true;
break;
default:
target = decomp.predicates.at(index);
break;
}
} else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
} else {
UNREACHABLE();
}
return decomp.OpLoad(decomp.t_bool, target);
return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
}
Id operator()(const ExprVar& expr) {
@@ -2575,7 +2587,7 @@ public:
const Id target = decomp.Constant(decomp.t_uint, expr.value);
Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
gpr = decomp.OpBitcast(decomp.t_uint, gpr);
return decomp.OpLogicalEqual(decomp.t_uint, gpr, target);
return decomp.OpIEqual(decomp.t_bool, gpr, target);
}
Id Visit(const Expr& node) {
@@ -2645,11 +2657,11 @@ public:
const Id loop_label = decomp.OpLabel();
const Id endloop_label = decomp.OpLabel();
const Id loop_start_block = decomp.OpLabel();
const Id loop_end_block = decomp.OpLabel();
const Id loop_continue_block = decomp.OpLabel();
current_loop_exit = endloop_label;
decomp.OpBranch(loop_label);
decomp.AddLabel(loop_label);
decomp.OpLoopMerge(endloop_label, loop_end_block, spv::LoopControlMask::MaskNone);
decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
decomp.OpBranch(loop_start_block);
decomp.AddLabel(loop_start_block);
ASTNode current = ast.nodes.GetFirst();
@@ -2657,6 +2669,8 @@ public:
Visit(current);
current = current->GetNext();
}
decomp.OpBranch(loop_continue_block);
decomp.AddLabel(loop_continue_block);
ExprDecompiler expr_parser{decomp};
const Id condition = expr_parser.Visit(ast.condition);
decomp.OpBranchConditional(condition, loop_label, endloop_label);

View File

@@ -94,6 +94,7 @@ struct Specialization final {
Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
// Tessellation specific
struct {
@@ -101,6 +102,9 @@ struct Specialization final {
Maxwell::TessellationSpacing spacing{};
bool clockwise{};
} tessellation;
// Fragment specific
std::bitset<8> enabled_rendertargets;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);

View File

@@ -0,0 +1,34 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
// Avoid undefined behavior by copying to a staging allocation
ASSERT(code_size % sizeof(u32) == 0);
const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
std::memcpy(data.get(), code_data, code_size);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get());
vk::ShaderModule shader_module;
if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) {
UNREACHABLE_MSG("Shader module failed to build!");
}
return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld));
}
} // namespace Vulkan

View File

@@ -0,0 +1,17 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
} // namespace Vulkan

Some files were not shown because too many files have changed in this diff Show More