GPU: Eliminate CB0 accesses when storage buffer accesses are resolved (#3847)
* Eliminate CB0 accesses Still some work to do, decouple from hle? * Forgot the important part somehow * Fix and improve alignment test * Address Feedback * Remove some complexity when checking storage buffer alignment * Update Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs Co-authored-by: gdkchan <gab.dark.100@gmail.com> Co-authored-by: gdkchan <gab.dark.100@gmail.com>
This commit is contained in:
@@ -95,5 +95,10 @@ namespace Ryujinx.Graphics.Gpu
|
||||
/// Byte alignment for block linear textures
|
||||
/// </summary>
|
||||
public const int GobAlignment = 64;
|
||||
|
||||
/// <summary>
|
||||
/// Expected byte alignment for storage buffers
|
||||
/// </summary>
|
||||
public const int StorageAlignment = 16;
|
||||
}
|
||||
}
|
@@ -138,7 +138,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
|
||||
qmd.CtaThreadDimension1,
|
||||
qmd.CtaThreadDimension2,
|
||||
localMemorySize,
|
||||
sharedMemorySize);
|
||||
sharedMemorySize,
|
||||
_channel.BufferManager.HasUnalignedStorageBuffers);
|
||||
|
||||
CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
|
||||
|
||||
@@ -150,6 +151,33 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
|
||||
|
||||
ShaderProgramInfo info = cs.Shaders[0].Info;
|
||||
|
||||
bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
|
||||
|
||||
for (int index = 0; index < info.SBuffers.Count; index++)
|
||||
{
|
||||
BufferDescriptor sb = info.SBuffers[index];
|
||||
|
||||
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
|
||||
|
||||
int sbDescOffset = 0x310 + sb.Slot * 0x10;
|
||||
|
||||
sbDescAddress += (ulong)sbDescOffset;
|
||||
|
||||
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
|
||||
|
||||
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
|
||||
}
|
||||
|
||||
if ((_channel.BufferManager.HasUnalignedStorageBuffers) != hasUnaligned)
|
||||
{
|
||||
// Refetch the shader, as assumptions about storage buffer alignment have changed.
|
||||
cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
|
||||
|
||||
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
|
||||
|
||||
info = cs.Shaders[0].Info;
|
||||
}
|
||||
|
||||
for (int index = 0; index < info.CBuffers.Count; index++)
|
||||
{
|
||||
BufferDescriptor cb = info.CBuffers[index];
|
||||
@@ -174,21 +202,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
|
||||
_channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
|
||||
}
|
||||
|
||||
for (int index = 0; index < info.SBuffers.Count; index++)
|
||||
{
|
||||
BufferDescriptor sb = info.SBuffers[index];
|
||||
|
||||
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
|
||||
|
||||
int sbDescOffset = 0x310 + sb.Slot * 0x10;
|
||||
|
||||
sbDescAddress += (ulong)sbDescOffset;
|
||||
|
||||
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
|
||||
|
||||
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
|
||||
}
|
||||
|
||||
_channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
|
||||
_channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);
|
||||
|
||||
|
@@ -293,9 +293,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
|
||||
/// </summary>
|
||||
private void CommitBindings()
|
||||
{
|
||||
var buffers = _channel.BufferManager;
|
||||
var hasUnaligned = buffers.HasUnalignedStorageBuffers;
|
||||
|
||||
UpdateStorageBuffers();
|
||||
|
||||
if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState))
|
||||
if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState) || (buffers.HasUnalignedStorageBuffers != hasUnaligned))
|
||||
{
|
||||
// Shader must be reloaded.
|
||||
UpdateShaderState();
|
||||
@@ -1361,7 +1364,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
|
||||
_state.State.AlphaTestFunc,
|
||||
_state.State.AlphaTestRef,
|
||||
ref attributeTypes,
|
||||
_drawState.HasConstantBufferDrawParameters);
|
||||
_drawState.HasConstantBufferDrawParameters,
|
||||
_channel.BufferManager.HasUnalignedStorageBuffers);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
private readonly GpuContext _context;
|
||||
private readonly GpuChannel _channel;
|
||||
|
||||
private int _unalignedStorageBuffers;
|
||||
public bool HasUnalignedStorageBuffers => _unalignedStorageBuffers > 0;
|
||||
|
||||
private IndexBuffer _indexBuffer;
|
||||
private readonly VertexBuffer[] _vertexBuffers;
|
||||
private readonly BufferBounds[] _transformFeedbackBuffers;
|
||||
@@ -38,6 +41,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
/// </summary>
|
||||
public BufferBounds[] Buffers { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Flag indicating if this binding is unaligned.
|
||||
/// </summary>
|
||||
public bool[] Unaligned { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Total amount of buffers used on the shader.
|
||||
/// </summary>
|
||||
@@ -51,6 +59,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
{
|
||||
Bindings = new BufferDescriptor[count];
|
||||
Buffers = new BufferBounds[count];
|
||||
Unaligned = new bool[count];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -202,6 +211,31 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
_transformFeedbackBuffersDirty = true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Records the alignment of a storage buffer.
|
||||
/// Unaligned storage buffers disable some optimizations on the shader.
|
||||
/// </summary>
|
||||
/// <param name="buffers">The binding list to modify</param>
|
||||
/// <param name="index">Index of the storage buffer</param>
|
||||
/// <param name="gpuVa">Start GPU virtual address of the buffer</param>
|
||||
private void RecordStorageAlignment(BuffersPerStage buffers, int index, ulong gpuVa)
|
||||
{
|
||||
bool unaligned = (gpuVa & (Constants.StorageAlignment - 1)) != 0;
|
||||
|
||||
if (unaligned || HasUnalignedStorageBuffers)
|
||||
{
|
||||
// Check if the alignment changed for this binding.
|
||||
|
||||
ref bool currentUnaligned = ref buffers.Unaligned[index];
|
||||
|
||||
if (currentUnaligned != unaligned)
|
||||
{
|
||||
currentUnaligned = unaligned;
|
||||
_unalignedStorageBuffers += unaligned ? 1 : -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Sets a storage buffer on the compute pipeline.
|
||||
/// Storage buffers can be read and written to on shaders.
|
||||
@@ -214,6 +248,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
{
|
||||
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
|
||||
|
||||
RecordStorageAlignment(_cpStorageBuffers, index, gpuVa);
|
||||
|
||||
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
|
||||
|
||||
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
|
||||
@@ -234,17 +270,21 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
{
|
||||
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
|
||||
|
||||
BuffersPerStage buffers = _gpStorageBuffers[stage];
|
||||
|
||||
RecordStorageAlignment(buffers, index, gpuVa);
|
||||
|
||||
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
|
||||
|
||||
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
|
||||
|
||||
if (_gpStorageBuffers[stage].Buffers[index].Address != address ||
|
||||
_gpStorageBuffers[stage].Buffers[index].Size != size)
|
||||
if (buffers.Buffers[index].Address != address ||
|
||||
buffers.Buffers[index].Size != size)
|
||||
{
|
||||
_gpStorageBuffersDirty = true;
|
||||
}
|
||||
|
||||
_gpStorageBuffers[stage].SetBounds(index, address, size, flags);
|
||||
buffers.SetBounds(index, address, size, flags);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@@ -36,6 +36,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// </summary>
|
||||
/// <param name="channel">GPU channel</param>
|
||||
/// <param name="poolState">Texture pool state</param>
|
||||
/// <param name="computeState">Compute state</param>
|
||||
/// <param name="gpuVa">GPU virtual address of the compute shader</param>
|
||||
/// <param name="program">Cached host program for the given state, if found</param>
|
||||
/// <param name="cachedGuestCode">Cached guest code, if any found</param>
|
||||
@@ -43,6 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
public bool TryFind(
|
||||
GpuChannel channel,
|
||||
GpuChannelPoolState poolState,
|
||||
GpuChannelComputeState computeState,
|
||||
ulong gpuVa,
|
||||
out CachedShaderProgram program,
|
||||
out byte[] cachedGuestCode)
|
||||
@@ -50,7 +52,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
program = null;
|
||||
ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);
|
||||
bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode);
|
||||
return hasSpecList && specList.TryFindForCompute(channel, poolState, out program);
|
||||
return hasSpecList && specList.TryFindForCompute(channel, poolState, computeState, out program);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@@ -225,6 +225,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
||||
return _oldSpecState.GraphicsState.EarlyZForce;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public bool QueryHasUnalignedStorageBuffer()
|
||||
{
|
||||
return _oldSpecState.GraphicsState.HasUnalignedStorageBuffer || _oldSpecState.ComputeState.HasUnalignedStorageBuffer;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public bool QueryViewportTransformDisable()
|
||||
{
|
||||
|
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
||||
private const ushort FileFormatVersionMajor = 1;
|
||||
private const ushort FileFormatVersionMinor = 2;
|
||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||
private const uint CodeGenVersion = 3747;
|
||||
private const uint CodeGenVersion = 3848;
|
||||
|
||||
private const string SharedTocFileName = "shared.toc";
|
||||
private const string SharedDataFileName = "shared.data";
|
||||
|
@@ -145,6 +145,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
return _state.GraphicsState.HasConstantBufferDrawParameters;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public bool QueryHasUnalignedStorageBuffer()
|
||||
{
|
||||
return _state.GraphicsState.HasUnalignedStorageBuffer || _state.ComputeState.HasUnalignedStorageBuffer;
|
||||
}
|
||||
|
||||
/// <inheritdoc/>
|
||||
public InputTopology QueryPrimitiveTopology()
|
||||
{
|
||||
|
@@ -32,6 +32,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// </summary>
|
||||
public readonly int SharedMemorySize;
|
||||
|
||||
/// <summary>
|
||||
/// Indicates that any storage buffer use is unaligned.
|
||||
/// </summary>
|
||||
public readonly bool HasUnalignedStorageBuffer;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new GPU compute state.
|
||||
/// </summary>
|
||||
@@ -40,18 +45,21 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// <param name="localSizeZ">Local group size Z of the compute shader</param>
|
||||
/// <param name="localMemorySize">Local memory size of the compute shader</param>
|
||||
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
|
||||
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
|
||||
public GpuChannelComputeState(
|
||||
int localSizeX,
|
||||
int localSizeY,
|
||||
int localSizeZ,
|
||||
int localMemorySize,
|
||||
int sharedMemorySize)
|
||||
int sharedMemorySize,
|
||||
bool hasUnalignedStorageBuffer)
|
||||
{
|
||||
LocalSizeX = localSizeX;
|
||||
LocalSizeY = localSizeY;
|
||||
LocalSizeZ = localSizeZ;
|
||||
LocalMemorySize = localMemorySize;
|
||||
SharedMemorySize = sharedMemorySize;
|
||||
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
|
||||
}
|
||||
}
|
||||
}
|
@@ -82,6 +82,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// </summary>
|
||||
public readonly bool HasConstantBufferDrawParameters;
|
||||
|
||||
/// <summary>
|
||||
/// Indicates that any storage buffer use is unaligned.
|
||||
/// </summary>
|
||||
public readonly bool HasUnalignedStorageBuffer;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new GPU graphics state.
|
||||
/// </summary>
|
||||
@@ -99,6 +104,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
|
||||
/// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
|
||||
/// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
|
||||
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
|
||||
public GpuChannelGraphicsState(
|
||||
bool earlyZForce,
|
||||
PrimitiveTopology topology,
|
||||
@@ -113,7 +119,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
CompareOp alphaTestCompare,
|
||||
float alphaTestReference,
|
||||
ref Array32<AttributeType> attributeTypes,
|
||||
bool hasConstantBufferDrawParameters)
|
||||
bool hasConstantBufferDrawParameters,
|
||||
bool hasUnalignedStorageBuffer)
|
||||
{
|
||||
EarlyZForce = earlyZForce;
|
||||
Topology = topology;
|
||||
@@ -129,6 +136,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
AlphaTestReference = alphaTestReference;
|
||||
AttributeTypes = attributeTypes;
|
||||
HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
|
||||
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
|
||||
}
|
||||
}
|
||||
}
|
@@ -203,12 +203,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
GpuChannelComputeState computeState,
|
||||
ulong gpuVa)
|
||||
{
|
||||
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa))
|
||||
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa))
|
||||
{
|
||||
return cpShader;
|
||||
}
|
||||
|
||||
if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode))
|
||||
if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode))
|
||||
{
|
||||
_cpPrograms[gpuVa] = cpShader;
|
||||
return cpShader;
|
||||
@@ -473,18 +473,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// </summary>
|
||||
/// <param name="channel">GPU channel using the shader</param>
|
||||
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
|
||||
/// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
|
||||
/// <param name="cpShader">Cached compute shader</param>
|
||||
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
|
||||
/// <returns>True if the code is different, false otherwise</returns>
|
||||
private static bool IsShaderEqual(
|
||||
GpuChannel channel,
|
||||
GpuChannelPoolState poolState,
|
||||
GpuChannelComputeState computeState,
|
||||
CachedShaderProgram cpShader,
|
||||
ulong gpuVa)
|
||||
{
|
||||
if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
|
||||
{
|
||||
return cpShader.SpecializationState.MatchesCompute(channel, poolState, true);
|
||||
return cpShader.SpecializationState.MatchesCompute(channel, poolState, computeState, true);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@@ -53,13 +53,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// </summary>
|
||||
/// <param name="channel">GPU channel</param>
|
||||
/// <param name="poolState">Texture pool state</param>
|
||||
/// <param name="computeState">Compute state</param>
|
||||
/// <param name="program">Cached program, if found</param>
|
||||
/// <returns>True if a compatible program is found, false otherwise</returns>
|
||||
public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program)
|
||||
public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
|
||||
{
|
||||
foreach (var entry in _entries)
|
||||
{
|
||||
if (entry.SpecializationState.MatchesCompute(channel, poolState, true))
|
||||
if (entry.SpecializationState.MatchesCompute(channel, poolState, computeState, true))
|
||||
{
|
||||
program = entry;
|
||||
return true;
|
||||
|
@@ -531,6 +531,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
return false;
|
||||
}
|
||||
|
||||
if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return Matches(channel, poolState, checkTextures, isCompute: false);
|
||||
}
|
||||
|
||||
@@ -539,10 +544,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// </summary>
|
||||
/// <param name="channel">GPU channel</param>
|
||||
/// <param name="poolState">Texture pool state</param>
|
||||
/// <param name="computeState">Compute state</param>
|
||||
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
|
||||
/// <returns>True if the state matches, false otherwise</returns>
|
||||
public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, bool checkTextures)
|
||||
public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
|
||||
{
|
||||
if (computeState.HasUnalignedStorageBuffer != ComputeState.HasUnalignedStorageBuffer)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return Matches(channel, poolState, checkTextures, isCompute: true);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user