GPU: Eliminate CB0 accesses when storage buffer accesses are resolved (#3847)

* Eliminate CB0 accesses

Still some work to do; should this be decoupled from HLE?

* Forgot the important part somehow

* Fix and improve alignment test

* Address Feedback

* Remove some complexity when checking storage buffer alignment

* Update Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
This commit is contained in:
riperiperi
2022-11-17 17:47:41 +00:00
committed by GitHub
parent 391e08dd27
commit 33a4d7d1ba
16 changed files with 317 additions and 68 deletions

View File

@@ -95,5 +95,10 @@ namespace Ryujinx.Graphics.Gpu
/// Byte alignment for block linear textures
/// </summary>
public const int GobAlignment = 64;
/// <summary>
/// Expected byte alignment for storage buffers
/// </summary>
public const int StorageAlignment = 16;
}
}

View File

@@ -138,7 +138,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
qmd.CtaThreadDimension1,
qmd.CtaThreadDimension2,
localMemorySize,
sharedMemorySize);
sharedMemorySize,
_channel.BufferManager.HasUnalignedStorageBuffers);
CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
@@ -150,6 +151,33 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
ShaderProgramInfo info = cs.Shaders[0].Info;
bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
}
if ((_channel.BufferManager.HasUnalignedStorageBuffers) != hasUnaligned)
{
// Refetch the shader, as assumptions about storage buffer alignment have changed.
cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
info = cs.Shaders[0].Info;
}
for (int index = 0; index < info.CBuffers.Count; index++)
{
BufferDescriptor cb = info.CBuffers[index];
@@ -174,21 +202,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
_channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
}
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
}
_channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
_channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);

View File

@@ -293,9 +293,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
private void CommitBindings()
{
var buffers = _channel.BufferManager;
var hasUnaligned = buffers.HasUnalignedStorageBuffers;
UpdateStorageBuffers();
if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState))
if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState) || (buffers.HasUnalignedStorageBuffers != hasUnaligned))
{
// Shader must be reloaded.
UpdateShaderState();
@@ -1361,7 +1364,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_state.State.AlphaTestFunc,
_state.State.AlphaTestRef,
ref attributeTypes,
_drawState.HasConstantBufferDrawParameters);
_drawState.HasConstantBufferDrawParameters,
_channel.BufferManager.HasUnalignedStorageBuffers);
}
/// <summary>

View File

@@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
private readonly GpuContext _context;
private readonly GpuChannel _channel;
/// <summary>
/// Number of storage buffer bindings currently flagged as unaligned.
/// </summary>
private int _unalignedStorageBuffers;
/// <summary>
/// True when at least one storage buffer binding is currently flagged as unaligned.
/// </summary>
public bool HasUnalignedStorageBuffers => _unalignedStorageBuffers > 0;
private IndexBuffer _indexBuffer;
private readonly VertexBuffer[] _vertexBuffers;
private readonly BufferBounds[] _transformFeedbackBuffers;
@@ -38,6 +41,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
/// </summary>
public BufferBounds[] Buffers { get; }
/// <summary>
/// Per-binding flags, one entry per buffer slot, indicating whether that binding is unaligned.
/// </summary>
public bool[] Unaligned { get; }
/// <summary>
/// Total amount of buffers used on the shader.
/// </summary>
@@ -51,6 +59,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
Bindings = new BufferDescriptor[count];
Buffers = new BufferBounds[count];
Unaligned = new bool[count];
}
/// <summary>
@@ -202,6 +211,31 @@ namespace Ryujinx.Graphics.Gpu.Memory
_transformFeedbackBuffersDirty = true;
}
/// <summary>
/// Tracks whether the storage buffer bound at a given index starts at an aligned address,
/// keeping the count of unaligned bindings up to date.
/// Unaligned storage buffers disable some optimizations on the shader.
/// </summary>
/// <param name="buffers">The binding list to modify</param>
/// <param name="index">Index of the storage buffer</param>
/// <param name="gpuVa">Start GPU virtual address of the buffer</param>
private void RecordStorageAlignment(BuffersPerStage buffers, int index, ulong gpuVa)
{
    bool isUnaligned = (gpuVa & (Constants.StorageAlignment - 1)) != 0;

    // Nothing to do for an aligned address while no binding is counted as unaligned.
    if (!isUnaligned && !HasUnalignedStorageBuffers)
    {
        return;
    }

    // Only touch the counter when the state recorded for this binding actually flips.
    if (buffers.Unaligned[index] == isUnaligned)
    {
        return;
    }

    buffers.Unaligned[index] = isUnaligned;

    if (isUnaligned)
    {
        _unalignedStorageBuffers++;
    }
    else
    {
        _unalignedStorageBuffers--;
    }
}
/// <summary>
/// Sets a storage buffer on the compute pipeline.
/// Storage buffers can be read and written to on shaders.
@@ -214,6 +248,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
RecordStorageAlignment(_cpStorageBuffers, index, gpuVa);
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
@@ -234,17 +270,21 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
BuffersPerStage buffers = _gpStorageBuffers[stage];
RecordStorageAlignment(buffers, index, gpuVa);
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
if (_gpStorageBuffers[stage].Buffers[index].Address != address ||
_gpStorageBuffers[stage].Buffers[index].Size != size)
if (buffers.Buffers[index].Address != address ||
buffers.Buffers[index].Size != size)
{
_gpStorageBuffersDirty = true;
}
_gpStorageBuffers[stage].SetBounds(index, address, size, flags);
buffers.SetBounds(index, address, size, flags);
}
/// <summary>

View File

@@ -36,6 +36,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="gpuVa">GPU virtual address of the compute shader</param>
/// <param name="program">Cached host program for the given state, if found</param>
/// <param name="cachedGuestCode">Cached guest code, if any found</param>
@@ -43,6 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool TryFind(
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelComputeState computeState,
ulong gpuVa,
out CachedShaderProgram program,
out byte[] cachedGuestCode)
@@ -50,7 +52,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
program = null;
ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);
bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode);
return hasSpecList && specList.TryFindForCompute(channel, poolState, out program);
return hasSpecList && specList.TryFindForCompute(channel, poolState, computeState, out program);
}
/// <summary>

View File

@@ -225,6 +225,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
return _oldSpecState.GraphicsState.EarlyZForce;
}
/// <inheritdoc/>
public bool QueryHasUnalignedStorageBuffer()
{
    // The flag is reported if either the graphics or the compute state has it set.
    if (_oldSpecState.GraphicsState.HasUnalignedStorageBuffer)
    {
        return true;
    }

    return _oldSpecState.ComputeState.HasUnalignedStorageBuffer;
}
/// <inheritdoc/>
public bool QueryViewportTransformDisable()
{

View File

@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 3747;
private const uint CodeGenVersion = 3848;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";

View File

@@ -145,6 +145,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
return _state.GraphicsState.HasConstantBufferDrawParameters;
}
/// <inheritdoc/>
public bool QueryHasUnalignedStorageBuffer()
{
    // The flag is reported if either the graphics or the compute state has it set.
    if (_state.GraphicsState.HasUnalignedStorageBuffer)
    {
        return true;
    }

    return _state.ComputeState.HasUnalignedStorageBuffer;
}
/// <inheritdoc/>
public InputTopology QueryPrimitiveTopology()
{

View File

@@ -32,6 +32,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
public readonly int SharedMemorySize;
/// <summary>
/// Indicates that any storage buffer use is unaligned.
/// </summary>
public readonly bool HasUnalignedStorageBuffer;
/// <summary>
/// Creates a new GPU compute state.
/// </summary>
@@ -40,18 +45,21 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="localSizeZ">Local group size Z of the compute shader</param>
/// <param name="localMemorySize">Local memory size of the compute shader</param>
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
public GpuChannelComputeState(
int localSizeX,
int localSizeY,
int localSizeZ,
int localMemorySize,
int sharedMemorySize)
int sharedMemorySize,
bool hasUnalignedStorageBuffer)
{
LocalSizeX = localSizeX;
LocalSizeY = localSizeY;
LocalSizeZ = localSizeZ;
LocalMemorySize = localMemorySize;
SharedMemorySize = sharedMemorySize;
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
}
}
}

View File

@@ -82,6 +82,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
public readonly bool HasConstantBufferDrawParameters;
/// <summary>
/// Indicates that any storage buffer use is unaligned.
/// </summary>
public readonly bool HasUnalignedStorageBuffer;
/// <summary>
/// Creates a new GPU graphics state.
/// </summary>
@@ -99,6 +104,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
/// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
/// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
public GpuChannelGraphicsState(
bool earlyZForce,
PrimitiveTopology topology,
@@ -113,7 +119,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
CompareOp alphaTestCompare,
float alphaTestReference,
ref Array32<AttributeType> attributeTypes,
bool hasConstantBufferDrawParameters)
bool hasConstantBufferDrawParameters,
bool hasUnalignedStorageBuffer)
{
EarlyZForce = earlyZForce;
Topology = topology;
@@ -129,6 +136,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
AlphaTestReference = alphaTestReference;
AttributeTypes = attributeTypes;
HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
}
}
}

View File

@@ -203,12 +203,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
GpuChannelComputeState computeState,
ulong gpuVa)
{
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa))
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa))
{
return cpShader;
}
if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode))
if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode))
{
_cpPrograms[gpuVa] = cpShader;
return cpShader;
@@ -473,18 +473,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
/// <param name="cpShader">Cached compute shader</param>
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
/// <returns>True if the cached shader code and specialization state match, false otherwise</returns>
private static bool IsShaderEqual(
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelComputeState computeState,
CachedShaderProgram cpShader,
ulong gpuVa)
{
if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
{
return cpShader.SpecializationState.MatchesCompute(channel, poolState, true);
return cpShader.SpecializationState.MatchesCompute(channel, poolState, computeState, true);
}
return false;

View File

@@ -53,13 +53,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="program">Cached program, if found</param>
/// <returns>True if a compatible program is found, false otherwise</returns>
public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program)
public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
{
foreach (var entry in _entries)
{
if (entry.SpecializationState.MatchesCompute(channel, poolState, true))
if (entry.SpecializationState.MatchesCompute(channel, poolState, computeState, true))
{
program = entry;
return true;

View File

@@ -531,6 +531,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
return false;
}
if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
{
return false;
}
return Matches(channel, poolState, checkTextures, isCompute: false);
}
@@ -539,10 +544,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, bool checkTextures)
public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
{
if (computeState.HasUnalignedStorageBuffer != ComputeState.HasUnalignedStorageBuffer)
{
return false;
}
return Matches(channel, poolState, checkTextures, isCompute: true);
}