Compare commits

..

7 Commits

Author SHA1 Message Date
riperiperi
fd6d3ec88f Fix res scale parameters not being updated in vertex shader (#3046)
This fixes an issue where the render scale array would not be updated when technically the scales on the flat array were the same, but the start index for the vertex scales was different.
2022-01-27 14:17:13 -03:00
edisionnano
0a0a95fd81 Convert Octal-Mode to Decimal (#3041)
Apparently C# doesn't use 0 as a prefix like C does.
2022-01-25 23:31:04 +01:00
Mary
26019c7d06 Fix regression on PR builds version number since new release system 2022-01-24 18:49:14 +01:00
gdkchan
f3bfd799e1 Fix calls passing V128 values on Linux (#3034)
* Fix calls passing V128 values on Linux

* PPTC version bump
2022-01-24 11:23:24 +01:00
Mary
b2ebbe8b22 amadeus: Fix possible device sink input out of bound (#3032)
This fixes an out-of-bounds access when indexing inputs for games that use
unsupported values (8 here)

Close #2724.
2022-01-23 23:36:31 +01:00
Mary
4910b214f5 Set _vibrationPermitted to True by default (#2985)
Co-authored-by: SpookyBee123 <82302189+SpookyBee123@users.noreply.github.com>
2022-01-23 12:24:55 +01:00
gdkchan
42c75dbb8f Add support for BC1/2/3 decompression (for 3D textures) (#2987)
* Add support for BC1/2/3 decompression (for 3D textures)

* Optimize and clean up

* Unsafe not needed here

* Fix alpha value interpolation when a0 <= a1
2022-01-22 19:23:00 +01:00
15 changed files with 750 additions and 166 deletions

View File

@@ -796,6 +796,8 @@ namespace ARMeilleure.CodeGen.X86
}
}
node.SetSources(sources.ToArray());
if (dest != default)
{
if (dest.Type == OperandType.V128)
@@ -823,8 +825,6 @@ namespace ARMeilleure.CodeGen.X86
node.Destination = retReg;
}
}
node.SetSources(sources.ToArray());
}
private static void HandleTailcallSystemVAbi(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)

View File

@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 3015; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 3034; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";

View File

@@ -52,7 +52,7 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
InputCount = sink.Parameter.InputCount;
InputBufferIndices = new ushort[InputCount];
for (int i = 0; i < InputCount; i++)
for (int i = 0; i < Math.Min(InputCount, Constants.ChannelCountMax); i++)
{
InputBufferIndices[i] = (ushort)(bufferOffset + sink.Parameter.Input[i]);
}

View File

@@ -1,4 +1,6 @@
namespace Ryujinx.Common
using System.Reflection;
namespace Ryujinx.Common
{
// DO NOT EDIT, filled by CI
public static class ReleaseInformations
@@ -25,7 +27,7 @@
}
else
{
return "1.0.0-dirty";
return Assembly.GetEntryAssembly().GetCustomAttribute<AssemblyInformationalVersionAttribute>().InformationalVersion;
}
}
}

View File

@@ -2,30 +2,32 @@ namespace Ryujinx.Graphics.GAL
{
public struct Capabilities
{
public bool HasFrontFacingBug { get; }
public bool HasVectorIndexingBug { get; }
public readonly bool HasFrontFacingBug;
public readonly bool HasVectorIndexingBug;
public bool SupportsAstcCompression { get; }
public bool SupportsBgraFormat { get; }
public bool SupportsR4G4Format { get; }
public bool SupportsFragmentShaderInterlock { get; }
public bool SupportsFragmentShaderOrderingIntel { get; }
public bool SupportsImageLoadFormatted { get; }
public bool SupportsMismatchingViewFormat { get; }
public bool SupportsNonConstantTextureOffset { get; }
public bool SupportsShaderBallot { get; }
public bool SupportsTextureShadowLod { get; }
public bool SupportsViewportSwizzle { get; }
public bool SupportsIndirectParameters { get; }
public readonly bool SupportsAstcCompression;
public readonly bool Supports3DTextureCompression;
public readonly bool SupportsBgraFormat;
public readonly bool SupportsR4G4Format;
public readonly bool SupportsFragmentShaderInterlock;
public readonly bool SupportsFragmentShaderOrderingIntel;
public readonly bool SupportsImageLoadFormatted;
public readonly bool SupportsMismatchingViewFormat;
public readonly bool SupportsNonConstantTextureOffset;
public readonly bool SupportsShaderBallot;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsViewportSwizzle;
public readonly bool SupportsIndirectParameters;
public int MaximumComputeSharedMemorySize { get; }
public float MaximumSupportedAnisotropy { get; }
public int StorageBufferOffsetAlignment { get; }
public readonly int MaximumComputeSharedMemorySize;
public readonly float MaximumSupportedAnisotropy;
public readonly int StorageBufferOffsetAlignment;
public Capabilities(
bool hasFrontFacingBug,
bool hasVectorIndexingBug,
bool supportsAstcCompression,
bool supports3DTextureCompression,
bool supportsBgraFormat,
bool supportsR4G4Format,
bool supportsFragmentShaderInterlock,
@@ -44,6 +46,7 @@ namespace Ryujinx.Graphics.GAL
HasFrontFacingBug = hasFrontFacingBug;
HasVectorIndexingBug = hasVectorIndexingBug;
SupportsAstcCompression = supportsAstcCompression;
Supports3DTextureCompression = supports3DTextureCompression;
SupportsBgraFormat = supportsBgraFormat;
SupportsR4G4Format = supportsR4G4Format;
SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;

View File

@@ -67,11 +67,9 @@ namespace Ryujinx.Graphics.GAL
R10G10B10A2Uint,
R11G11B10Float,
R9G9B9E5Float,
Bc1RgbUnorm,
Bc1RgbaUnorm,
Bc2Unorm,
Bc3Unorm,
Bc1RgbSrgb,
Bc1RgbaSrgb,
Bc2Srgb,
Bc3Srgb,
@@ -349,25 +347,5 @@ namespace Ryujinx.Graphics.GAL
{
return format.IsUint() || format.IsSint();
}
/// <summary>
/// Checks if the texture format is a BC4 compressed format.
/// </summary>
/// <param name="format">Texture format</param>
/// <returns>True if the texture format is a BC4 compressed format, false otherwise</returns>
public static bool IsBc4(this Format format)
{
return format == Format.Bc4Unorm || format == Format.Bc4Snorm;
}
/// <summary>
/// Checks if the texture format is a BC5 compressed format.
/// </summary>
/// <param name="format">Texture format</param>
/// <returns>True if the texture format is a BC5 compressed format, false otherwise</returns>
public static bool IsBc5(this Format format)
{
return format == Format.Bc5Unorm || format == Format.Bc5Snorm;
}
}
}

View File

@@ -78,14 +78,27 @@ namespace Ryujinx.Graphics.Gpu
/// <summary>
/// Host hardware capabilities.
/// </summary>
internal Capabilities Capabilities => _caps.Value;
internal ref Capabilities Capabilities
{
get
{
if (!_capsLoaded)
{
_caps = Renderer.GetCapabilities();
_capsLoaded = true;
}
return ref _caps;
}
}
/// <summary>
/// Event for signalling shader cache loading progress.
/// </summary>
public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;
private readonly Lazy<Capabilities> _caps;
private bool _capsLoaded;
private Capabilities _caps;
private Thread _gpuThread;
/// <summary>
@@ -110,8 +123,6 @@ namespace Ryujinx.Graphics.Gpu
DeferredActions = new Queue<Action>();
PhysicalMemoryRegistry = new ConcurrentDictionary<long, PhysicalMemory>();
_caps = new Lazy<Capabilities>(Renderer.GetCapabilities);
}
/// <summary>

View File

@@ -834,13 +834,31 @@ namespace Ryujinx.Graphics.Gpu.Image
{
data = PixelConverter.ConvertR4G4ToR4G4B4A4(data);
}
else if (Target == Target.Texture3D && Format.IsBc4())
else if (!_context.Capabilities.Supports3DTextureCompression && Target == Target.Texture3D)
{
data = BCnDecoder.DecodeBC4(data, width, height, depth, levels, layers, Info.FormatInfo.Format == Format.Bc4Snorm);
}
else if (Target == Target.Texture3D && Format.IsBc5())
{
data = BCnDecoder.DecodeBC5(data, width, height, depth, levels, layers, Info.FormatInfo.Format == Format.Bc5Snorm);
switch (Format)
{
case Format.Bc1RgbaSrgb:
case Format.Bc1RgbaUnorm:
data = BCnDecoder.DecodeBC1(data, width, height, depth, levels, layers);
break;
case Format.Bc2Srgb:
case Format.Bc2Unorm:
data = BCnDecoder.DecodeBC2(data, width, height, depth, levels, layers);
break;
case Format.Bc3Srgb:
case Format.Bc3Unorm:
data = BCnDecoder.DecodeBC3(data, width, height, depth, levels, layers);
break;
case Format.Bc4Snorm:
case Format.Bc4Unorm:
data = BCnDecoder.DecodeBC4(data, width, height, depth, levels, layers, Format == Format.Bc4Snorm);
break;
case Format.Bc5Snorm:
case Format.Bc5Unorm:
data = BCnDecoder.DecodeBC5(data, width, height, depth, levels, layers, Format == Format.Bc5Snorm);
break;
}
}
return data;

View File

@@ -49,6 +49,7 @@ namespace Ryujinx.Graphics.Gpu.Image
private readonly float[] _scales;
private bool _scaleChanged;
private int _lastFragmentTotal;
/// <summary>
/// Constructs a new instance of the texture bindings manager.
@@ -288,26 +289,30 @@ namespace Ryujinx.Graphics.Gpu.Image
/// </summary>
private void CommitRenderScale()
{
// Stage 0 total: Compute or Vertex.
int total = _textureBindingsCount[0] + _imageBindingsCount[0];
int fragmentIndex = (int)ShaderStage.Fragment - 1;
int fragmentTotal = _isCompute ? 0 : (_textureBindingsCount[fragmentIndex] + _imageBindingsCount[fragmentIndex]);
if (total != 0 && fragmentTotal != _lastFragmentTotal)
{
// Must update scales in the support buffer if:
// - Vertex stage has bindings.
// - Fragment stage binding count has been updated since last render scale update.
_scaleChanged = true;
}
if (_scaleChanged)
{
int fragmentTotal = 0;
int total;
if (!_isCompute)
{
int fragmentIndex = (int)ShaderStage.Fragment - 1;
fragmentTotal = _textureBindingsCount[fragmentIndex] + _imageBindingsCount[fragmentIndex];
int vertexIndex = (int)ShaderStage.Vertex - 1;
int vertexTotal = _textureBindingsCount[vertexIndex] + _imageBindingsCount[vertexIndex];
total = fragmentTotal + vertexTotal;
}
else
{
total = _textureBindingsCount[0] + _imageBindingsCount[0];
total += fragmentTotal; // Add the fragment bindings to the total.
}
_lastFragmentTotal = fragmentTotal;
_context.Renderer.Pipeline.UpdateRenderScale(_scales, total, fragmentTotal);
_scaleChanged = false;

View File

@@ -14,9 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Image
private enum FormatClass
{
Unclassified,
BCn64,
BCn128,
Bc1Rgb,
Bc1Rgba,
Bc2,
Bc3,
@@ -88,13 +85,21 @@ namespace Ryujinx.Graphics.Gpu.Image
return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4);
}
if (info.Target == Target.Texture3D)
if (!caps.Supports3DTextureCompression && info.Target == Target.Texture3D)
{
// The host API does not support 3D BC4/BC5 compressed formats.
// The host API does not support 3D compressed formats.
// We assume software decompression will be done for those textures,
// and so we adjust the format here to match the decompressor output.
switch (info.FormatInfo.Format)
{
case Format.Bc1RgbaSrgb:
case Format.Bc2Srgb:
case Format.Bc3Srgb:
return new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4);
case Format.Bc1RgbaUnorm:
case Format.Bc2Unorm:
case Format.Bc3Unorm:
return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4);
case Format.Bc4Unorm:
return new FormatInfo(Format.R8Unorm, 1, 1, 1, 1);
case Format.Bc4Snorm:
@@ -749,9 +754,6 @@ namespace Ryujinx.Graphics.Gpu.Image
{
switch (format)
{
case Format.Bc1RgbSrgb:
case Format.Bc1RgbUnorm:
return FormatClass.Bc1Rgb;
case Format.Bc1RgbaSrgb:
case Format.Bc1RgbaUnorm:
return FormatClass.Bc1Rgba;

View File

@@ -80,11 +80,9 @@ namespace Ryujinx.Graphics.OpenGL
Add(Format.R10G10B10A2Uint, new FormatInfo(4, false, false, All.Rgb10A2ui, PixelFormat.RgbaInteger, PixelType.UnsignedInt2101010Reversed));
Add(Format.R11G11B10Float, new FormatInfo(3, false, false, All.R11fG11fB10f, PixelFormat.Rgb, PixelType.UnsignedInt10F11F11FRev));
Add(Format.R9G9B9E5Float, new FormatInfo(3, false, false, All.Rgb9E5, PixelFormat.Rgb, PixelType.UnsignedInt5999Rev));
Add(Format.Bc1RgbUnorm, new FormatInfo(3, true, false, All.CompressedRgbS3tcDxt1Ext));
Add(Format.Bc1RgbaUnorm, new FormatInfo(4, true, false, All.CompressedRgbaS3tcDxt1Ext));
Add(Format.Bc2Unorm, new FormatInfo(4, true, false, All.CompressedRgbaS3tcDxt3Ext));
Add(Format.Bc3Unorm, new FormatInfo(4, true, false, All.CompressedRgbaS3tcDxt5Ext));
Add(Format.Bc1RgbSrgb, new FormatInfo(3, false, false, All.CompressedSrgbS3tcDxt1Ext));
Add(Format.Bc1RgbaSrgb, new FormatInfo(4, true, false, All.CompressedSrgbAlphaS3tcDxt1Ext));
Add(Format.Bc2Srgb, new FormatInfo(4, false, false, All.CompressedSrgbAlphaS3tcDxt3Ext));
Add(Format.Bc3Srgb, new FormatInfo(4, false, false, All.CompressedSrgbAlphaS3tcDxt5Ext));

View File

@@ -104,6 +104,7 @@ namespace Ryujinx.Graphics.OpenGL
hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows,
hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows,
supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
supports3DTextureCompression: false,
supportsBgraFormat: false,
supportsR4G4Format: false,
supportsFragmentShaderInterlock: HwCapabilities.SupportsFragmentShaderInterlock,

View File

@@ -1,7 +1,9 @@
using Ryujinx.Common;
using System;
using System.Runtime.CompilerServices;
using System.Buffers.Binary;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.Texture
{
@@ -10,22 +12,30 @@ namespace Ryujinx.Graphics.Texture
private const int BlockWidth = 4;
private const int BlockHeight = 4;
public static byte[] DecodeBC4(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed)
public static byte[] DecodeBC1(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers;
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
}
byte[] output = new byte[size];
ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data);
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4];
Span<byte> rPal = stackalloc byte[8];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
int baseOOffs = 0;
Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile);
Span<Vector128<byte>> outputLine0 = default;
Span<Vector128<byte>> outputLine1 = default;
Span<Vector128<byte>> outputLine2 = default;
Span<Vector128<byte>> outputLine3 = default;
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
@@ -39,11 +49,302 @@ namespace Ryujinx.Graphics.Texture
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int lineBaseOOffs = baseOOffs + baseX;
int copyWidth = Math.Min(BlockWidth, width - baseX);
BC1DecodeTileRgb(tile, data);
if ((copyWidth | copyHeight) == 4)
{
outputLine0[x] = tileAsVector128[0];
outputLine1[x] = tileAsVector128[1];
outputLine2[x] = tileAsVector128[2];
outputLine3[x] = tileAsVector128[3];
}
else
{
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
}
data = data.Slice(8);
}
}
imageBaseOOffs += width * height;
}
}
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
/// <summary>
/// Decodes BC2 block-compressed texture data into an uncompressed buffer with 4 bytes per texel.
/// All mip levels, layers and depth slices are decoded back to back into the returned array.
/// </summary>
/// <param name="data">Compressed input; 16 bytes are consumed for every 4x4 block</param>
/// <param name="width">Width of the first level, in texels</param>
/// <param name="height">Height of the first level, in texels</param>
/// <param name="depth">Depth of the first level, in texels</param>
/// <param name="levels">Number of mip levels to decode</param>
/// <param name="layers">Number of layers to decode for each level</param>
/// <returns>The decoded data, 4 bytes per texel</returns>
public static byte[] DecodeBC2(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
// Output size: every level (each dimension halved per level, never below 1), times layers, times 4 bytes per texel.
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
}
byte[] output = new byte[size];
// Scratch buffer for one decoded 4x4 block (4 bytes per texel), viewed as bytes, texels (uint) and rows (Vector128).
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile);
// Views over the four output rows of the current block row; only assigned when the block row is a full 4 texels tall.
Span<Vector128<byte>> outputLine0 = default;
Span<Vector128<byte>> outputLine1 = default;
Span<Vector128<byte>> outputLine2 = default;
Span<Vector128<byte>> outputLine3 = default;
// Offset (in texels) of the current 2D slice inside the output.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
// Size of the current level in blocks, rounded up so partial blocks at the edges are included.
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
// Number of texel rows of this block row that are inside the image (1 to 4).
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
// Number of texel columns of this block that are inside the image (1 to 4).
int copyWidth = Math.Min(BlockWidth, width - baseX);
// The second 8 bytes of the block are handed to the shared BC2/BC3 color decoder, which fills the tile.
BC23DecodeTileRgb(tile, data.Slice(8));
// The first 8 bytes hold sixteen 4-bit values, one per texel, lowest nibble first.
ulong block = BinaryPrimitives.ReadUInt64LittleEndian(data);
// Write each value into the 4th byte of its texel (presumably the alpha channel of the RGBA output),
// expanding 4 bits to 8 by replicating the nibble into both halves of the byte.
for (int i = 3; i < BlockWidth * BlockHeight * 4; i += 4, block >>= 4)
{
tile[i] = (byte)((block & 0xf) | (block << 4));
}
// Both counts are in the 1..4 range, so the OR is 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
// Full block: store each 4-texel row of the tile with a single 128-bit write.
outputLine0[x] = tileAsVector128[0];
outputLine1[x] = tileAsVector128[1];
outputLine2[x] = tileAsVector128[2];
outputLine3[x] = tileAsVector128[3];
}
else
{
// Partial block at the right/bottom edge: copy only the texels that are inside the image.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
}
// Advance to the next 16-byte block.
data = data.Slice(16);
}
}
imageBaseOOffs += width * height;
}
}
// Move on to the next mip level.
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
/// <summary>
/// Decodes BC3 block-compressed texture data into an uncompressed buffer with 4 bytes per texel.
/// All mip levels, layers and depth slices are decoded back to back into the returned array.
/// </summary>
/// <param name="data">Compressed input; 16 bytes are consumed for every 4x4 block</param>
/// <param name="width">Width of the first level, in texels</param>
/// <param name="height">Height of the first level, in texels</param>
/// <param name="depth">Depth of the first level, in texels</param>
/// <param name="levels">Number of mip levels to decode</param>
/// <param name="layers">Number of layers to decode for each level</param>
/// <returns>The decoded data, 4 bytes per texel</returns>
public static byte[] DecodeBC3(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
// Output size: every level (each dimension halved per level, never below 1), times layers, times 4 bytes per texel.
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
}
byte[] output = new byte[size];
// Scratch buffer for one decoded 4x4 block (4 bytes per texel).
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4];
// 8-entry alpha palette; entries 0 and 1 come from the block, the rest are filled by BCnLerpAlphaUnorm.
Span<byte> rPal = stackalloc byte[8];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile);
// Views over the four output rows of the current block row; only assigned when the block row is a full 4 texels tall.
Span<Vector128<byte>> outputLine0 = default;
Span<Vector128<byte>> outputLine1 = default;
Span<Vector128<byte>> outputLine2 = default;
Span<Vector128<byte>> outputLine3 = default;
// Offset (in texels) of the current 2D slice inside the output.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
// Size of the current level in blocks, rounded up so partial blocks at the edges are included.
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
// Number of texel rows of this block row that are inside the image (1 to 4).
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
// Number of texel columns of this block that are inside the image (1 to 4).
int copyWidth = Math.Min(BlockWidth, width - baseX);
// The second 8 bytes of the block are handed to the shared BC2/BC3 color decoder, which fills the tile.
BC23DecodeTileRgb(tile, data.Slice(8));
// The first 8 bytes are the alpha part: two 8-bit endpoints followed by the per-texel index bits.
ulong block = BinaryPrimitives.ReadUInt64LittleEndian(data);
rPal[0] = (byte)block;
rPal[1] = (byte)(block >> 8);
BCnLerpAlphaUnorm(rPal);
// The remaining bits (above the two endpoint bytes) are passed on as the palette indices.
BCnDecodeTileAlphaRgba(tile, rPal, block >> 16);
// Both counts are in the 1..4 range, so the OR is 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
// Full block: store each 4-texel row of the tile with a single 128-bit write.
outputLine0[x] = tileAsVector128[0];
outputLine1[x] = tileAsVector128[1];
outputLine2[x] = tileAsVector128[2];
outputLine3[x] = tileAsVector128[3];
}
else
{
// Partial block at the right/bottom edge: copy only the texels that are inside the image.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
}
// Advance to the next 16-byte block.
data = data.Slice(16);
}
}
imageBaseOOffs += width * height;
}
}
// Move on to the next mip level.
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
public static byte[] DecodeBC4(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers;
}
byte[] output = new byte[size];
Span<byte> outputSpan = new Span<byte>(output);
ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data);
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight];
Span<byte> rPal = stackalloc byte[8];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputLine0 = default;
Span<uint> outputLine1 = default;
Span<uint> outputLine2 = default;
Span<uint> outputLine3 = default;
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
outputLine0 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int copyWidth = Math.Min(BlockWidth, width - baseX);
ulong block = data64[0];
@@ -52,45 +353,43 @@ namespace Ryujinx.Graphics.Texture
if (signed)
{
CalculateBC3AlphaS(rPal);
BCnLerpAlphaSnorm(rPal);
}
else
{
CalculateBC3Alpha(rPal);
BCnLerpAlphaUnorm(rPal);
}
ulong rI = block >> 16;
BCnDecodeTileAlpha(tile, rPal, block >> 16);
for (int texel = 0; texel < BlockWidth * BlockHeight; texel++)
if ((copyWidth | copyHeight) == 4)
{
int tX = texel & 3;
int tY = texel >> 2;
outputLine0[x] = tileAsUint[0];
outputLine1[x] = tileAsUint[1];
outputLine2[x] = tileAsUint[2];
outputLine3[x] = tileAsUint[3];
}
else
{
int pixelBaseOOffs = lineBaseOOffs + baseX;
if (baseX + tX >= width || baseY + tY >= height)
for (int tY = 0; tY < copyHeight; tY++)
{
continue;
tile.Slice(tY * 4, copyWidth).CopyTo(outputSpan.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
int shift = texel * 3;
byte r = rPal[(int)((rI >> shift) & 7)];
int oOffs = lineBaseOOffs + tY * width + tX;
output[oOffs] = r;
}
data64 = data64.Slice(1);
}
baseOOffs += width * (baseY + BlockHeight > height ? (height & (BlockHeight - 1)) : BlockHeight);
}
imageBaseOOffs += width * height;
}
}
width = Math.Max(1, width >> 1);
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
@@ -109,10 +408,22 @@ namespace Ryujinx.Graphics.Texture
ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data);
Span<byte> rTile = stackalloc byte[BlockWidth * BlockHeight * 2];
Span<byte> gTile = stackalloc byte[BlockWidth * BlockHeight * 2];
Span<byte> rPal = stackalloc byte[8];
Span<byte> gPal = stackalloc byte[8];
int baseOOffs = 0;
Span<ushort> outputAsUshort = MemoryMarshal.Cast<byte, ushort>(output);
Span<uint> rTileAsUint = MemoryMarshal.Cast<byte, uint>(rTile);
Span<uint> gTileAsUint = MemoryMarshal.Cast<byte, uint>(gTile);
Span<ulong> outputLine0 = default;
Span<ulong> outputLine1 = default;
Span<ulong> outputLine2 = default;
Span<ulong> outputLine3 = default;
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
@@ -126,11 +437,21 @@ namespace Ryujinx.Graphics.Texture
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
outputLine0 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int lineBaseOOffs = baseOOffs + baseX;
int copyWidth = Math.Min(BlockWidth, width - baseX);
ulong blockL = data64[0];
ulong blockH = data64[1];
@@ -142,101 +463,346 @@ namespace Ryujinx.Graphics.Texture
if (signed)
{
CalculateBC3AlphaS(rPal);
CalculateBC3AlphaS(gPal);
BCnLerpAlphaSnorm(rPal);
BCnLerpAlphaSnorm(gPal);
}
else
{
CalculateBC3Alpha(rPal);
CalculateBC3Alpha(gPal);
BCnLerpAlphaUnorm(rPal);
BCnLerpAlphaUnorm(gPal);
}
ulong rI = blockL >> 16;
ulong gI = blockH >> 16;
BCnDecodeTileAlpha(rTile, rPal, blockL >> 16);
BCnDecodeTileAlpha(gTile, gPal, blockH >> 16);
for (int texel = 0; texel < BlockWidth * BlockHeight; texel++)
if ((copyWidth | copyHeight) == 4)
{
int tX = texel & 3;
int tY = texel >> 2;
outputLine0[x] = InterleaveBytes(rTileAsUint[0], gTileAsUint[0]);
outputLine1[x] = InterleaveBytes(rTileAsUint[1], gTileAsUint[1]);
outputLine2[x] = InterleaveBytes(rTileAsUint[2], gTileAsUint[2]);
outputLine3[x] = InterleaveBytes(rTileAsUint[3], gTileAsUint[3]);
}
else
{
int pixelBaseOOffs = lineBaseOOffs + baseX;
if (baseX + tX >= width || baseY + tY >= height)
for (int tY = 0; tY < copyHeight; tY++)
{
continue;
int line = pixelBaseOOffs + width * tY;
for (int tX = 0; tX < copyWidth; tX++)
{
int texel = tY * BlockWidth + tX;
outputAsUshort[line + tX] = (ushort)(rTile[texel] | (gTile[texel] << 8));
}
}
int shift = texel * 3;
byte r = rPal[(int)((rI >> shift) & 7)];
byte g = gPal[(int)((gI >> shift) & 7)];
int oOffs = (lineBaseOOffs + tY * width + tX) * 2;
output[oOffs + 0] = r;
output[oOffs + 1] = g;
}
data64 = data64.Slice(2);
}
baseOOffs += width * (baseY + BlockHeight > height ? (height & (BlockHeight - 1)) : BlockHeight);
}
imageBaseOOffs += width * height;
}
}
width = Math.Max(1, width >> 1);
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void CalculateBC3Alpha(Span<byte> alpha)
private static ulong InterleaveBytes(uint left, uint right)
{
for (int i = 2; i < 8; i++)
return InterleaveBytesWithZeros(left) | (InterleaveBytesWithZeros(right) << 8);
}
/// <summary>
/// Spreads the four bytes of a 32-bit value over a 64-bit value, so that input byte N ends up
/// in the low byte of 16-bit lane N and the high byte of every lane is zero.
/// </summary>
/// <param name="value">Value whose bytes should be spread out</param>
/// <returns>The input bytes, each one zero-extended to 16 bits</returns>
private static ulong InterleaveBytesWithZeros(uint value)
{
    const ulong HalfLaneMask = 0x0000ffff0000ffffUL;
    const ulong ByteLaneMask = 0x00ff00ff00ff00ffUL;

    ulong spread = value;

    // Move the upper 16 bits of the input up to bits 32..47.
    // The overlapping bits 16..31 are discarded by the mask.
    spread |= spread << 16;
    spread &= HalfLaneMask;

    // Within each 32-bit half, move the upper byte up by 8 bits.
    // Again, the overlapping bits are discarded by the mask.
    spread |= spread << 8;
    spread &= ByteLaneMask;

    return spread;
}
private static void BCnLerpAlphaUnorm(Span<byte> alpha)
{
byte a0 = alpha[0];
byte a1 = alpha[1];
if (a0 > a1)
{
if (alpha[0] > alpha[1])
alpha[2] = (byte)((6 * a0 + 1 * a1) / 7);
alpha[3] = (byte)((5 * a0 + 2 * a1) / 7);
alpha[4] = (byte)((4 * a0 + 3 * a1) / 7);
alpha[5] = (byte)((3 * a0 + 4 * a1) / 7);
alpha[6] = (byte)((2 * a0 + 5 * a1) / 7);
alpha[7] = (byte)((1 * a0 + 6 * a1) / 7);
}
else
{
alpha[2] = (byte)((4 * a0 + 1 * a1) / 5);
alpha[3] = (byte)((3 * a0 + 2 * a1) / 5);
alpha[4] = (byte)((2 * a0 + 3 * a1) / 5);
alpha[5] = (byte)((1 * a0 + 4 * a1) / 5);
alpha[6] = 0;
alpha[7] = 0xff;
}
}
/// <summary>
/// Fills entries 2 to 7 of an 8-entry palette from entries 0 and 1, treating both as signed bytes.
/// When entry 0 is greater than entry 1, six values are interpolated between them (weights in sevenths).
/// Otherwise four values are interpolated (weights in fifths) and the last two entries are set to 0x80 and 0x7f.
/// </summary>
/// <param name="alpha">Palette with entries 0 and 1 already set; entries 2 to 7 are overwritten</param>
private static void BCnLerpAlphaSnorm(Span<byte> alpha)
{
    int first = (sbyte)alpha[0];
    int second = (sbyte)alpha[1];

    if (first <= second)
    {
        // 6-value mode: four interpolated entries plus the two fixed signed extremes.
        for (int i = 2; i < 6; i++)
        {
            alpha[i] = (byte)(((6 - i) * first + (i - 1) * second) / 5);
        }

        alpha[6] = 0x80;
        alpha[7] = 0x7f;

        return;
    }

    // 8-value mode: six interpolated entries.
    for (int i = 2; i < 8; i++)
    {
        alpha[i] = (byte)(((8 - i) * first + (i - 1) * second) / 7);
    }
}
private unsafe static void BCnDecodeTileAlpha(Span<byte> output, Span<byte> rPal, ulong rI)
{
if (Avx2.IsSupported)
{
Span<Vector128<byte>> outputAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(output);
Vector128<uint> shifts = Vector128.Create(0u, 3u, 6u, 9u);
Vector128<uint> masks = Vector128.Create(7u);
Vector128<byte> vClut;
fixed (byte* pRPal = rPal)
{
alpha[i] = (byte)(((8 - i) * alpha[0] + (i - 1) * alpha[1]) / 7);
vClut = Sse2.LoadScalarVector128((ulong*)pRPal).AsByte();
}
else if (i < 6)
Vector128<uint> indices0 = Vector128.Create((uint)rI);
Vector128<uint> indices1 = Vector128.Create((uint)(rI >> 24));
Vector128<uint> indices00 = Avx2.ShiftRightLogicalVariable(indices0, shifts);
Vector128<uint> indices10 = Avx2.ShiftRightLogicalVariable(indices1, shifts);
Vector128<uint> indices01 = Sse2.ShiftRightLogical(indices00, 12);
Vector128<uint> indices11 = Sse2.ShiftRightLogical(indices10, 12);
indices00 = Sse2.And(indices00, masks);
indices10 = Sse2.And(indices10, masks);
indices01 = Sse2.And(indices01, masks);
indices11 = Sse2.And(indices11, masks);
Vector128<ushort> indicesW0 = Sse41.PackUnsignedSaturate(indices00.AsInt32(), indices01.AsInt32());
Vector128<ushort> indicesW1 = Sse41.PackUnsignedSaturate(indices10.AsInt32(), indices11.AsInt32());
Vector128<byte> indices = Sse2.PackUnsignedSaturate(indicesW0.AsInt16(), indicesW1.AsInt16());
outputAsVector128[0] = Ssse3.Shuffle(vClut, indices);
}
else
{
for (int i = 0; i < BlockWidth * BlockHeight; i++, rI >>= 3)
{
alpha[i] = (byte)(((6 - i) * alpha[0] + (i - 1) * alpha[1]) / 7);
}
else if (i == 6)
{
alpha[i] = 0;
}
else /* i == 7 */
{
alpha[i] = 0xff;
output[i] = rPal[(int)(rI & 7)];
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void CalculateBC3AlphaS(Span<byte> alpha)
private unsafe static void BCnDecodeTileAlphaRgba(Span<byte> output, Span<byte> rPal, ulong rI)
{
for (int i = 2; i < 8; i++)
if (Avx2.IsSupported)
{
if ((sbyte)alpha[0] > (sbyte)alpha[1])
Span<Vector256<uint>> outputAsVector256 = MemoryMarshal.Cast<byte, Vector256<uint>>(output);
Vector256<uint> shifts = Vector256.Create(0u, 3u, 6u, 9u, 12u, 15u, 18u, 21u);
Vector128<uint> vClut128;
fixed (byte* pRPal = rPal)
{
alpha[i] = (byte)(((8 - i) * (sbyte)alpha[0] + (i - 1) * (sbyte)alpha[1]) / 7);
vClut128 = Sse2.LoadScalarVector128((ulong*)pRPal).AsUInt32();
}
else if (i < 6)
Vector256<uint> vClut = Avx2.ConvertToVector256Int32(vClut128.AsByte()).AsUInt32();
vClut = Avx2.ShiftLeftLogical(vClut, 24);
Vector256<uint> indices0 = Vector256.Create((uint)rI);
Vector256<uint> indices1 = Vector256.Create((uint)(rI >> 24));
indices0 = Avx2.ShiftRightLogicalVariable(indices0, shifts);
indices1 = Avx2.ShiftRightLogicalVariable(indices1, shifts);
outputAsVector256[0] = Avx2.Or(outputAsVector256[0], Avx2.PermuteVar8x32(vClut, indices0));
outputAsVector256[1] = Avx2.Or(outputAsVector256[1], Avx2.PermuteVar8x32(vClut, indices1));
}
else
{
for (int i = 3; i < BlockWidth * BlockHeight * 4; i += 4, rI >>= 3)
{
alpha[i] = (byte)(((6 - i) * (sbyte)alpha[0] + (i - 1) * (sbyte)alpha[1]) / 7);
}
else if (i == 6)
{
alpha[i] = 0x80;
}
else /* i == 7 */
{
alpha[i] = 0x7f;
output[i] = rPal[(int)(rI & 7)];
}
}
}
/// <summary>
/// Builds the four-entry colour table for a BC1 block and forwards it to
/// <see cref="BCnDecodeTileRgb"/> to fill <paramref name="output"/>.
/// </summary>
/// <param name="output">Destination buffer handed unchanged to <see cref="BCnDecodeTileRgb"/>.</param>
/// <param name="input">Encoded block; its first four bytes are read as one little-endian word holding both endpoints.</param>
private unsafe static void BC1DecodeTileRgb(Span<byte> output, ReadOnlySpan<byte> input)
{
    const uint OpaqueAlpha = 0xff000000;

    // Low half of the word is endpoint 0, high half is endpoint 1.
    uint endpoints = BinaryPrimitives.ReadUInt32LittleEndian(input);
    uint c0 = endpoints & 0xffff;
    uint c1 = endpoints >> 16;

    uint color0 = ConvertRgb565ToRgb888(c0) | OpaqueAlpha;
    uint color1 = ConvertRgb565ToRgb888(c1) | OpaqueAlpha;

    // The raw endpoints are passed along because the BC1 lerp helpers
    // branch on how c0 compares to c1.
    Span<uint> clut = stackalloc uint[4]
    {
        color0,
        color1,
        BC1LerpRgb2(color0, color1, c0, c1),
        BC1LerpRgb3(color0, color1, c0, c1),
    };

    BCnDecodeTileRgb(clut, output, input);
}
/// <summary>
/// Builds the four-entry colour table for the colour part of a BC2/BC3 block and forwards
/// it to <see cref="BCnDecodeTileRgb"/> to fill <paramref name="output"/>.
/// The table entries carry no alpha bits (their top byte is left at zero).
/// </summary>
/// <param name="output">Destination buffer handed unchanged to <see cref="BCnDecodeTileRgb"/>.</param>
/// <param name="input">Encoded colour data; its first four bytes are read as one little-endian word holding both endpoints.</param>
private unsafe static void BC23DecodeTileRgb(Span<byte> output, ReadOnlySpan<byte> input)
{
    // Low half of the word is endpoint 0, high half is endpoint 1.
    uint endpoints = BinaryPrimitives.ReadUInt32LittleEndian(input);

    uint color0 = ConvertRgb565ToRgb888(endpoints & 0xffff);
    uint color1 = ConvertRgb565ToRgb888(endpoints >> 16);

    Span<uint> clut = stackalloc uint[4]
    {
        color0,
        color1,
        BC23LerpRgb2(color0, color1),
        BC23LerpRgb3(color0, color1),
    };

    BCnDecodeTileRgb(clut, output, input);
}
/// <summary>
/// Expands the packed 2-bit colour indices stored at byte offset 4 of <paramref name="input"/>
/// into 32-bit values looked up in <paramref name="clut"/>, writing them to <paramref name="output"/>.
/// </summary>
/// <param name="clut">Colour table; entries 0 to 3 are read.</param>
/// <param name="output">Destination buffer, reinterpreted as 32-bit elements.</param>
/// <param name="input">Encoded block; the 32-bit little-endian index word is read from offset 4.</param>
private unsafe static void BCnDecodeTileRgb(Span<uint> clut, Span<byte> output, ReadOnlySpan<byte> input)
{
    if (!Avx2.IsSupported)
    {
        // Scalar fallback: consume the index word two bits at a time.
        Span<uint> texels = MemoryMarshal.Cast<byte, uint>(output);
        uint packed = BinaryPrimitives.ReadUInt32LittleEndian(input.Slice(4));

        for (int texel = 0; texel < BlockWidth * BlockHeight; texel++)
        {
            texels[texel] = clut[(int)(packed & 3)];
            packed >>= 2;
        }

        return;
    }

    // Only the low 128 bits of the table vector are defined; that is enough because
    // every lane index is masked to 0..3 before the permute below.
    Vector256<uint> palette;
    fixed (uint* pClut = &clut[0])
    {
        palette = Sse2.LoadVector128(pClut).ToVector256Unsafe();
    }

    // Replicate the index word into all eight lanes.
    Vector256<uint> packedIndices;
    fixed (byte* pInput = input)
    {
        packedIndices = Avx2.BroadcastScalarToVector256((uint*)(pInput + 4));
    }

    Vector256<uint> twoBitMask = Vector256.Create(3u);
    Vector256<uint> lowShifts = Vector256.Create(0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u);
    Vector256<uint> highShifts = Vector256.Create(16u, 18u, 20u, 22u, 24u, 26u, 28u, 30u);

    // Each lane shifts its own 2-bit field down to bit 0, then isolates it.
    Vector256<uint> lowIndices = Avx2.And(Avx2.ShiftRightLogicalVariable(packedIndices, lowShifts), twoBitMask);
    Vector256<uint> highIndices = Avx2.And(Avx2.ShiftRightLogicalVariable(packedIndices, highShifts), twoBitMask);

    Span<Vector256<uint>> halves = MemoryMarshal.Cast<byte, Vector256<uint>>(output);
    halves[0] = Avx2.PermuteVar8x32(palette, lowIndices);
    halves[1] = Avx2.PermuteVar8x32(palette, highIndices);
}
/// <summary>
/// Computes colour table entry 2 for a BC1 block. The top byte of the result is always 0xff.
/// </summary>
/// <param name="color0">First endpoint as a packed 32-bit colour.</param>
/// <param name="color1">Second endpoint as a packed 32-bit colour.</param>
/// <param name="c0">Raw first endpoint, used only for the comparison.</param>
/// <param name="c1">Raw second endpoint, used only for the comparison.</param>
/// <returns>
/// The <see cref="BC23LerpRgb2"/> blend when <paramref name="c0"/> is greater than
/// <paramref name="c1"/>; otherwise the per-byte halved sum of the two colours.
/// </returns>
private static uint BC1LerpRgb2(uint color0, uint color1, uint c0, uint c1)
{
    uint blended;

    if (c0 > c1)
    {
        blended = BC23LerpRgb2(color0, color1);
    }
    else
    {
        // Per-byte (a + b) / 2 without cross-byte overflow: the shared bits are kept
        // as they are, and half of the differing bits is added on top.
        uint sharedBits = color0 & color1;
        uint halfOfDiffering = ((color0 ^ color1) >> 1) & 0x7f7f7f;

        blended = halfOfDiffering + sharedBits;
    }

    return blended | 0xff000000;
}
/// <summary>
/// Blends two packed colours with weights 2/3 for <paramref name="color0"/> and 1/3 for
/// <paramref name="color1"/>, independently for each of the three low bytes (integer division).
/// Bits 24 to 31 of the inputs are ignored and are zero in the result.
/// </summary>
private static uint BC23LerpRgb2(uint color0, uint color1)
{
    const uint LowMask = 0xff;
    const uint MidMask = 0xff00;
    const uint HighMask = 0xff0000;

    // Channels are blended in place (without shifting them down); the final masks
    // discard the fractional bits that land below each channel.
    uint low = (2 * (color0 & LowMask) + (color1 & LowMask)) / 3;
    uint mid = (2 * (color0 & MidMask) + (color1 & MidMask)) / 3;
    uint high = (2 * (color0 & HighMask) + (color1 & HighMask)) / 3;

    return low | (mid & MidMask) | (high & HighMask);
}
/// <summary>
/// Computes colour table entry 3 for a BC1 block.
/// </summary>
/// <param name="color0">First endpoint as a packed 32-bit colour.</param>
/// <param name="color1">Second endpoint as a packed 32-bit colour.</param>
/// <param name="c0">Raw first endpoint, used only for the comparison.</param>
/// <param name="c1">Raw second endpoint, used only for the comparison.</param>
/// <returns>
/// The <see cref="BC23LerpRgb3"/> blend with the top byte set to 0xff when
/// <paramref name="c0"/> is greater than <paramref name="c1"/>; otherwise 0.
/// </returns>
private static uint BC1LerpRgb3(uint color0, uint color1, uint c0, uint c1)
{
    return c0 > c1
        ? BC23LerpRgb3(color0, color1) | 0xff000000
        : 0u;
}
/// <summary>
/// Blends two packed colours with weights 1/3 for <paramref name="color0"/> and 2/3 for
/// <paramref name="color1"/>, independently for each of the three low bytes (integer division).
/// Bits 24 to 31 of the inputs are ignored and are zero in the result.
/// </summary>
private static uint BC23LerpRgb3(uint color0, uint color1)
{
    const uint LowMask = 0xff;
    const uint MidMask = 0xff00;
    const uint HighMask = 0xff0000;

    // Channels are blended in place (without shifting them down); the final masks
    // discard the fractional bits that land below each channel.
    uint low = (2 * (color1 & LowMask) + (color0 & LowMask)) / 3;
    uint mid = (2 * (color1 & MidMask) + (color0 & MidMask)) / 3;
    uint high = (2 * (color1 & HighMask) + (color0 & HighMask)) / 3;

    return low | (mid & MidMask) | (high & HighMask);
}
/// <summary>
/// Expands a 16-bit 5:6:5 colour into three 8-bit channels packed in the low 24 bits.
/// Bits 11 to 15 of <paramref name="value"/> end up in the lowest result byte, bits 5 to 10 in the
/// middle byte and bits 0 to 4 in the third byte. Bits above 15 are ignored.
/// </summary>
private static uint ConvertRgb565ToRgb888(uint value)
{
    uint r5 = (value >> 11) & 0x1f;
    uint g6 = (value >> 5) & 0x3f;
    uint b5 = value & 0x1f;

    // Widen each field to 8 bits by repeating its top bits in the vacated low bits,
    // so that an all-ones field maps to 0xff.
    uint r8 = (r5 << 3) | (r5 >> 2);
    uint g8 = (g6 << 2) | (g6 >> 4);
    uint b8 = (b5 << 3) | (b5 >> 2);

    return r8 | (g8 << 8) | (b8 << 16);
}
}
}

View File

@@ -55,6 +55,8 @@ namespace Ryujinx.HLE.HOS.Services.Hid
// TODO: signal event at right place
_xpadIdEvent.ReadableEvent.Signal();
_vibrationPermitted = true;
}
[CommandHipc(0)]
@@ -1141,8 +1143,6 @@ namespace Ryujinx.HLE.HOS.Services.Hid
{
context.ResponseData.Write(_vibrationPermitted);
Logger.Stub?.PrintStub(LogClass.ServiceHid, new { _vibrationPermitted });
return ResultCode.Success;
}

View File

@@ -396,7 +396,7 @@ namespace Ryujinx.Modules
if (!OperatingSystem.IsWindows())
{
chmod(ryuBin, 0777);
chmod(ryuBin, 493);
}
}