A64: Add fast path for Fcvtas_Gp/S/V, Fcvtau_Gp/S/V and Frinta_S/V in… (#3712)
* A64: Add fast path for Fcvtas_Gp/S/V, Fcvtau_Gp/S/V and Frinta_S/V instructions; they use "Round to Nearest with Ties to Away" rounding mode not supported in x86. All instructions involved have been tested locally in both release and debug modes, in both lowcq and highcq. The titles Mario Strikers and Super Smash Bros. U. use these instructions intensively. * Update Ptc.cs * A32: Add fast path for Vcvta_RM, Vrinta_RM and Vrinta_V instructions aswell.
This commit is contained in:
@ -164,32 +164,74 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
public static void Fcvtas_Gp(ArmEmitterContext context)
|
||||
{
|
||||
EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
||||
}
|
||||
}
|
||||
|
||||
public static void Fcvtas_S(ArmEmitterContext context)
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Fcvtas_V(ArmEmitterContext context)
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Fcvtau_Gp(ArmEmitterContext context)
|
||||
{
|
||||
EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
||||
}
|
||||
}
|
||||
|
||||
public static void Fcvtau_S(ArmEmitterContext context)
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Fcvtau_V(ArmEmitterContext context)
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Fcvtl_V(ArmEmitterContext context)
|
||||
@ -1223,7 +1265,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
|
||||
}
|
||||
|
||||
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
||||
|
||||
@ -1265,7 +1314,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
|
||||
}
|
||||
|
||||
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
||||
|
||||
@ -1314,7 +1370,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
|
||||
}
|
||||
|
||||
Operand zero = context.VectorZero();
|
||||
|
||||
@ -1369,7 +1432,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
|
||||
}
|
||||
|
||||
Operand zero = context.VectorZero();
|
||||
|
||||
@ -1424,7 +1494,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
|
||||
}
|
||||
|
||||
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
||||
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
||||
@ -1464,7 +1541,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
|
||||
}
|
||||
|
||||
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
||||
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
||||
@ -1512,7 +1596,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
|
||||
}
|
||||
|
||||
Operand zero = context.VectorZero();
|
||||
|
||||
@ -1567,7 +1658,14 @@ namespace ARMeilleure.Instructions
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
||||
}
|
||||
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
if (roundMode != FPRoundingMode.ToNearestAway)
|
||||
{
|
||||
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
||||
}
|
||||
else
|
||||
{
|
||||
nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
|
||||
}
|
||||
|
||||
Operand zero = context.VectorZero();
|
||||
|
||||
|
Reference in New Issue
Block a user