31#include "llvm/IR/IntrinsicsAMDGPU.h"
38#define DEBUG_TYPE "si-instr-info"
40#define GET_INSTRINFO_CTOR_DTOR
41#include "AMDGPUGenInstrInfo.inc"
44#define GET_D16ImageDimIntrinsics_IMPL
45#define GET_ImageDimIntrinsicTable_IMPL
46#define GET_RsrcIntrinsics_IMPL
47#include "AMDGPUGenSearchableTables.inc"
55 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
58 "amdgpu-fix-16-bit-physreg-copies",
59 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
74 unsigned N =
Node->getNumOperands();
75 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
89 if (Op0Idx == -1 && Op1Idx == -1)
93 if ((Op0Idx == -1 && Op1Idx != -1) ||
94 (Op1Idx == -1 && Op0Idx != -1))
115 return !
MI.memoperands_empty() &&
117 return MMO->isLoad() && MMO->isInvariant();
139 if (!
MI.hasImplicitDef() &&
140 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
141 !
MI.mayRaiseFPException())
152 if (
MI.isCompare()) {
158 switch (
Use.getOpcode()) {
159 case AMDGPU::S_AND_SAVEEXEC_B32:
160 case AMDGPU::S_AND_SAVEEXEC_B64:
162 case AMDGPU::S_AND_B32:
163 case AMDGPU::S_AND_B64:
164 if (!
Use.readsRegister(AMDGPU::EXEC,
nullptr))
174 switch (
MI.getOpcode()) {
177 case AMDGPU::V_READFIRSTLANE_B32:
194 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
199 for (
auto Op :
MI.uses()) {
200 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
206 if (FromCycle ==
nullptr)
212 while (FromCycle && !FromCycle->
contains(ToCycle)) {
232 int64_t &Offset1)
const {
240 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
244 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
262 if (Offset0Idx == -1 || Offset1Idx == -1)
269 Offset0Idx -=
get(Opc0).NumDefs;
270 Offset1Idx -=
get(Opc1).NumDefs;
291 assert(NumOps == 4 || NumOps == 5);
296 dyn_cast<ConstantSDNode>(Load0->
getOperand(NumOps - 3));
298 dyn_cast<ConstantSDNode>(Load1->
getOperand(NumOps - 3));
300 if (!Load0Offset || !Load1Offset)
320 if (OffIdx0 == -1 || OffIdx1 == -1)
326 OffIdx0 -=
get(Opc0).NumDefs;
327 OffIdx1 -=
get(Opc1).NumDefs;
333 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
346 case AMDGPU::DS_READ2ST64_B32:
347 case AMDGPU::DS_READ2ST64_B64:
348 case AMDGPU::DS_WRITE2ST64_B32:
349 case AMDGPU::DS_WRITE2ST64_B64:
364 OffsetIsScalable =
false;
394 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
395 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
396 if (Offset0 + 1 != Offset1)
415 Offset = EltSize * Offset0;
418 if (DataOpIdx == -1) {
436 if (BaseOp && !BaseOp->
isFI())
444 if (SOffset->
isReg())
461 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
465 if (VAddr0Idx >= 0) {
467 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
525 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
533 if (MO1->getAddrSpace() != MO2->getAddrSpace())
536 const auto *Base1 = MO1->getValue();
537 const auto *Base2 = MO2->getValue();
538 if (!Base1 || !Base2)
543 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
546 return Base1 == Base2;
550 int64_t Offset1,
bool OffsetIsScalable1,
552 int64_t Offset2,
bool OffsetIsScalable2,
553 unsigned ClusterSize,
554 unsigned NumBytes)
const {
567 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
586 const unsigned LoadSize = NumBytes / ClusterSize;
587 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
588 return NumDWords <= MaxMemoryClusterDWords;
602 int64_t Offset0, int64_t Offset1,
603 unsigned NumLoads)
const {
604 assert(Offset1 > Offset0 &&
605 "Second offset should be larger than first offset!");
610 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
617 const char *Msg =
"illegal VGPR to SGPR copy") {
621 C.diagnose(IllegalCopy);
638 assert((
TII.getSubtarget().hasMAIInsts() &&
639 !
TII.getSubtarget().hasGFX90AInsts()) &&
640 "Expected GFX908 subtarget.");
643 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
644 "Source register of the copy should be either an SGPR or an AGPR.");
647 "Destination register of the copy should be an AGPR.");
656 for (
auto Def =
MI, E =
MBB.
begin(); Def != E; ) {
659 if (!Def->modifiesRegister(SrcReg, &RI))
662 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
663 Def->getOperand(0).getReg() != SrcReg)
670 bool SafeToPropagate =
true;
673 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
674 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
675 SafeToPropagate =
false;
677 if (!SafeToPropagate)
689 if (ImpUseSuperReg) {
690 Builder.
addReg(ImpUseSuperReg,
708 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
712 "VGPR used for an intermediate copy should have been reserved.");
727 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
728 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
729 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
736 if (ImpUseSuperReg) {
737 UseBuilder.
addReg(ImpUseSuperReg,
759 int16_t SubIdx = BaseIndices[
Idx];
760 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
761 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
762 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
763 unsigned Opcode = AMDGPU::S_MOV_B32;
766 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
767 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
768 if (AlignedDest && AlignedSrc && (
Idx + 1 < BaseIndices.
size())) {
772 DestSubReg = RI.getSubReg(DestReg, SubIdx);
773 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
774 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
775 Opcode = AMDGPU::S_MOV_B64;
790 assert(FirstMI && LastMI);
798 LastMI->addRegisterKilled(SrcReg, &RI);
805 bool RenamableDest,
bool RenamableSrc)
const {
807 unsigned Size = RI.getRegSizeInBits(*RC);
809 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
815 if (((
Size == 16) != (SrcSize == 16))) {
822 if (DestReg == SrcReg) {
828 RC = RI.getPhysRegBaseClass(DestReg);
829 Size = RI.getRegSizeInBits(*RC);
830 SrcRC = RI.getPhysRegBaseClass(SrcReg);
831 SrcSize = RI.getRegSizeInBits(*SrcRC);
835 if (RC == &AMDGPU::VGPR_32RegClass) {
837 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
838 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
839 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
840 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
846 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
847 RC == &AMDGPU::SReg_32RegClass) {
848 if (SrcReg == AMDGPU::SCC) {
855 if (DestReg == AMDGPU::VCC_LO) {
856 if (AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
870 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
880 if (RC == &AMDGPU::SReg_64RegClass) {
881 if (SrcReg == AMDGPU::SCC) {
888 if (DestReg == AMDGPU::VCC) {
889 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
903 if (!AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
913 if (DestReg == AMDGPU::SCC) {
916 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
934 if (RC == &AMDGPU::AGPR_32RegClass) {
935 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
936 (ST.
hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
951 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
958 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
959 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
961 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
962 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
963 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
964 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
981 if (IsAGPRDst || IsAGPRSrc) {
982 if (!DstLow || !SrcLow) {
984 "Cannot use hi16 subreg with an AGPR!");
997 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
998 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1011 if (!DstLow || !SrcLow) {
1013 "Cannot use hi16 subreg on VI!");
1064 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1070 unsigned EltSize = 4;
1071 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1074 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1077 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1079 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1081 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1087 Opcode = AMDGPU::V_MOV_B64_e32;
1090 Opcode = AMDGPU::V_PK_MOV_B32;
1100 std::unique_ptr<RegScavenger> RS;
1101 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1102 RS = std::make_unique<RegScavenger>();
1108 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1109 const bool CanKillSuperReg = KillSrc && !Overlap;
1114 SubIdx = SubIndices[
Idx];
1116 SubIdx = SubIndices[SubIndices.
size() -
Idx - 1];
1117 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1118 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1119 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1121 bool IsFirstSubreg =
Idx == 0;
1122 bool UseKill = CanKillSuperReg &&
Idx == SubIndices.
size() - 1;
1124 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1128 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1129 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1176 int64_t
Value)
const {
1179 if (RegClass == &AMDGPU::SReg_32RegClass ||
1180 RegClass == &AMDGPU::SGPR_32RegClass ||
1181 RegClass == &AMDGPU::SReg_32_XM0RegClass ||
1182 RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
1188 if (RegClass == &AMDGPU::SReg_64RegClass ||
1189 RegClass == &AMDGPU::SGPR_64RegClass ||
1190 RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
1196 if (RegClass == &AMDGPU::VGPR_32RegClass) {
1207 unsigned EltSize = 4;
1208 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1210 if (RI.getRegSizeInBits(*RegClass) > 32) {
1211 Opcode = AMDGPU::S_MOV_B64;
1214 Opcode = AMDGPU::S_MOV_B32;
1221 int64_t IdxValue =
Idx == 0 ?
Value : 0;
1224 get(Opcode), RI.getSubReg(DestReg, SubIndices[
Idx]));
1225 Builder.
addImm(IdxValue);
1231 return &AMDGPU::VGPR_32RegClass;
1242 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1243 "Not a VGPR32 reg");
1245 if (
Cond.size() == 1) {
1246 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1255 }
else if (
Cond.size() == 2) {
1257 switch (
Cond[0].getImm()) {
1258 case SIInstrInfo::SCC_TRUE: {
1259 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1261 : AMDGPU::S_CSELECT_B64), SReg)
1272 case SIInstrInfo::SCC_FALSE: {
1273 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1275 : AMDGPU::S_CSELECT_B64), SReg)
1286 case SIInstrInfo::VCCNZ: {
1289 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1300 case SIInstrInfo::VCCZ: {
1303 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1314 case SIInstrInfo::EXECNZ: {
1315 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1318 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1321 : AMDGPU::S_CSELECT_B64), SReg)
1332 case SIInstrInfo::EXECZ: {
1333 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1336 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1339 : AMDGPU::S_CSELECT_B64), SReg)
1388 return AMDGPU::COPY;
1389 if (RI.getRegSizeInBits(*DstRC) == 16) {
1392 return RI.
isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1394 if (RI.getRegSizeInBits(*DstRC) == 32)
1395 return RI.
isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1396 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.
isSGPRClass(DstRC))
1397 return AMDGPU::S_MOV_B64;
1398 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.
isSGPRClass(DstRC))
1399 return AMDGPU::V_MOV_B64_PSEUDO;
1400 return AMDGPU::COPY;
1405 bool IsIndirectSrc)
const {
1406 if (IsIndirectSrc) {
1408 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1410 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1412 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1414 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1416 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1418 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1426 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1428 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1429 if (VecSize <= 1024)
1430 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1436 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1438 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1440 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1442 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1444 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1446 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1448 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1450 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1457 if (VecSize <= 1024)
1458 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1465 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1467 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1469 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1471 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1473 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1475 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1477 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1479 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1481 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1483 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1486 if (VecSize <= 1024)
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1494 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1496 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1498 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1500 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1502 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1504 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1506 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1508 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1510 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1512 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1515 if (VecSize <= 1024)
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1527 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1530 if (VecSize <= 1024)
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1538 bool IsSGPR)
const {
1550 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1557 return AMDGPU::SI_SPILL_S32_SAVE;
1559 return AMDGPU::SI_SPILL_S64_SAVE;
1561 return AMDGPU::SI_SPILL_S96_SAVE;
1563 return AMDGPU::SI_SPILL_S128_SAVE;
1565 return AMDGPU::SI_SPILL_S160_SAVE;
1567 return AMDGPU::SI_SPILL_S192_SAVE;
1569 return AMDGPU::SI_SPILL_S224_SAVE;
1571 return AMDGPU::SI_SPILL_S256_SAVE;
1573 return AMDGPU::SI_SPILL_S288_SAVE;
1575 return AMDGPU::SI_SPILL_S320_SAVE;
1577 return AMDGPU::SI_SPILL_S352_SAVE;
1579 return AMDGPU::SI_SPILL_S384_SAVE;
1581 return AMDGPU::SI_SPILL_S512_SAVE;
1583 return AMDGPU::SI_SPILL_S1024_SAVE;
1592 return AMDGPU::SI_SPILL_V32_SAVE;
1594 return AMDGPU::SI_SPILL_V64_SAVE;
1596 return AMDGPU::SI_SPILL_V96_SAVE;
1598 return AMDGPU::SI_SPILL_V128_SAVE;
1600 return AMDGPU::SI_SPILL_V160_SAVE;
1602 return AMDGPU::SI_SPILL_V192_SAVE;
1604 return AMDGPU::SI_SPILL_V224_SAVE;
1606 return AMDGPU::SI_SPILL_V256_SAVE;
1608 return AMDGPU::SI_SPILL_V288_SAVE;
1610 return AMDGPU::SI_SPILL_V320_SAVE;
1612 return AMDGPU::SI_SPILL_V352_SAVE;
1614 return AMDGPU::SI_SPILL_V384_SAVE;
1616 return AMDGPU::SI_SPILL_V512_SAVE;
1618 return AMDGPU::SI_SPILL_V1024_SAVE;
1627 return AMDGPU::SI_SPILL_A32_SAVE;
1629 return AMDGPU::SI_SPILL_A64_SAVE;
1631 return AMDGPU::SI_SPILL_A96_SAVE;
1633 return AMDGPU::SI_SPILL_A128_SAVE;
1635 return AMDGPU::SI_SPILL_A160_SAVE;
1637 return AMDGPU::SI_SPILL_A192_SAVE;
1639 return AMDGPU::SI_SPILL_A224_SAVE;
1641 return AMDGPU::SI_SPILL_A256_SAVE;
1643 return AMDGPU::SI_SPILL_A288_SAVE;
1645 return AMDGPU::SI_SPILL_A320_SAVE;
1647 return AMDGPU::SI_SPILL_A352_SAVE;
1649 return AMDGPU::SI_SPILL_A384_SAVE;
1651 return AMDGPU::SI_SPILL_A512_SAVE;
1653 return AMDGPU::SI_SPILL_A1024_SAVE;
1662 return AMDGPU::SI_SPILL_AV32_SAVE;
1664 return AMDGPU::SI_SPILL_AV64_SAVE;
1666 return AMDGPU::SI_SPILL_AV96_SAVE;
1668 return AMDGPU::SI_SPILL_AV128_SAVE;
1670 return AMDGPU::SI_SPILL_AV160_SAVE;
1672 return AMDGPU::SI_SPILL_AV192_SAVE;
1674 return AMDGPU::SI_SPILL_AV224_SAVE;
1676 return AMDGPU::SI_SPILL_AV256_SAVE;
1678 return AMDGPU::SI_SPILL_AV288_SAVE;
1680 return AMDGPU::SI_SPILL_AV320_SAVE;
1682 return AMDGPU::SI_SPILL_AV352_SAVE;
1684 return AMDGPU::SI_SPILL_AV384_SAVE;
1686 return AMDGPU::SI_SPILL_AV512_SAVE;
1688 return AMDGPU::SI_SPILL_AV1024_SAVE;
1695 bool IsVectorSuperClass) {
1700 if (IsVectorSuperClass)
1701 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1703 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1711 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1717 if (IsVectorSuperClass)
1738 FrameInfo.getObjectAlign(FrameIndex));
1739 unsigned SpillSize =
TRI->getSpillSize(*RC);
1744 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1745 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1746 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1754 if (SrcReg.
isVirtual() && SpillSize == 4) {
1755 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1770 SpillSize, RI, *MFI);
1784 return AMDGPU::SI_SPILL_S32_RESTORE;
1786 return AMDGPU::SI_SPILL_S64_RESTORE;
1788 return AMDGPU::SI_SPILL_S96_RESTORE;
1790 return AMDGPU::SI_SPILL_S128_RESTORE;
1792 return AMDGPU::SI_SPILL_S160_RESTORE;
1794 return AMDGPU::SI_SPILL_S192_RESTORE;
1796 return AMDGPU::SI_SPILL_S224_RESTORE;
1798 return AMDGPU::SI_SPILL_S256_RESTORE;
1800 return AMDGPU::SI_SPILL_S288_RESTORE;
1802 return AMDGPU::SI_SPILL_S320_RESTORE;
1804 return AMDGPU::SI_SPILL_S352_RESTORE;
1806 return AMDGPU::SI_SPILL_S384_RESTORE;
1808 return AMDGPU::SI_SPILL_S512_RESTORE;
1810 return AMDGPU::SI_SPILL_S1024_RESTORE;
1819 return AMDGPU::SI_SPILL_V32_RESTORE;
1821 return AMDGPU::SI_SPILL_V64_RESTORE;
1823 return AMDGPU::SI_SPILL_V96_RESTORE;
1825 return AMDGPU::SI_SPILL_V128_RESTORE;
1827 return AMDGPU::SI_SPILL_V160_RESTORE;
1829 return AMDGPU::SI_SPILL_V192_RESTORE;
1831 return AMDGPU::SI_SPILL_V224_RESTORE;
1833 return AMDGPU::SI_SPILL_V256_RESTORE;
1835 return AMDGPU::SI_SPILL_V288_RESTORE;
1837 return AMDGPU::SI_SPILL_V320_RESTORE;
1839 return AMDGPU::SI_SPILL_V352_RESTORE;
1841 return AMDGPU::SI_SPILL_V384_RESTORE;
1843 return AMDGPU::SI_SPILL_V512_RESTORE;
1845 return AMDGPU::SI_SPILL_V1024_RESTORE;
1854 return AMDGPU::SI_SPILL_A32_RESTORE;
1856 return AMDGPU::SI_SPILL_A64_RESTORE;
1858 return AMDGPU::SI_SPILL_A96_RESTORE;
1860 return AMDGPU::SI_SPILL_A128_RESTORE;
1862 return AMDGPU::SI_SPILL_A160_RESTORE;
1864 return AMDGPU::SI_SPILL_A192_RESTORE;
1866 return AMDGPU::SI_SPILL_A224_RESTORE;
1868 return AMDGPU::SI_SPILL_A256_RESTORE;
1870 return AMDGPU::SI_SPILL_A288_RESTORE;
1872 return AMDGPU::SI_SPILL_A320_RESTORE;
1874 return AMDGPU::SI_SPILL_A352_RESTORE;
1876 return AMDGPU::SI_SPILL_A384_RESTORE;
1878 return AMDGPU::SI_SPILL_A512_RESTORE;
1880 return AMDGPU::SI_SPILL_A1024_RESTORE;
1889 return AMDGPU::SI_SPILL_AV32_RESTORE;
1891 return AMDGPU::SI_SPILL_AV64_RESTORE;
1893 return AMDGPU::SI_SPILL_AV96_RESTORE;
1895 return AMDGPU::SI_SPILL_AV128_RESTORE;
1897 return AMDGPU::SI_SPILL_AV160_RESTORE;
1899 return AMDGPU::SI_SPILL_AV192_RESTORE;
1901 return AMDGPU::SI_SPILL_AV224_RESTORE;
1903 return AMDGPU::SI_SPILL_AV256_RESTORE;
1905 return AMDGPU::SI_SPILL_AV288_RESTORE;
1907 return AMDGPU::SI_SPILL_AV320_RESTORE;
1909 return AMDGPU::SI_SPILL_AV352_RESTORE;
1911 return AMDGPU::SI_SPILL_AV384_RESTORE;
1913 return AMDGPU::SI_SPILL_AV512_RESTORE;
1915 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1922 bool IsVectorSuperClass) {
1927 if (IsVectorSuperClass)
1928 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1930 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1937 bool IsVectorSuperClass =
TRI.isVectorSuperClass(RC);
1943 if (IsVectorSuperClass)
1961 unsigned SpillSize =
TRI->getSpillSize(*RC);
1968 FrameInfo.getObjectAlign(FrameIndex));
1972 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1973 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1974 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1979 if (DestReg.
isVirtual() && SpillSize == 4) {
1981 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1995 SpillSize, RI, *MFI);
2010 unsigned Quantity)
const {
2012 while (Quantity > 0) {
2013 unsigned Arg = std::min(Quantity, 8u);
2027 if (HasNoTerminator) {
2028 if (
Info->returnsVoid()) {
2042 constexpr unsigned DoorbellIDMask = 0x3ff;
2043 constexpr unsigned ECQueueWaveAbort = 0x400;
2061 Register DoorbellReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2065 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2068 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2069 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2073 MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
2074 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2075 .
addUse(DoorbellRegMasked)
2076 .
addImm(ECQueueWaveAbort);
2077 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2078 .
addUse(SetWaveAbortBit);
2081 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2096 switch (
MI.getOpcode()) {
2098 if (
MI.isMetaInstruction())
2103 return MI.getOperand(0).getImm() + 1;
2112 switch (
MI.getOpcode()) {
2114 case AMDGPU::S_MOV_B64_term:
2117 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2120 case AMDGPU::S_MOV_B32_term:
2123 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2126 case AMDGPU::S_XOR_B64_term:
2129 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2132 case AMDGPU::S_XOR_B32_term:
2135 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2137 case AMDGPU::S_OR_B64_term:
2140 MI.setDesc(
get(AMDGPU::S_OR_B64));
2142 case AMDGPU::S_OR_B32_term:
2145 MI.setDesc(
get(AMDGPU::S_OR_B32));
2148 case AMDGPU::S_ANDN2_B64_term:
2151 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2154 case AMDGPU::S_ANDN2_B32_term:
2157 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2160 case AMDGPU::S_AND_B64_term:
2163 MI.setDesc(
get(AMDGPU::S_AND_B64));
2166 case AMDGPU::S_AND_B32_term:
2169 MI.setDesc(
get(AMDGPU::S_AND_B32));
2172 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2175 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2178 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2181 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2184 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2185 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2188 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2189 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2192 case AMDGPU::V_MOV_B64_PSEUDO: {
2194 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2195 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2201 MI.setDesc(
get(AMDGPU::V_MOV_B64_e32));
2206 if (
SrcOp.isImm()) {
2208 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2209 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2252 MI.eraseFromParent();
2255 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2259 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2264 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2269 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2270 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2272 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2273 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2280 MI.eraseFromParent();
2283 case AMDGPU::V_SET_INACTIVE_B32: {
2287 .
add(
MI.getOperand(3))
2288 .
add(
MI.getOperand(4))
2289 .
add(
MI.getOperand(1))
2290 .
add(
MI.getOperand(2))
2291 .
add(
MI.getOperand(5));
2292 MI.eraseFromParent();
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2298 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2299 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2300 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2301 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2302 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2305 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2306 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2328 Opc = AMDGPU::V_MOVRELD_B32_e32;
2330 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2331 : AMDGPU::S_MOVRELD_B32;
2336 bool IsUndef =
MI.getOperand(1).isUndef();
2337 unsigned SubReg =
MI.getOperand(3).getImm();
2338 assert(VecReg ==
MI.getOperand(1).getReg());
2343 .
add(
MI.getOperand(2))
2347 const int ImpDefIdx =
2349 const int ImpUseIdx = ImpDefIdx + 1;
2351 MI.eraseFromParent();
2354 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2361 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2364 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2365 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2368 bool IsUndef =
MI.getOperand(1).isUndef();
2377 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2381 .
add(
MI.getOperand(2))
2386 const int ImpDefIdx =
2388 const int ImpUseIdx = ImpDefIdx + 1;
2395 MI.eraseFromParent();
2398 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2399 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2408 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2409 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2413 bool IsUndef =
MI.getOperand(1).isUndef();
2431 MI.eraseFromParent();
2434 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2437 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2438 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2461 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2468 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2478 MI.eraseFromParent();
2481 case AMDGPU::ENTER_STRICT_WWM: {
2485 : AMDGPU::S_OR_SAVEEXEC_B64));
2488 case AMDGPU::ENTER_STRICT_WQM: {
2491 const unsigned Exec = ST.
isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
2492 const unsigned WQMOp = ST.
isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
2493 const unsigned MovOp = ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
2497 MI.eraseFromParent();
2500 case AMDGPU::EXIT_STRICT_WWM:
2501 case AMDGPU::EXIT_STRICT_WQM: {
2504 MI.setDesc(
get(ST.
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
2507 case AMDGPU::SI_RETURN: {
2521 MI.eraseFromParent();
2525 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2526 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2527 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2530 case AMDGPU::S_GETPC_B64_pseudo:
2531 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2534 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2556 case AMDGPU::S_LOAD_DWORDX16_IMM:
2557 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2570 for (
auto &CandMO :
I->operands()) {
2571 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2579 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2587 assert(
MRI.use_nodbg_empty(DestReg) &&
"DestReg should have no users yet.");
2589 unsigned NewOpcode = -1;
2590 if (SubregSize == 256)
2591 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2592 else if (SubregSize == 128)
2593 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2600 MRI.setRegClass(DestReg, NewRC);
2603 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2608 MI->getOperand(0).setReg(DestReg);
2609 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2613 OffsetMO->
setImm(FinalOffset);
2619 MI->setMemRefs(*MF, NewMMOs);
2632std::pair<MachineInstr*, MachineInstr*>
2634 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2639 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2640 return std::pair(&
MI,
nullptr);
2651 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2653 if (Dst.isPhysical()) {
2654 MovDPP.addDef(RI.getSubReg(Dst, Sub));
2657 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2661 for (
unsigned I = 1;
I <= 2; ++
I) {
2664 if (
SrcOp.isImm()) {
2666 Imm.ashrInPlace(Part * 32);
2667 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2671 if (Src.isPhysical())
2672 MovDPP.addReg(RI.getSubReg(Src, Sub));
2679 MovDPP.addImm(MO.getImm());
2681 Split[Part] = MovDPP;
2685 if (Dst.isVirtual())
2692 MI.eraseFromParent();
2693 return std::pair(Split[0], Split[1]);
2696std::optional<DestSourcePair>
2698 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2701 return std::nullopt;
2706 unsigned Src0OpName,
2708 unsigned Src1OpName)
const {
2715 "All commutable instructions have both src0 and src1 modifiers");
2717 int Src0ModsVal = Src0Mods->
getImm();
2718 int Src1ModsVal = Src1Mods->
getImm();
2720 Src1Mods->
setImm(Src0ModsVal);
2721 Src0Mods->
setImm(Src1ModsVal);
2730 bool IsKill = RegOp.
isKill();
2732 bool IsUndef = RegOp.
isUndef();
2733 bool IsDebug = RegOp.
isDebug();
2735 if (NonRegOp.
isImm())
2737 else if (NonRegOp.
isFI())
2758 int64_t NonRegVal = NonRegOp1.
getImm();
2761 NonRegOp2.
setImm(NonRegVal);
2778 unsigned Opc =
MI.getOpcode();
2786 if ((
int)OpIdx0 == Src0Idx && !MO0->
isReg() &&
2789 if ((
int)OpIdx1 == Src0Idx && !MO1->
isReg() &&
2794 if ((
int)OpIdx1 != Src0Idx && MO0->
isReg()) {
2799 if ((
int)OpIdx0 != Src0Idx && MO1->
isReg()) {
2813 unsigned Src1Idx)
const {
2814 assert(!NewMI &&
"this should never be used");
2816 unsigned Opc =
MI.getOpcode();
2818 if (CommutedOpcode == -1)
2821 if (Src0Idx > Src1Idx)
2825 static_cast<int>(Src0Idx) &&
2827 static_cast<int>(Src1Idx) &&
2828 "inconsistency with findCommutedOpIndices");
2853 Src1, AMDGPU::OpName::src1_modifiers);
2856 AMDGPU::OpName::src1_sel);
2868 unsigned &SrcOpIdx0,
2869 unsigned &SrcOpIdx1)
const {
2874 unsigned &SrcOpIdx0,
2875 unsigned &SrcOpIdx1)
const {
2876 if (!
Desc.isCommutable())
2879 unsigned Opc =
Desc.getOpcode();
2888 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2892 int64_t BrOffset)
const {
2895 assert(BranchOp != AMDGPU::S_SETPC_B64);
2909 return MI.getOperand(0).getMBB();
2914 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2915 MI.getOpcode() == AMDGPU::SI_LOOP)
2926 assert(RS &&
"RegScavenger required for long branching");
2928 "new block should be inserted for expanding unconditional branch");
2931 "restore block should be inserted for restoring clobbered registers");
2939 Register PCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2947 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
2948 if (FlushSGPRWrites)
2956 ApplyHazardWorkarounds();
2960 MCCtx.createTempSymbol(
"post_getpc",
true);
2964 MCCtx.createTempSymbol(
"offset_lo",
true);
2966 MCCtx.createTempSymbol(
"offset_hi",
true);
2969 .
addReg(PCReg, 0, AMDGPU::sub0)
2973 .
addReg(PCReg, 0, AMDGPU::sub1)
2975 ApplyHazardWorkarounds();
3016 if (LongBranchReservedReg) {
3018 Scav = LongBranchReservedReg;
3027 MRI.replaceRegWith(PCReg, Scav);
3028 MRI.clearVirtRegs();
3034 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3035 MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
3036 MRI.clearVirtRegs();
3051unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3053 case SIInstrInfo::SCC_TRUE:
3054 return AMDGPU::S_CBRANCH_SCC1;
3055 case SIInstrInfo::SCC_FALSE:
3056 return AMDGPU::S_CBRANCH_SCC0;
3057 case SIInstrInfo::VCCNZ:
3058 return AMDGPU::S_CBRANCH_VCCNZ;
3059 case SIInstrInfo::VCCZ:
3060 return AMDGPU::S_CBRANCH_VCCZ;
3061 case SIInstrInfo::EXECNZ:
3062 return AMDGPU::S_CBRANCH_EXECNZ;
3063 case SIInstrInfo::EXECZ:
3064 return AMDGPU::S_CBRANCH_EXECZ;
3070SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3072 case AMDGPU::S_CBRANCH_SCC0:
3074 case AMDGPU::S_CBRANCH_SCC1:
3076 case AMDGPU::S_CBRANCH_VCCNZ:
3078 case AMDGPU::S_CBRANCH_VCCZ:
3080 case AMDGPU::S_CBRANCH_EXECNZ:
3082 case AMDGPU::S_CBRANCH_EXECZ:
3094 bool AllowModify)
const {
3095 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3097 TBB =
I->getOperand(0).getMBB();
3101 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3102 if (Pred == INVALID_BR)
3107 Cond.push_back(
I->getOperand(1));
3117 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3119 FBB =
I->getOperand(0).getMBB();
3129 bool AllowModify)
const {
3137 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3138 switch (
I->getOpcode()) {
3139 case AMDGPU::S_MOV_B64_term:
3140 case AMDGPU::S_XOR_B64_term:
3141 case AMDGPU::S_OR_B64_term:
3142 case AMDGPU::S_ANDN2_B64_term:
3143 case AMDGPU::S_AND_B64_term:
3144 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3145 case AMDGPU::S_MOV_B32_term:
3146 case AMDGPU::S_XOR_B32_term:
3147 case AMDGPU::S_OR_B32_term:
3148 case AMDGPU::S_ANDN2_B32_term:
3149 case AMDGPU::S_AND_B32_term:
3150 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3153 case AMDGPU::SI_ELSE:
3154 case AMDGPU::SI_KILL_I1_TERMINATOR:
3155 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3172 int *BytesRemoved)
const {
3174 unsigned RemovedSize = 0;
3177 if (
MI.isBranch() ||
MI.isReturn()) {
3179 MI.eraseFromParent();
3185 *BytesRemoved = RemovedSize;
3202 int *BytesAdded)
const {
3203 if (!FBB &&
Cond.empty()) {
3214 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].getImm()));
3251 if (
Cond.size() != 2) {
3266 Register FalseReg,
int &CondCycles,
3267 int &TrueCycles,
int &FalseCycles)
const {
3268 switch (
Cond[0].getImm()) {
3273 if (
MRI.getRegClass(FalseReg) != RC)
3277 CondCycles = TrueCycles = FalseCycles = NumInsts;
3280 return RI.
hasVGPRs(RC) && NumInsts <= 6;
3288 if (
MRI.getRegClass(FalseReg) != RC)
3294 if (NumInsts % 2 == 0)
3297 CondCycles = TrueCycles = FalseCycles = NumInsts;
3309 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3310 if (Pred == VCCZ || Pred == SCC_FALSE) {
3311 Pred =
static_cast<BranchPredicate
>(-Pred);
3317 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3319 if (DstSize == 32) {
3321 if (Pred == SCC_TRUE) {
3336 if (DstSize == 64 && Pred == SCC_TRUE) {
3346 static const int16_t Sub0_15[] = {
3347 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3348 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3349 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3350 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3353 static const int16_t Sub0_15_64[] = {
3354 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3355 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3356 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3357 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3360 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3362 const int16_t *SubIndices = Sub0_15;
3363 int NElts = DstSize / 32;
3367 if (Pred == SCC_TRUE) {
3369 SelOp = AMDGPU::S_CSELECT_B32;
3370 EltRC = &AMDGPU::SGPR_32RegClass;
3372 SelOp = AMDGPU::S_CSELECT_B64;
3373 EltRC = &AMDGPU::SGPR_64RegClass;
3374 SubIndices = Sub0_15_64;
3380 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3385 for (
int Idx = 0;
Idx != NElts; ++
Idx) {
3386 Register DstElt =
MRI.createVirtualRegister(EltRC);
3389 unsigned SubIdx = SubIndices[
Idx];
3392 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3395 .
addReg(FalseReg, 0, SubIdx)
3396 .
addReg(TrueReg, 0, SubIdx);
3400 .
addReg(TrueReg, 0, SubIdx)
3401 .
addReg(FalseReg, 0, SubIdx);
3413 switch (
MI.getOpcode()) {
3414 case AMDGPU::V_MOV_B16_t16_e32:
3415 case AMDGPU::V_MOV_B16_t16_e64:
3416 case AMDGPU::V_MOV_B32_e32:
3417 case AMDGPU::V_MOV_B32_e64:
3418 case AMDGPU::V_MOV_B64_PSEUDO:
3419 case AMDGPU::V_MOV_B64_e32:
3420 case AMDGPU::V_MOV_B64_e64:
3421 case AMDGPU::S_MOV_B32:
3422 case AMDGPU::S_MOV_B64:
3423 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3425 case AMDGPU::WWM_COPY:
3426 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3427 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3428 case AMDGPU::V_ACCVGPR_MOV_B32:
3436 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3437 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3438 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3441 unsigned Opc =
MI.getOpcode();
3445 MI.removeOperand(
Idx);
3451 if (!
MRI->hasOneNonDBGUse(Reg))
3454 switch (
DefMI.getOpcode()) {
3457 case AMDGPU::V_MOV_B64_e32:
3458 case AMDGPU::S_MOV_B64:
3459 case AMDGPU::V_MOV_B64_PSEUDO:
3460 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3461 case AMDGPU::V_MOV_B32_e32:
3462 case AMDGPU::S_MOV_B32:
3463 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3470 if (!ImmOp->
isImm())
3473 auto getImmFor = [ImmOp](
const MachineOperand &UseOp) -> int64_t {
3474 int64_t Imm = ImmOp->
getImm();
3475 switch (UseOp.getSubReg()) {
3483 return SignExtend64<16>(Imm);
3485 return SignExtend64<16>(Imm >> 16);
3486 case AMDGPU::sub1_lo16:
3487 return SignExtend64<16>(Imm >> 32);
3488 case AMDGPU::sub1_hi16:
3489 return SignExtend64<16>(Imm >> 48);
3493 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3495 unsigned Opc =
UseMI.getOpcode();
3496 if (Opc == AMDGPU::COPY) {
3497 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3501 bool Is16Bit = OpSize == 2;
3502 bool Is64Bit = OpSize == 8;
3504 unsigned NewOpc =
isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
3505 : AMDGPU::V_MOV_B32_e32
3506 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
3507 : AMDGPU::S_MOV_B32;
3508 APInt Imm(Is64Bit ? 64 : 32, getImmFor(
UseMI.getOperand(1)),
3514 NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
3521 if (DstReg.
isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
3524 UseMI.getOperand(0).setSubReg(0);
3527 UseMI.getOperand(0).setReg(DstReg);
3537 UseMI.setDesc(NewMCID);
3538 UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
3543 if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3544 Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3545 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3546 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3547 Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
3562 bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
3563 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
3565 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3566 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3567 Opc == AMDGPU::V_FMAC_F16_fake16_e64;
3575 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3576 if (!RegSrc->
isReg())
3594 if (Def && Def->isMoveImmediate() &&
3599 IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3601 : AMDGPU::V_FMAMK_F16)
3602 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3609 if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
3612 const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
3618 unsigned SrcSubReg = RegSrc->
getSubReg();
3623 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3624 Opc == AMDGPU::V_FMAC_F32_e64 ||
3625 Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
3626 UseMI.untieRegOperand(
3629 Src1->ChangeToImmediate(Imm);
3634 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3636 DefMI.eraseFromParent();
3646 bool Src0Inlined =
false;
3647 if (Src0->
isReg()) {
3652 if (Def && Def->isMoveImmediate() &&
3664 if (Src1->
isReg() && !Src0Inlined) {
3667 if (Def && Def->isMoveImmediate() &&
3678 IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3680 : AMDGPU::V_FMAAK_F16)
3681 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3688 if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
3694 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3695 Opc == AMDGPU::V_FMAC_F32_e64 ||
3696 Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
3697 UseMI.untieRegOperand(
3711 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3713 DefMI.eraseFromParent();
3725 if (BaseOps1.
size() != BaseOps2.
size())
3727 for (
size_t I = 0, E = BaseOps1.
size();
I < E; ++
I) {
3728 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
3736 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
3737 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
3738 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3740 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
3743bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3746 int64_t Offset0, Offset1;
3748 bool Offset0IsScalable, Offset1IsScalable;
3770 "MIa must load from or modify a memory location");
3772 "MIb must load from or modify a memory location");
3791 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3798 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3808 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3822 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3833 if (Reg.isPhysical())
3835 auto *Def =
MRI.getUniqueVRegDef(Reg);
3837 Imm = Def->getOperand(1).getImm();
3857 unsigned NumOps =
MI.getNumOperands();
3858 for (
unsigned I = 1;
I < NumOps; ++
I) {
3860 if (
Op.isReg() &&
Op.isKill())
3868 case AMDGPU::V_MAC_F16_e32:
3869 case AMDGPU::V_MAC_F16_e64:
3870 return AMDGPU::V_MAD_F16_e64;
3871 case AMDGPU::V_MAC_F32_e32:
3872 case AMDGPU::V_MAC_F32_e64:
3873 return AMDGPU::V_MAD_F32_e64;
3874 case AMDGPU::V_MAC_LEGACY_F32_e32:
3875 case AMDGPU::V_MAC_LEGACY_F32_e64:
3876 return AMDGPU::V_MAD_LEGACY_F32_e64;
3877 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3878 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3879 return AMDGPU::V_FMA_LEGACY_F32_e64;
3880 case AMDGPU::V_FMAC_F16_e32:
3881 case AMDGPU::V_FMAC_F16_e64:
3882 case AMDGPU::V_FMAC_F16_fake16_e64:
3883 return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
3884 : AMDGPU::V_FMA_F16_gfx9_e64;
3885 case AMDGPU::V_FMAC_F32_e32:
3886 case AMDGPU::V_FMAC_F32_e64:
3887 return AMDGPU::V_FMA_F32_e64;
3888 case AMDGPU::V_FMAC_F64_e32:
3889 case AMDGPU::V_FMAC_F64_e64:
3890 return AMDGPU::V_FMA_F64_e64;
3900 unsigned Opc =
MI.getOpcode();
3904 if (NewMFMAOpc != -1) {
3907 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
3908 MIB.
add(
MI.getOperand(
I));
3914 if (Def.isEarlyClobber() && Def.isReg() &&
3919 auto UpdateDefIndex = [&](
LiveRange &LR) {
3920 auto *S = LR.
find(OldIndex);
3921 if (S != LR.end() && S->start == OldIndex) {
3922 assert(S->valno && S->valno->def == OldIndex);
3923 S->start = NewIndex;
3924 S->valno->def = NewIndex;
3928 for (
auto &SR : LI.subranges())
3939 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
3950 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3951 "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
3955 bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
3956 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3957 Opc == AMDGPU::V_FMAC_F16_fake16_e64;
3958 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
3959 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3960 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
3961 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3962 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3963 Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3964 bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
3965 bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
3966 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
3967 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
3968 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
3969 bool Src0Literal =
false;
3974 case AMDGPU::V_MAC_F16_e64:
3975 case AMDGPU::V_FMAC_F16_e64:
3976 case AMDGPU::V_FMAC_F16_fake16_e64:
3977 case AMDGPU::V_MAC_F32_e64:
3978 case AMDGPU::V_MAC_LEGACY_F32_e64:
3979 case AMDGPU::V_FMAC_F32_e64:
3980 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3981 case AMDGPU::V_FMAC_F64_e64:
3983 case AMDGPU::V_MAC_F16_e32:
3984 case AMDGPU::V_FMAC_F16_e32:
3985 case AMDGPU::V_MAC_F32_e32:
3986 case AMDGPU::V_MAC_LEGACY_F32_e32:
3987 case AMDGPU::V_FMAC_F32_e32:
3988 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3989 case AMDGPU::V_FMAC_F64_e32: {
3991 AMDGPU::OpName::src0);
4018 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
4024 const auto killDef = [&]() ->
void {
4029 if (
MRI.hasOneNonDBGUse(DefReg)) {
4046 Register DummyReg =
MRI.cloneVirtualRegister(DefReg);
4048 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4049 MIOp.setIsUndef(
true);
4050 MIOp.setReg(DummyReg);
4062 : AMDGPU::V_FMAAK_F16)
4063 : AMDGPU::V_FMAAK_F32)
4064 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
4081 : AMDGPU::V_FMAMK_F16)
4082 : AMDGPU::V_FMAMK_F32)
4083 : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4148 MIB.
addImm(OpSel ? OpSel->getImm() : 0);
4159 switch (
MI.getOpcode()) {
4160 case AMDGPU::S_SET_GPR_IDX_ON:
4161 case AMDGPU::S_SET_GPR_IDX_MODE:
4162 case AMDGPU::S_SET_GPR_IDX_OFF:
4180 if (
MI.isTerminator() ||
MI.isPosition())
4184 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4187 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4193 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4194 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4195 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4196 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4201 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
isGWS(Opcode);
4212 unsigned Opcode =
MI.getOpcode();
4227 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4228 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4229 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
4232 if (
MI.isCall() ||
MI.isInlineAsm())
4248 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4249 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4250 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4251 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4259 if (
MI.isMetaInstruction())
4263 if (
MI.isCopyLike()) {
4268 return MI.readsRegister(AMDGPU::EXEC, &RI);
4279 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4283 switch (Imm.getBitWidth()) {
4303 APInt IntImm = Imm.bitcastToAPInt();
4323 assert(!MO.
isReg() &&
"isInlineConstant called on register operand!");
4332 int64_t Imm = MO.
getImm();
4333 switch (OperandType) {
4346 int32_t Trunc =
static_cast<int32_t
>(Imm);
4386 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4391 int16_t Trunc =
static_cast<int16_t
>(Imm);
4402 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
4403 int16_t Trunc =
static_cast<int16_t
>(Imm);
4463 AMDGPU::OpName::src2))
4479 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.
hasGFX90AInsts())
4499 return Mods && Mods->
getImm();
4512 switch (
MI.getOpcode()) {
4513 default:
return false;
4515 case AMDGPU::V_ADDC_U32_e64:
4516 case AMDGPU::V_SUBB_U32_e64:
4517 case AMDGPU::V_SUBBREV_U32_e64: {
4525 case AMDGPU::V_MAC_F16_e64:
4526 case AMDGPU::V_MAC_F32_e64:
4527 case AMDGPU::V_MAC_LEGACY_F32_e64:
4528 case AMDGPU::V_FMAC_F16_e64:
4529 case AMDGPU::V_FMAC_F16_fake16_e64:
4530 case AMDGPU::V_FMAC_F32_e64:
4531 case AMDGPU::V_FMAC_F64_e64:
4532 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4538 case AMDGPU::V_CNDMASK_B32_e64:
4574 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4583 unsigned Op32)
const {
4597 Inst32.
add(
MI.getOperand(
I));
4601 int Idx =
MI.getNumExplicitDefs();
4603 int OpTy =
MI.getDesc().operands()[
Idx++].OperandType;
4641 if (MO.
getReg() == AMDGPU::SGPR_NULL || MO.
getReg() == AMDGPU::SGPR_NULL64)
4646 return MO.
getReg() == AMDGPU::M0 || MO.
getReg() == AMDGPU::VCC ||
4647 MO.
getReg() == AMDGPU::VCC_LO;
4649 return AMDGPU::SReg_32RegClass.contains(MO.
getReg()) ||
4650 AMDGPU::SReg_64RegClass.contains(MO.
getReg());
4659 switch (MO.getReg()) {
4661 case AMDGPU::VCC_LO:
4662 case AMDGPU::VCC_HI:
4664 case AMDGPU::FLAT_SCR:
4677 switch (
MI.getOpcode()) {
4678 case AMDGPU::V_READLANE_B32:
4679 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4680 case AMDGPU::V_WRITELANE_B32:
4681 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4688 if (
MI.isPreISelOpcode() ||
4689 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4704 if (
SubReg.getReg().isPhysical())
4707 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4719 ErrInfo =
"illegal copy from vector register to SGPR";
4737 if (!
MRI.isSSA() &&
MI.isCopy())
4738 return verifyCopy(
MI,
MRI, ErrInfo);
4740 if (SIInstrInfo::isGenericOpcode(
MI.getOpcode()))
4747 if (Src0Idx == -1) {
4757 if (!
Desc.isVariadic() &&
4758 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
4759 ErrInfo =
"Instruction has wrong number of operands.";
4763 if (
MI.isInlineAsm()) {
4776 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4777 ErrInfo =
"inlineasm operand has incorrect register class.";
4785 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4786 ErrInfo =
"missing memory operand from image instruction.";
4791 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4794 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4795 "all fp values to integers.";
4799 int RegClass =
Desc.operands()[i].RegClass;
4801 switch (
Desc.operands()[i].OperandType) {
4803 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4804 ErrInfo =
"Illegal immediate value for operand.";
4825 ErrInfo =
"Illegal immediate value for operand.";
4832 ErrInfo =
"Expected inline constant for operand.";
4841 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
4842 ErrInfo =
"Expected immediate, but got non-immediate";
4864 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
4873 ErrInfo =
"Subtarget requires even aligned vector registers";
4878 if (RegClass != -1) {
4879 if (Reg.isVirtual())
4884 ErrInfo =
"Operand has incorrect register class.";
4893 ErrInfo =
"SDWA is not supported on this target";
4899 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
4907 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
4914 "Only reg allowed as operands in SDWA instructions on GFX9+";
4923 if (OMod !=
nullptr &&
4925 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
4930 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
4931 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
4932 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
4933 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
4936 unsigned Mods = Src0ModsMO->
getImm();
4939 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
4945 if (
isVOPC(BasicOpcode)) {
4949 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
4950 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
4956 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
4957 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
4963 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
4964 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
4971 if (DstUnused && DstUnused->isImm() &&
4974 if (!Dst.isReg() || !Dst.isTied()) {
4975 ErrInfo =
"Dst register should have tied register";
4980 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
4983 "Dst register should be tied to implicit use of preserved register";
4987 ErrInfo =
"Dst register should use same physical register as preserved";
5019 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5020 if (RegCount > DstSize) {
5021 ErrInfo =
"Image instruction returns too many registers for dst "
5030 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5031 unsigned ConstantBusCount = 0;
5032 bool UsesLiteral =
false;
5039 LiteralVal = &
MI.getOperand(ImmIdx);
5048 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5059 }
else if (!MO.
isFI()) {
5066 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5076 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5077 return !RI.regsOverlap(SGPRUsed, SGPR);
5087 Opcode != AMDGPU::V_WRITELANE_B32) {
5088 ErrInfo =
"VOP* instruction violates constant bus restriction";
5093 ErrInfo =
"VOP3 instruction uses literal";
5100 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5101 unsigned SGPRCount = 0;
5104 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5112 if (MO.
getReg() != SGPRUsed)
5118 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5125 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5126 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5133 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5143 ErrInfo =
"ABS not allowed in VOP3B instructions";
5156 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5163 if (
Desc.isBranch()) {
5165 ErrInfo =
"invalid branch target for SOPK instruction";
5171 if (!isUInt<16>(Imm)) {
5172 ErrInfo =
"invalid immediate for SOPK instruction";
5176 if (!isInt<16>(Imm)) {
5177 ErrInfo =
"invalid immediate for SOPK instruction";
5184 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5185 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5186 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5187 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5188 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5189 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5191 const unsigned StaticNumOps =
5192 Desc.getNumOperands() +
Desc.implicit_uses().size();
5193 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5198 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5199 ErrInfo =
"missing implicit register operands";
5205 if (!Dst->isUse()) {
5206 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5211 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5212 UseOpIdx != StaticNumOps + 1) {
5213 ErrInfo =
"movrel implicit operands should be tied";
5220 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5222 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5223 ErrInfo =
"src0 should be subreg of implicit vector use";
5231 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5232 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5238 if (
MI.mayStore() &&
5243 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5244 ErrInfo =
"scalar stores must use m0 as offset register";
5252 if (
Offset->getImm() != 0) {
5253 ErrInfo =
"subtarget does not support offsets in flat instructions";
5260 if (GDSOp && GDSOp->
getImm() != 0) {
5261 ErrInfo =
"GDS is not supported on this subtarget";
5270 AMDGPU::OpName::vaddr0);
5272 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5281 ErrInfo =
"dim is out of range";
5288 IsA16 = R128A16->
getImm() != 0;
5289 }
else if (ST.
hasA16()) {
5291 IsA16 = A16->
getImm() != 0;
5294 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5296 unsigned AddrWords =
5299 unsigned VAddrWords;
5301 VAddrWords = RsrcIdx - VAddr0Idx;
5304 unsigned LastVAddrIdx = RsrcIdx - 1;
5305 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5313 if (VAddrWords != AddrWords) {
5315 <<
" but got " << VAddrWords <<
"\n");
5316 ErrInfo =
"bad vaddr size";
5324 using namespace AMDGPU::DPP;
5326 unsigned DC = DppCt->
getImm();
5327 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5328 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5329 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5330 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5331 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5332 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5333 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5334 ErrInfo =
"Invalid dpp_ctrl value";
5337 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5339 ErrInfo =
"Invalid dpp_ctrl value: "
5340 "wavefront shifts are not supported on GFX10+";
5343 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5345 ErrInfo =
"Invalid dpp_ctrl value: "
5346 "broadcasts are not supported on GFX10+";
5349 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5351 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5352 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5354 ErrInfo =
"Invalid dpp_ctrl value: "
5355 "row_newbroadcast/row_share is not supported before "
5360 ErrInfo =
"Invalid dpp_ctrl value: "
5361 "row_share and row_xmask are not supported before GFX10";
5366 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5368 ErrInfo =
"Invalid dpp_ctrl value: "
5369 "DP ALU dpp only support row_newbcast";
5376 uint16_t DataNameIdx =
isDS(Opcode) ? AMDGPU::OpName::data0
5377 : AMDGPU::OpName::vdata;
5386 ErrInfo =
"Invalid register class: "
5387 "vdata and vdst should be both VGPR or AGPR";
5390 if (
Data && Data2 &&
5392 ErrInfo =
"Invalid register class: "
5393 "both data operands should be VGPR or AGPR";
5397 if ((Dst && RI.
isAGPR(
MRI, Dst->getReg())) ||
5400 ErrInfo =
"Invalid register class: "
5401 "agpr loads and stores not supported on this GPU";
5408 const auto isAlignedReg = [&
MI, &
MRI,
this](
unsigned OpName) ->
bool {
5413 if (Reg.isPhysical())
5420 if (
MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
5421 MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
5422 MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
5424 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5425 ErrInfo =
"Subtarget requires even aligned vector registers "
5426 "for DS_GWS instructions";
5432 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5433 ErrInfo =
"Subtarget requires even aligned vector registers "
5434 "for vaddr operand of image instructions";
5440 if (
MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
5443 if (Src->isReg() && RI.
isSGPRReg(
MRI, Src->getReg())) {
5444 ErrInfo =
"Invalid register class: "
5445 "v_accvgpr_write with an SGPR is not supported on this GPU";
5450 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5453 ErrInfo =
"pseudo expects only physical SGPRs";
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
  case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
  case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
  }
  llvm_unreachable(
      "Unexpected scalar opcode without corresponding vector one!");
}
  bool IsWave32 = ST.isWave32();
  // ...
  unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  // ...
  const unsigned OrSaveExec =
      IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  // ...
  unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  // ...
  auto ExecRestoreMI = /* ... */;
static const TargetRegisterClass *
adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
                          const MachineRegisterInfo &MRI,
                          const MCInstrDesc &TID, unsigned RCID,
                          bool IsAllocatable) {
  if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
      /* ... */) {
    switch (RCID) {
    case AMDGPU::AV_32RegClassID:  RCID = AMDGPU::VGPR_32RegClassID;  break;
    case AMDGPU::AV_64RegClassID:  RCID = AMDGPU::VReg_64RegClassID;  break;
    case AMDGPU::AV_96RegClassID:  RCID = AMDGPU::VReg_96RegClassID;  break;
    case AMDGPU::AV_128RegClassID: RCID = AMDGPU::VReg_128RegClassID; break;
    case AMDGPU::AV_160RegClassID: RCID = AMDGPU::VReg_160RegClassID; break;
    case AMDGPU::AV_512RegClassID: RCID = AMDGPU::VReg_512RegClassID; break;
    default:
      break;
    }
  }
  // ...
5744 auto RegClass = TID.
operands()[OpNum].RegClass;
5745 bool IsAllocatable =
false;
5755 AMDGPU::OpName::vdst);
5758 : AMDGPU::OpName::vdata);
5759 if (DataIdx != -1) {
5761 TID.
Opcode, AMDGPU::OpName::data1);
5769 unsigned OpNo)
const {
5772 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
5773 Desc.operands()[OpNo].RegClass == -1) {
5776 if (Reg.isVirtual())
5777 return MRI.getRegClass(Reg);
5778 return RI.getPhysRegBaseClass(Reg);
5781 unsigned RCID =
Desc.operands()[OpNo].RegClass;
  unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
                                 : AMDGPU::V_MOV_B32_e32;
  if (MO.isReg())
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
5813 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
5819 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
5830 if (SubIdx == AMDGPU::sub0)
5832 if (SubIdx == AMDGPU::sub1)
5844void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
5860 if (Reg.isPhysical())
5871 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
5882 unsigned Opc =
MI.getOpcode();
5897 Opc,
isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5898 if ((
int)OpIdx == VDstIdx && DataIdx != -1 &&
5899 MI.getOperand(DataIdx).isReg() &&
5900 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
5902 if ((
int)OpIdx == DataIdx) {
5903 if (VDstIdx != -1 &&
5904 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5908 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
5909 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5914 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts() &&
5941 MO = &
MI.getOperand(OpIdx);
5953 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
5959 if (!SGPRsUsed.
count(SGPR) &&
5962 if (--ConstantBusLimit <= 0)
5968 if (!LiteralLimit--)
5970 if (--ConstantBusLimit <= 0)
5989 bool Is64BitOp = Is64BitFPOp ||
6002 if (!Is64BitFPOp && (int32_t)Imm < 0)
6020 unsigned Opc =
MI.getOpcode();
6039 if (Opc == AMDGPU::V_WRITELANE_B32) {
6042 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6048 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6065 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6067 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6079 if (Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6081 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6093 if (HasImplicitSGPR || !
MI.isCommutable()) {
6110 if (CommutedOpc == -1) {
6115 MI.setDesc(
get(CommutedOpc));
6119 bool Src0Kill = Src0.
isKill();
6123 else if (Src1.
isReg()) {
6138 unsigned Opc =
MI.getOpcode();
6146 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6147 Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
6153 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6159 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6170 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6172 SGPRsUsed.
insert(SGPRReg);
6176 for (
int Idx : VOP3Idx) {
6185 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6210 if (ConstantBusLimit > 0) {
6222 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6223 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6233 SRC = RI.getCommonSubClass(SRC, DstRC);
6236 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6240 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6242 get(TargetOpcode::COPY), NewSrcReg)
6249 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6255 for (
unsigned i = 0; i < SubRegs; ++i) {
6256 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6258 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6265 get(AMDGPU::REG_SEQUENCE), DstReg);
6266 for (
unsigned i = 0; i < SubRegs; ++i) {
6281 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6283 SBase->setReg(SGPR);
6295 if (OldSAddrIdx < 0)
6312 if (NewVAddrIdx < 0)
6319 if (OldVAddrIdx >= 0) {
6321 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6322 if (!VAddrDef || VAddrDef->
getOpcode() != AMDGPU::V_MOV_B32_e32 ||
6333 if (OldVAddrIdx == NewVAddrIdx) {
6336 MRI.removeRegOperandFromUseList(&NewVAddr);
6337 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6341 MRI.removeRegOperandFromUseList(&NewVAddr);
6342 MRI.addRegOperandToUseList(&NewVAddr);
6344 assert(OldSAddrIdx == NewVAddrIdx);
6346 if (OldVAddrIdx >= 0) {
6348 AMDGPU::OpName::vdst_in);
6352 if (NewVDstIn != -1) {
6359 if (NewVDstIn != -1) {
6401 unsigned OpSubReg =
Op.getSubReg();
6410 Register DstReg =
MRI.createVirtualRegister(DstRC);
6420 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6423 bool ImpDef = Def->isImplicitDef();
6424 while (!ImpDef && Def && Def->isCopy()) {
6425 if (Def->getOperand(1).getReg().isPhysical())
6427 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6428 ImpDef = Def && Def->isImplicitDef();
6430 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6448 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6449 unsigned SaveExecOpc =
6450 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6451 unsigned XorTermOpc =
6452 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6454 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6455 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6461 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6462 unsigned NumSubRegs =
RegSize / 32;
6463 Register VScalarOp = ScalarOp->getReg();
6465 if (NumSubRegs == 1) {
6466 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6468 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6471 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6473 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6479 CondReg = NewCondReg;
6481 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6489 ScalarOp->setReg(CurReg);
6490 ScalarOp->setIsKill();
6494 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6495 "Unhandled register size");
6497 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6498 Register CurRegLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6499 Register CurRegHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6502 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6503 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6506 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6507 .
addReg(VScalarOp, VScalarOpUndef,
6508 TRI->getSubRegFromChannel(
Idx + 1));
6514 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6515 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6521 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6522 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6525 if (NumSubRegs <= 2)
6526 Cmp.addReg(VScalarOp);
6528 Cmp.addReg(VScalarOp, VScalarOpUndef,
6529 TRI->getSubRegFromChannel(
Idx, 2));
6533 CondReg = NewCondReg;
6535 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6543 const auto *SScalarOpRC =
6544 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6545 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6549 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6550 unsigned Channel = 0;
6551 for (
Register Piece : ReadlanePieces) {
6552 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6556 ScalarOp->setReg(SScalarOp);
6557 ScalarOp->setIsKill();
6561 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6562 MRI.setSimpleHint(SaveExec, CondReg);
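// The code above emits the per-iteration body of a "waterfall" loop, the
// standard way to legalize an operand that must be scalar but currently
// lives in a VGPR. Roughly (a sketch; the operand handling varies with the
// size of the scalar operand):
//   loop:
//     s     = v_readfirstlane_b32 v      ; value from the first active lane
//     cond  = v_cmp_eq_u32 s, v          ; lanes holding the same value
//     saved = s_and_saveexec cond        ; run only those lanes
//     ...rewritten instruction uses s...
//     exec  = s_xor_term exec, saved     ; retire the handled lanes
//     s_cbranch_execnz loop
// The wrapper below saves EXEC (and SCC when it is live across the
// expansion), splits the block around the loop, and restores EXEC in the
// remainder block.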
6593 if (!Begin.isValid())
6595 if (!
End.isValid()) {
6600 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6601 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6602 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6611 std::numeric_limits<unsigned>::max()) !=
6614 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6620 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6629 for (
auto I = Begin;
I != AfterMI;
I++) {
6630 for (
auto &MO :
I->all_uses())
6631 MRI.clearKillFlags(MO.getReg());
6666 for (
auto &Succ : RemainderBB->
successors()) {
6689static std::tuple<unsigned, unsigned>
6697 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
6698 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
6701 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6702 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6703 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6704 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6705 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
6722 .
addImm(AMDGPU::sub0_sub1)
6728 return std::tuple(RsrcPtr, NewSRsrc);
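// extractRsrcPtr above pulls the 64-bit base pointer out of a 128-bit buffer
// resource and rebuilds a new SRsrc with a zeroed base and the default data
// format, so the pointer half can later be folded into the vaddr computation
// when the original rsrc was not uniform. What follows is the generic operand
// legalizer: PHI and REG_SEQUENCE inputs are copied into the destination
// register bank, and scalar-only memory operands are handled either with
// v_readfirstlane or by wrapping the instruction in a waterfall loop.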
6765 if (
MI.getOpcode() == AMDGPU::PHI) {
6767 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
6768 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
6771 MRI.getRegClass(
MI.getOperand(i).getReg());
6786 VRC = &AMDGPU::VReg_1RegClass;
6802 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6804 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6820 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
6827 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6829 if (!
Op.isReg() || !
Op.getReg().isVirtual())
6847 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
6852 if (DstRC != Src0RC) {
6861 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
6869 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
6870 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
6871 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
6872 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6873 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
6874 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
6875 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
6890 : AMDGPU::OpName::srsrc;
6895 int SampOpName =
isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
6904 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
6910 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
6911 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
6916 while (Start->getOpcode() != FrameSetupOpcode)
6919 while (
End->getOpcode() != FrameDestroyOpcode)
6923 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
6924 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
6932 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
6934 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6945 bool isSoffsetLegal =
true;
6948 if (SoffsetIdx != -1) {
6952 isSoffsetLegal =
false;
6956 bool isRsrcLegal =
true;
6959 if (RsrcIdx != -1) {
6962 isRsrcLegal =
false;
6966 if (isRsrcLegal && isSoffsetLegal)
6990 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6991 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6992 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
6995 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
6996 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
6998 unsigned RsrcPtr, NewSRsrc;
7005 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7012 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7030 "FIXME: Need to emit flat atomics here");
7032 unsigned RsrcPtr, NewSRsrc;
7035 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7058 MIB.
addImm(CPol->getImm());
7063 MIB.
addImm(TFE->getImm());
7083 MI.removeFromParent();
7088 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7090 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7094 if (!isSoffsetLegal) {
7106 if (!isSoffsetLegal) {
7115 InstrList.insert(
MI);
7119 if (RsrcIdx != -1) {
7120 DeferredList.insert(
MI);
7125 return DeferredList.contains(
MI);
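// moveToVALU drains a worklist seeded with one scalar instruction:
// moveToVALUImpl rewrites each entry into its VALU form (or dispatches to one
// of the split*/lower* helpers below), and every user of a rewritten result is
// pushed back onto the worklist so whole SALU chains migrate to the vector
// unit. Instructions whose rsrc operand still needs legalization are parked on
// a deferred list and, as the assert notes, must not re-populate the worklist.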
7131 while (!Worklist.
empty()) {
7145 "Deferred MachineInstr are not supposed to re-populate worklist");
  case AMDGPU::S_ADD_U64_PSEUDO:
    NewOpcode = AMDGPU::V_ADD_U64_PSEUDO;
    break;
  case AMDGPU::S_SUB_U64_PSEUDO:
    NewOpcode = AMDGPU::V_SUB_U64_PSEUDO;
    break;
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_SUB_I32: {
    bool Changed;
    MachineBasicBlock *CreatedBBTmp = nullptr;
    std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
    if (Changed)
      return;
    // Otherwise fall back to the default handling below.
    break;
  }
  case AMDGPU::S_MUL_U64:
    // Split s_mul_u64 into 32-bit vector multiplications.
    splitScalarSMulU64(Worklist, Inst, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    splitScalarSMulPseudo(Worklist, Inst, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_AND_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_OR_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_XOR_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_NAND_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_NOR_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_XNOR_B64:
    if (ST.hasDLInsts())
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
    else
      splitScalar64BitXnor(Worklist, Inst, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_ANDN2_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_ORN2_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_BREV_B64:
    splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_NOT_B64:
    splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_BCNT1_I32_B64:
    splitScalar64BitBCNT(Worklist, Inst);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_BFE_I64:
    splitScalar64BitBFE(Worklist, Inst);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_FLBIT_I32_B64:
    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_FF1_I32_B64:
    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_LSHL_B32:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
      swapOperands(Inst);
    }
    break;
  case AMDGPU::S_ASHR_I32:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
      swapOperands(Inst);
    }
    break;
  case AMDGPU::S_LSHR_B32:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
      swapOperands(Inst);
    }
    break;
  case AMDGPU::S_LSHL_B64:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = /* ... */ ? AMDGPU::V_LSHLREV_B64_pseudo_e64
                            : AMDGPU::V_LSHLREV_B64_e64;
      swapOperands(Inst);
    }
    break;
  case AMDGPU::S_ASHR_I64:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
      swapOperands(Inst);
    }
    break;
  case AMDGPU::S_LSHR_B64:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
      swapOperands(Inst);
    }
    break;
  case AMDGPU::S_ABS_I32:
    lowerScalarAbs(Worklist, Inst);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1: {
    // Clear unused bits of vcc before branching on it.
    // ...
    bool IsSCC = CondReg == AMDGPU::SCC;
    unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
    BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC)
        .addReg(EXEC)
        .addReg(IsSCC ? VCC : CondReg);
    // ...
    break;
  }
  case AMDGPU::S_BFE_U64:
  case AMDGPU::S_BFM_B64:
    llvm_unreachable("Moving this op to VALU not implemented");
  case AMDGPU::S_PACK_LL_B32_B16:
  case AMDGPU::S_PACK_LH_B32_B16:
  case AMDGPU::S_PACK_HL_B32_B16:
  case AMDGPU::S_PACK_HH_B32_B16:
    movePackToVALU(Worklist, MRI, Inst);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_XNOR_B32:
    lowerScalarXnor(Worklist, Inst);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_NAND_B32:
    splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_NOR_B32:
    splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_ANDN2_B32:
    splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_ORN2_B32:
    splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_ADD_CO_PSEUDO:
  case AMDGPU::S_SUB_CO_PSEUDO: {
    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
                       ? AMDGPU::V_ADDC_U32_e64
                       : AMDGPU::V_SUBB_U32_e64;
    // ...
    if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
      Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
      // ...
    }
    // ...
    addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
    Inst.eraseFromParent();
    return;
  }
  case AMDGPU::S_UADDO_PSEUDO:
  case AMDGPU::S_USUBO_PSEUDO: {
    // ...
    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
                       ? AMDGPU::V_ADD_CO_U32_e64
                       : AMDGPU::V_SUB_CO_U32_e64;
    // ...
    Register DestReg = MRI.createVirtualRegister(NewRC);
    // ...
    MRI.replaceRegWith(Dest0.getReg(), DestReg);
    // ...
    Inst.eraseFromParent();
    return;
  }
  case AMDGPU::S_CSELECT_B32:
  case AMDGPU::S_CSELECT_B64:
    lowerSelect(Worklist, Inst, MDT);
    Inst.eraseFromParent();
    return;
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
  case AMDGPU::S_CMP_LT_F32:
  case AMDGPU::S_CMP_EQ_F32:
  case AMDGPU::S_CMP_LE_F32:
  case AMDGPU::S_CMP_GT_F32:
  case AMDGPU::S_CMP_LG_F32:
  case AMDGPU::S_CMP_GE_F32:
  case AMDGPU::S_CMP_O_F32:
  case AMDGPU::S_CMP_U_F32:
  case AMDGPU::S_CMP_NGE_F32:
  case AMDGPU::S_CMP_NLG_F32:
  case AMDGPU::S_CMP_NGT_F32:
  case AMDGPU::S_CMP_NLE_F32:
  case AMDGPU::S_CMP_NEQ_F32:
  case AMDGPU::S_CMP_NLT_F32: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
    // ... build the VALU compare into CondReg ...
    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
    Inst.eraseFromParent();
    return;
  }
  case AMDGPU::S_CMP_LT_F16:
  case AMDGPU::S_CMP_EQ_F16:
  case AMDGPU::S_CMP_LE_F16:
  case AMDGPU::S_CMP_GT_F16:
  case AMDGPU::S_CMP_LG_F16:
  case AMDGPU::S_CMP_GE_F16:
  case AMDGPU::S_CMP_O_F16:
  case AMDGPU::S_CMP_U_F16:
  case AMDGPU::S_CMP_NGE_F16:
  case AMDGPU::S_CMP_NLG_F16:
  case AMDGPU::S_CMP_NGT_F16:
  case AMDGPU::S_CMP_NLE_F16:
  case AMDGPU::S_CMP_NEQ_F16:
  case AMDGPU::S_CMP_NLT_F16: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
    // ... build the VALU compare into CondReg ...
    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
    Inst.eraseFromParent();
    return;
  }
  case AMDGPU::S_CVT_HI_F32_F16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ... extract the high half (e.g. .addReg(TmpReg, 0, AMDGPU::hi16)) and
    //     convert it with the f16->f32 VALU op into NewDst ...
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
    Inst.eraseFromParent();
    return;
  }
  case AMDGPU::S_MINIMUM_F32:
  case AMDGPU::S_MAXIMUM_F32:
  case AMDGPU::S_MINIMUM_F16:
  case AMDGPU::S_MAXIMUM_F16: {
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    // ... emit the VALU minimum/maximum into NewDst ...
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
    Inst.eraseFromParent();
    return;
  }
  }
7571 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
7579 if (NewOpcode == Opcode) {
7603 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
7615 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7616 MRI.replaceRegWith(DstReg, NewDstReg);
7618 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
7629 AMDGPU::OpName::src0_modifiers) >= 0)
7634 Src.isReg() && RI.
isVGPR(
MRI, Src.getReg()))
7635 NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7637 NewInstr->addOperand(Src);
7640 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
7643 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
7645 NewInstr.addImm(
Size);
7646 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
7650 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
7655 "Scalar BFE is only implemented for constant width and offset");
7664 AMDGPU::OpName::src1_modifiers) >= 0)
7669 AMDGPU::OpName::src2_modifiers) >= 0)
7683 NewInstr->addOperand(
Op);
7690 if (
Op.getReg() == AMDGPU::SCC) {
7692 if (
Op.isDef() && !
Op.isDead())
7693 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
7695 addSCCDefsToVALUWorklist(NewInstr, Worklist);
7700 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
7701 Register DstReg = NewInstr->getOperand(0).getReg();
7706 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
7707 MRI.replaceRegWith(DstReg, NewDstReg);
7713 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
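// moveScalarAddSub handles S_ADD_I32/S_SUB_I32 directly when the subtarget
// has the carry-less V_ADD_U32_e64/V_SUB_U32_e64 forms, rewriting the result
// register to a VGPR in place; when it reports "no change", the generic path
// above falls back to the carry-writing _CO_ variants instead.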
7717std::pair<bool, MachineBasicBlock *>
7729 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7732 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
7734 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
7735 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
7743 MRI.replaceRegWith(OldDstReg, ResultReg);
7746 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7747 return std::pair(
true, NewBB);
7750 return std::pair(
false,
nullptr);
7767 bool IsSCC = (CondReg == AMDGPU::SCC);
7775 MRI.replaceRegWith(Dest.
getReg(), CondReg);
7782 NewCondReg =
MRI.createVirtualRegister(TC);
7786 bool CopyFound =
false;
7790 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
7792 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
7794 .
addReg(CandI.getOperand(1).getReg());
7806 ST.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
7816 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
7817 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
7830 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
7832 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
7844 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7845 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7848 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
7858 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
7859 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
7874 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7882 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7883 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7889 bool Src0IsSGPR = Src0.
isReg() &&
7891 bool Src1IsSGPR = Src1.
isReg() &&
7894 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7895 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7905 }
else if (Src1IsSGPR) {
7919 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7923 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7929 unsigned Opcode)
const {
7939 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7940 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7952 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7953 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
7958 unsigned Opcode)
const {
7968 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7969 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7981 MRI.replaceRegWith(Dest.
getReg(), NewDest);
7982 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8000 &AMDGPU::SGPR_32RegClass;
8003 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8006 AMDGPU::sub0, Src0SubRC);
8011 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8013 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8017 AMDGPU::sub1, Src0SubRC);
8019 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8025 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8032 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8034 Worklist.
insert(&LoHalf);
8035 Worklist.
insert(&HiHalf);
8041 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
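// splitScalarSMulU64 below expands a 64-bit scalar multiply into 32-bit VALU
// operations: the low half is a plain mul_lo of the low operands, while the
// high half accumulates the two cross products and the mul_hi of the low
// operands; the halves are then recombined with a REG_SEQUENCE. The *Pseudo
// variant afterwards covers the cases where both operands are known to be
// zero- or sign-extended 32-bit values, so a single mul_lo/mul_hi pair
// suffices.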
8052 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8053 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8054 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8065 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8069 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8099 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8105 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8111 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8122 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8138 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8150 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8161 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8162 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8163 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8174 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8178 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8190 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8191 ? AMDGPU::V_MUL_HI_U32_e64
8192 : AMDGPU::V_MUL_HI_I32_e64;
8207 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8215 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8234 &AMDGPU::SGPR_32RegClass;
8237 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8240 &AMDGPU::SGPR_32RegClass;
8243 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8246 AMDGPU::sub0, Src0SubRC);
8248 AMDGPU::sub0, Src1SubRC);
8250 AMDGPU::sub1, Src0SubRC);
8252 AMDGPU::sub1, Src1SubRC);
8257 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8259 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8264 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8269 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8276 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8278 Worklist.
insert(&LoHalf);
8279 Worklist.
insert(&HiHalf);
8282 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8300 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8316 Register NewDest =
MRI.createVirtualRegister(DestRC);
8322 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8340 MRI.getRegClass(Src.getReg()) :
8341 &AMDGPU::SGPR_32RegClass;
8343 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8344 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8347 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8350 AMDGPU::sub0, SrcSubRC);
8352 AMDGPU::sub1, SrcSubRC);
8358 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8362 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8381 Offset == 0 &&
"Not implemented");
8384 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8385 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8386 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8403 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8404 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8409 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8410 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8414 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8417 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8422 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8423 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8444 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8445 unsigned OpcodeAdd =
8446 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8449 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8451 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8458 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8459 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8460 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8461 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8468 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8474 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8476 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8478 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
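// addUsersToMoveToVALUWorklist walks all uses of a register that has just
// been rewritten to a VGPR and queues the using instructions for conversion;
// for COPY/PHI/REG_SEQUENCE-like users it first determines which operand is
// affected so that only users that can no longer stay scalar are requeued.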
8481void SIInstrInfo::addUsersToMoveToVALUWorklist(
8485 E =
MRI.use_end();
I != E;) {
8490 switch (
UseMI.getOpcode()) {
8493 case AMDGPU::SOFT_WQM:
8494 case AMDGPU::STRICT_WWM:
8495 case AMDGPU::STRICT_WQM:
8496 case AMDGPU::REG_SEQUENCE:
8498 case AMDGPU::INSERT_SUBREG:
8501 OpNo =
I.getOperandNo();
8510 }
while (
I != E &&
I->getParent() == &
UseMI);
8520 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8527 case AMDGPU::S_PACK_LL_B32_B16: {
8528 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8529 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8546 case AMDGPU::S_PACK_LH_B32_B16: {
8547 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8556 case AMDGPU::S_PACK_HL_B32_B16: {
8557 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8567 case AMDGPU::S_PACK_HH_B32_B16: {
8568 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8569 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8586 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8587 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
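// Once a compare is moved to the VALU its result lives in a lane-mask
// register instead of SCC. The helpers below patch that up: SCC readers that
// follow the old definition (copies of SCC, conditional selects and branches)
// are rewritten to use the new condition register, scanning forward only
// until the next instruction that redefines SCC.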
8596 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
8597 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
8605 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
8609 Register DestReg =
MI.getOperand(0).getReg();
8611 MRI.replaceRegWith(DestReg, NewCond);
8616 MI.getOperand(SCCIdx).setReg(NewCond);
8622 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
8625 for (
auto &Copy : CopyToDelete)
8626 Copy->eraseFromParent();
8634void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
8643 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
8645 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
8662 case AMDGPU::REG_SEQUENCE:
8663 case AMDGPU::INSERT_SUBREG:
8665 case AMDGPU::SOFT_WQM:
8666 case AMDGPU::STRICT_WWM:
8667 case AMDGPU::STRICT_WQM: {
8675 case AMDGPU::REG_SEQUENCE:
8676 case AMDGPU::INSERT_SUBREG:
8686 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
8703 int OpIndices[3])
const {
8722 for (
unsigned i = 0; i < 3; ++i) {
8723 int Idx = OpIndices[i];
8760 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
8761 SGPRReg = UsedSGPRs[0];
8764 if (!SGPRReg && UsedSGPRs[1]) {
8765 if (UsedSGPRs[1] == UsedSGPRs[2])
8766 SGPRReg = UsedSGPRs[1];
8773 unsigned OperandName)
const {
8778 return &
MI.getOperand(
Idx);
8795 RsrcDataFormat |= (1ULL << 56);
8800 RsrcDataFormat |= (2ULL << 59);
8803 return RsrcDataFormat;
8825 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
8831 unsigned Opc =
MI.getOpcode();
8837 return get(Opc).mayLoad() &&
8842 int &FrameIndex)
const {
8850 FrameIndex =
Addr->getIndex();
8855 int &FrameIndex)
const {
8858 FrameIndex =
Addr->getIndex();
8863 int &FrameIndex)
const {
8877 int &FrameIndex)
const {
8894 while (++
I != E &&
I->isInsideBundle()) {
8895 assert(!
I->isBundle() &&
"No nested bundle!");
8903 unsigned Opc =
MI.getOpcode();
8905 unsigned DescSize =
Desc.getSize();
8910 unsigned Size = DescSize;
8925 bool HasLiteral =
false;
8926 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
8934 return HasLiteral ? DescSize + 4 : DescSize;
8944 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
8948 case TargetOpcode::BUNDLE:
8950 case TargetOpcode::INLINEASM:
8951 case TargetOpcode::INLINEASM_BR: {
8953 const char *AsmStr =
MI.getOperand(0).getSymbolName();
8957 if (
MI.isMetaInstruction())
8967 if (
MI.memoperands_empty())
8979 static const std::pair<int, const char *> TargetIndices[] = {
9017std::pair<unsigned, unsigned>
9024 static const std::pair<unsigned, const char *> TargetFlags[] = {
9039 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9053 return AMDGPU::WWM_COPY;
9055 return AMDGPU::COPY;
9067 bool IsNullOrVectorRegister =
true;
9075 return IsNullOrVectorRegister &&
9077 (Opcode == AMDGPU::IMPLICIT_DEF &&
9079 (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
9080 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9093 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
9124 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9125 case AMDGPU::SI_KILL_I1_TERMINATOR:
9134 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9135 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9136 case AMDGPU::SI_KILL_I1_PSEUDO:
9137 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9149 const unsigned OffsetBits =
9151 return (1 << OffsetBits) - 1;
9158 if (
MI.isInlineAsm())
9161 for (
auto &
Op :
MI.implicit_operands()) {
9162 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9163 Op.setReg(AMDGPU::VCC_LO);
9176 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9194 if (Imm <= MaxImm + 64) {
9196 Overflow = Imm - MaxImm;
std::pair<int64_t, int64_t>
SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
                             uint64_t FlatVariant) const {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  // ...
  if (AllowNegative) {
    // Use signed division by a power of two to truncate towards 0.
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;

    if (/* ... */ (ImmField % 4) != 0) {
      // Keep the immediate a multiple of 4.
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
    }
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
    RemainderOffset = COffsetVal - ImmField;
  }

  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}
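// Worked example, assuming a 12-bit unsigned immediate field: splitting a
// constant offset of 5000 gives ImmField = 5000 & 0xfff = 904 and
// RemainderOffset = 4096, so 4096 is added to the address register and 904 is
// encoded in the instruction. With a signed field and an offset of -5000, the
// truncating division yields RemainderOffset = -4096 and ImmField = -904.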
9327 switch (ST.getGeneration()) {
9352 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9353 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9354 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9355 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9356 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9357 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9358 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9359 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9366#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9367 case OPCODE##_dpp: \
9368 case OPCODE##_e32: \
9369 case OPCODE##_e64: \
9370 case OPCODE##_e64_dpp: \
9385 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9386 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9387 case AMDGPU::V_FMA_F16_gfx9_e64:
9388 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9389 case AMDGPU::V_INTERP_P2_F16:
9390 case AMDGPU::V_MAD_F16_e64:
9391 case AMDGPU::V_MAD_U16_e64:
9392 case AMDGPU::V_MAD_I16_e64:
9427 if (
isMAI(Opcode)) {
9472 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9473 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9474 auto &RegOp =
MI.getOperand(1 + 2 *
I);
9486 switch (
MI.getOpcode()) {
9488 case AMDGPU::REG_SEQUENCE:
9492 case AMDGPU::INSERT_SUBREG:
9493 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
9510 if (!
P.Reg.isVirtual())
9514 auto *DefInst =
MRI.getVRegDef(RSR.Reg);
9515 while (
auto *
MI = DefInst) {
9517 switch (
MI->getOpcode()) {
9519 case AMDGPU::V_MOV_B32_e32: {
9520 auto &Op1 =
MI->getOperand(1);
9525 DefInst =
MRI.getVRegDef(RSR.Reg);
9533 DefInst =
MRI.getVRegDef(RSR.Reg);
9546 assert(
MRI.isSSA() &&
"Must be run on SSA");
9548 auto *
TRI =
MRI.getTargetRegisterInfo();
9549 auto *DefBB =
DefMI.getParent();
9553 if (
UseMI.getParent() != DefBB)
9556 const int MaxInstScan = 20;
9560 auto E =
UseMI.getIterator();
9561 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
9562 if (
I->isDebugInstr())
9565 if (++NumInst > MaxInstScan)
9568 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
9578 assert(
MRI.isSSA() &&
"Must be run on SSA");
9580 auto *
TRI =
MRI.getTargetRegisterInfo();
9581 auto *DefBB =
DefMI.getParent();
9583 const int MaxUseScan = 10;
9586 for (
auto &
Use :
MRI.use_nodbg_operands(VReg)) {
9587 auto &UseInst = *
Use.getParent();
9590 if (UseInst.getParent() != DefBB || UseInst.isPHI())
9593 if (++NumUse > MaxUseScan)
9600 const int MaxInstScan = 20;
9604 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
9607 if (
I->isDebugInstr())
9610 if (++NumInst > MaxInstScan)
9623 if (Reg == VReg && --NumUse == 0)
9625 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
9637 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
9640 }
while (Cur !=
MBB.
end() && Cur != LastPHIIt);
9649 if (InsPt !=
MBB.
end() &&
9650 (InsPt->getOpcode() == AMDGPU::SI_IF ||
9651 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
9652 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
9653 InsPt->definesRegister(Src,
nullptr)) {
9657 : AMDGPU::S_MOV_B64_term),
9659 .
addReg(Src, 0, SrcSubReg)
9684 if (isFullCopyInstr(
MI)) {
9693 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
9697 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
9708 unsigned *PredCost)
const {
9709 if (
MI.isBundle()) {
9712 unsigned Lat = 0, Count = 0;
9713 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
9715 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
9717 return Lat + Count - 1;
9720 return SchedModel.computeInstrLatency(&
MI);
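// The two routines that follow feed the uniformity analysis: generic G_LOAD
// from flat or private memory and the generic atomic opcodes are treated as
// never-uniform, v_readlane/v_readfirstlane style results are always uniform,
// and the remaining instructions are classified by whether their results can
// differ between lanes (e.g. by writing VGPRs).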
9725 unsigned opcode =
MI.getOpcode();
9726 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI)) {
9727 auto IID = GI->getIntrinsicID();
9734 case Intrinsic::amdgcn_if:
9735 case Intrinsic::amdgcn_else:
9749 if (opcode == AMDGPU::G_LOAD) {
9750 if (
MI.memoperands_empty())
9754 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9755 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9763 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
9764 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
9765 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
9778 unsigned opcode =
MI.getOpcode();
9779 if (opcode == AMDGPU::V_READLANE_B32 ||
9780 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9781 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
9784 if (isCopyInstr(
MI)) {
9788 RI.getPhysRegBaseClass(srcOp.
getReg());
9796 if (
MI.isPreISelOpcode())
9811 if (
MI.memoperands_empty())
9815 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
9816 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
9831 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
9837 if (!Reg || !
SrcOp.readsReg())
9843 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                 Register &SrcReg2, int64_t &CmpMask,
                                 int64_t &CmpValue) const {
  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
    SrcReg = MI.getOperand(0).getReg();
    if (MI.getOperand(1).isReg()) {
      if (MI.getOperand(1).getSubReg())
        return false;
      SrcReg2 = MI.getOperand(1).getReg();
      CmpValue = 0;
    } else if (MI.getOperand(1).isImm()) {
      SrcReg2 = Register();
      CmpValue = MI.getOperand(1).getImm();
    } else {
      return false;
    }
    CmpMask = ~0;
    return true;
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
  case AMDGPU::S_CMPK_LT_U32:
  case AMDGPU::S_CMPK_LT_I32:
  case AMDGPU::S_CMPK_GT_U32:
  case AMDGPU::S_CMPK_GT_I32:
  case AMDGPU::S_CMPK_LE_U32:
  case AMDGPU::S_CMPK_LE_I32:
  case AMDGPU::S_CMPK_GE_U32:
  case AMDGPU::S_CMPK_GE_I32:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = Register();
    CmpValue = MI.getOperand(1).getImm();
    CmpMask = ~0;
    return true;
  }

  return false;
}
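// optimizeCompareInstr below folds an S_CMP of 0 or 1 whose source is a
// single-bit S_AND mask: the AND already leaves the tested bit in SCC, so the
// compare can simply be deleted, and when the AND result has no other users
// the AND itself is rewritten into S_BITCMP0/S_BITCMP1 on that bit.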
9948 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue,
MRI,
9949 this](int64_t ExpectedValue,
unsigned SrcSize,
9950 bool IsReversible,
bool IsSigned) ->
bool {
9975 if (!Def || Def->getParent() != CmpInstr.
getParent())
9978 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
9979 Def->getOpcode() != AMDGPU::S_AND_B64)
9983 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
9994 SrcOp = &Def->getOperand(2);
9995 else if (isMask(&Def->getOperand(2)))
9996 SrcOp = &Def->getOperand(1);
10002 assert(llvm::has_single_bit<uint64_t>(Mask) &&
"Invalid mask.");
10004 if (IsSigned && BitNo == SrcSize - 1)
10007 ExpectedValue <<= BitNo;
10009 bool IsReversedCC =
false;
10010 if (CmpValue != ExpectedValue) {
10013 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10018 Register DefReg = Def->getOperand(0).getReg();
10019 if (IsReversedCC && !
MRI->hasOneNonDBGUse(DefReg))
10022 for (
auto I = std::next(Def->getIterator()), E = CmpInstr.
getIterator();
10024 if (
I->modifiesRegister(AMDGPU::SCC, &RI) ||
10025 I->killsRegister(AMDGPU::SCC, &RI))
10030 Def->findRegisterDefOperand(AMDGPU::SCC,
nullptr);
10034 if (!
MRI->use_nodbg_empty(DefReg)) {
10042 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10043 : AMDGPU::S_BITCMP1_B32
10044 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10045 : AMDGPU::S_BITCMP1_B64;
10050 Def->eraseFromParent();
  switch (CmpInstr.getOpcode()) {
  default:
    break;
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(1, 32, true, false);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(1, 32, false, false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(1, 32, false, true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(1, 64, true, false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(0, 32, true, false);
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(0, 32, false, false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(0, 32, false, true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(0, 64, true, false);
  }

  return false;
}
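// The final helper enforces even register alignment for an operand on
// subtargets that require it: the value is rebuilt in a fresh
// VReg_64_Align2 (or AReg_64_Align2 for AGPR data) register and the operand
// is retargeted at its sub0 half.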
10090 unsigned OpName)
const {
10108 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10111 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10112 : &AMDGPU::VReg_64_Align2RegClass);
10114 .
addReg(DataReg, 0,
Op.getSubReg())
10119 Op.setSubReg(AMDGPU::sub0);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
TargetInstrInfo::RegSubRegPair RegSubRegPair
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillRestoreOpcode(unsigned Size)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &MFI)
static unsigned getAGPRSpillSaveOpcode(unsigned Size)
static bool resultDependsOnExec(const MachineInstr &MI)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static constexpr unsigned ModifierOpNames[]
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
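A minimal, self-contained sketch of the APInt accessors referenced in this listing (getSExtValue here, getZExtValue a few entries below); the demo function name is illustrative.

#include "llvm/ADT/APInt.h"
#include <cassert>

// Illustrative only: the same 8-bit pattern reads differently zero- vs. sign-extended.
void apintExtensionDemo() {
  llvm::APInt V(/*numBits=*/8, /*val=*/0xFF);
  assert(V.getZExtValue() == 255); // zero-extended
  assert(V.getSExtValue() == -1);  // sign-extended
}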
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
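A small sketch of the ArrayRef accessors listed above (front, size, empty); the buffer and function name are made up for illustration.

#include "llvm/ADT/ArrayRef.h"
#include <cassert>

void arrayRefDemo() {
  int Buf[] = {10, 20, 30};
  llvm::ArrayRef<int> A(Buf); // non-owning view over Buf
  assert(!A.empty());
  assert(A.size() == 3);
  assert(A.front() == 10);
}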
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasVALUMaskWriteHazard() const
bool needsAlignedVGPRs() const
Return whether operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
bool hasVALUReadSGPRHazard() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasNoF16PseudoScalarTransInlineConstants() const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have a successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
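A hedged sketch of querying the MCInstrDesc properties listed above through TargetInstrInfo::get; the helper name and the notion of a "simple load" are illustrative, not an API of this file.

#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInstrDesc.h"

// Sketch only: classify an opcode purely from its static MCInstrDesc flags.
static bool isSimpleLoadLikeOpcode(const llvm::TargetInstrInfo &TII, unsigned Opcode) {
  const llvm::MCInstrDesc &Desc = TII.get(Opcode);
  return Desc.mayLoad() && !Desc.mayStore() && Desc.getNumDefs() > 0;
}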
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
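A hedged sketch of how the builder methods above are typically chained; the helper name and the choice of S_MOV_B32 with an immediate of 42 are illustrative, and the first include assumes the AMDGPU target source tree.

#include "SIInstrInfo.h" // AMDGPU target-private header (assumed available)
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

// Sketch only: materialize an immediate into an SGPR before iterator I.
static void emitMovImm(const llvm::SIInstrInfo &TII, llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::iterator I, const llvm::DebugLoc &DL,
                       llvm::Register DestReg) {
  llvm::BuildMI(MBB, I, DL, TII.get(llvm::AMDGPU::S_MOV_B32), DestReg)
      .addImm(42);
}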
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Returns the total number of operands.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
iterator_range< mop_iterator > explicit_operands()
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
iterator_range< mop_iterator > implicit_operands()
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
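A hedged sketch that walks an instruction's operands with the predicates listed above; the function name is illustrative.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/raw_ostream.h"

// Sketch only: print register numbers and immediate values of MI's operands.
static void dumpRegAndImmOperands(const llvm::MachineInstr &MI) {
  for (const llvm::MachineOperand &MO : MI.operands()) {
    if (MO.isReg())
      llvm::errs() << "reg operand: " << MO.getReg().id() << '\n';
    else if (MO.isImm())
      llvm::errs() << "imm operand: " << MO.getImm() << '\n';
  }
}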
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the beginning of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
static bool isVIMAGE(const MachineInstr &MI)
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
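A hedged sketch of the named-operand lookup above, reading an offset immediate when present; the helper name is illustrative and the include assumes the AMDGPU target directory.

#include "SIInstrInfo.h" // AMDGPU target-private header (assumed available)

// Sketch only: return the value of the named offset operand, or 0 if the
// instruction has none (assumes the operand, when present, is an immediate).
static int64_t getOffsetOrZero(const llvm::SIInstrInfo &TII, llvm::MachineInstr &MI) {
  if (const llvm::MachineOperand *Off =
          TII.getNamedOperand(MI, llvm::AMDGPU::OpName::offset))
    return Off->getImm();
  return 0;
}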
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, int64_t Value) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool isVGPRCopy(const MachineInstr &MI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, const MachineOperand *fromMO, unsigned toIdx, const MachineOperand *toMO) const
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
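A hedged sketch of the inline-constant checks listed above; the helper name is illustrative, the header path assumes the AMDGPU target directory, and HasInv2Pi stands in for the subtarget's hasInv2PiInlineImm().

#include "Utils/AMDGPUBaseInfo.h" // AMDGPU target-private header (assumed available)

// Sketch only: does Value fit in an inline constant rather than a literal?
static bool fitsInlineConstant(int64_t Value, bool Is64Bit, bool HasInv2Pi) {
  return Is64Bit
             ? llvm::AMDGPU::isInlinableLiteral64(Value, HasInv2Pi)
             : llvm::AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Value), HasInv2Pi);
}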
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
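A small self-contained check of several MathExtras.h helpers referenced in this listing (isPowerOf2_64, Log2_32, divideCeil, alignDown, Hi_32/Lo_32, maxUIntN); the demo function is illustrative.

#include "llvm/Support/MathExtras.h"
#include <cassert>

void mathExtrasDemo() {
  assert(llvm::isPowerOf2_64(64));          // 64 is a power of two
  assert(llvm::Log2_32(32) == 5);           // floor(log2(32))
  assert(llvm::divideCeil(10, 4) == 3);     // ceil(10 / 4)
  assert(llvm::alignDown(37, 8) == 32);     // largest multiple of 8 <= 37
  assert(llvm::Hi_32(0x0123456789ABCDEFULL) == 0x01234567U);
  assert(llvm::Lo_32(0x0123456789ABCDEFULL) == 0x89ABCDEFU);
  assert(llvm::maxUIntN(8) == 255);         // largest 8-bit unsigned value
}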
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
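A small self-contained sketch of the range helpers listed here (all_of, any_of, reverse, is_contained); the demo function is illustrative.

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

void rangeHelpersDemo() {
  std::vector<int> V = {1, 2, 3, 4};
  assert(llvm::is_contained(V, 3));                          // membership test
  assert(llvm::all_of(V, [](int X) { return X > 0; }));      // every element positive
  assert(llvm::any_of(V, [](int X) { return X % 2 == 0; })); // at least one even
  int Sum = 0;
  for (int X : llvm::reverse(V))                             // visits 4, 3, 2, 1
    Sum += X;
  assert(Sum == 10);
}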
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
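A minimal sketch of the Align helper described above; the demo function is illustrative.

#include "llvm/Support/Alignment.h"
#include <cassert>

void alignDemo() {
  llvm::Align A(16);                  // must be a non-zero power of two
  assert(A.value() == 16);
  assert(llvm::alignTo(37, A) == 48); // round 37 up to a multiple of 16
}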
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.