#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;
  Type *RetTy = nullptr;
  bool TypeBasedOnly = false);

class TargetTransformInfo;

  static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
  const PointersChainInfo &Info, Type *AccessTy,
  std::pair<const Value *, unsigned>
  KnownBits &Known, bool &KnownBitsComputed) const;
  SimplifyAndSetOp) const;
  bool HasBaseReg, int64_t Scale,
  int64_t ScalableOffset = 0) const;
  Align Alignment, unsigned AddrSpace) const;
  unsigned AddrSpace = 0) const;
  unsigned ScalarOpdIdx) const;
  const APInt &DemandedElts, bool Insert, bool Extract,
  ArrayRef<Type *> Tys,
  bool IsZeroCmp) const;
  unsigned *Fast = nullptr) const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  Type *ScalarValTy) const;
  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
  unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const;
  std::optional<unsigned> BinOp = std::nullopt) const;
  unsigned Opcode, Type *Ty,
  ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
  const TargetLibraryInfo *TLibInfo = nullptr) const;
  VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
  const SmallBitVector &OpcodeMask,
  ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr) const;
  unsigned Index) const;
  const Instruction *I = nullptr) const;
  unsigned Index = -1, Value *Op0 = nullptr, Value *Op1 = nullptr) const;
  ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
  unsigned Index = -1) const;
  const APInt &DemandedDstElts,
  const Instruction *I = nullptr) const;
  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
  static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
    return FMF && !(*FMF).allowReassoc();
  }
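  // Editorial note (illustrative, not part of the original header): this helper
  // answers whether a floating-point reduction must preserve evaluation order.
  // A minimal sketch, assuming a FastMathFlags value built by the caller:
  //
  //   FastMathFlags FMF;                      // reassociation not allowed
  //   requiresOrderedReduction(FMF);          // true  -> strict, in-order reduction
  //   FMF.setAllowReassoc();
  //   requiresOrderedReduction(FMF);          // false -> a tree reduction is fine
  //   requiresOrderedReduction(std::nullopt); // false -> e.g. integer reductions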
  unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
  const SCEV *Ptr = nullptr) const;
  Type *ExpectedType) const;
  unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
  std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
  unsigned DefaultCallPenalty) const;
  unsigned AddrSpace) const;
  unsigned AddrSpace) const;
  unsigned ChainSizeInBytes,
  unsigned ChainSizeInBytes,
  Align Alignment) const;
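  // Editorial note: the declarations above are the public TTI query surface.
  // An illustrative sketch (not from the original header) of how a transform
  // typically consults it; TTI, Ty, and Threshold are assumed to be provided
  // by the surrounding pass:
  //
  //   InstructionCost Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Add, Ty, TargetTransformInfo::TCK_RecipThroughput);
  //   if (Cost.isValid() && Cost <= Threshold)
  //     ; // cheap enough to vectorize/unroll at this spot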
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
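  // Editorial note: TargetTransformInfo uses the classic Concept/Model
  // type-erasure idiom -- Concept declares the abstract query interface,
  // Model<T> forwards every call to a concrete target implementation T, and
  // TTIImpl owns the erased object. A minimal sketch of the same idiom (the
  // names here are illustrative, not LLVM's):
  //
  //   struct Concept { virtual ~Concept() = default; virtual int cost() const = 0; };
  //   template <typename T> struct Model final : Concept {
  //     T Impl;
  //     explicit Model(T I) : Impl(std::move(I)) {}
  //     int cost() const override { return Impl.cost(); }
  //   };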
  virtual std::pair<const Value *, unsigned>
  Value *NewV) const = 0;
  KnownBits &Known, bool &KnownBitsComputed) = 0;
  SimplifyAndSetOp) = 0;
  int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
  int64_t ScalableOffset) = 0;
  Align Alignment) = 0;
  Align Alignment) = 0;
  unsigned AddrSpace) = 0;
  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0;
  unsigned ScalarOpdIdx) = 0;
  unsigned *Fast) = 0;
  Type *Ty = nullptr) const = 0;
  bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
  Type *ScalarValTy) const = 0;
  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const = 0;
  std::optional<unsigned> BinOp) const = 0;
  VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
  unsigned Index) = 0;
  ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
  unsigned Index) = 0;
  const APInt &DemandedDstElts,
  bool VariableMask, Align Alignment,
  bool VariableMask, Align Alignment,
  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  std::optional<FastMathFlags> FMF,
  Type *ExpectedType) = 0;
  unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
  std::optional<uint32_t> AtomicElementSize) const = 0;
  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  std::optional<uint32_t> AtomicCpySize) const = 0;
  unsigned DefaultCallPenalty) const = 0;
  unsigned AddrSpace) const = 0;
  unsigned AddrSpace) const = 0;
  unsigned ChainSizeInBytes,
  unsigned ChainSizeInBytes,
  Align Alignment) const = 0;
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;
  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const PointersChainInfo &Info,
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
  }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
  }
  int getInliningLastCallToStaticBonus() const override {
    return Impl.getInliningLastCallToStaticBonus();
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  InstructionCost getBranchMispredictPenalty() override {
    return Impl.getBranchMispredictPenalty();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  unsigned getEpilogueVectorizationMinVF() override {
    return Impl.getEpilogueVectorizationMinVF();
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalAddScalableImmediate(int64_t Imm) override {
    return Impl.isLegalAddScalableImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I, int64_t ScalableOffset) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I, ScalableOffset);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldDropLSRSolutionIfLessProfitable() const override {
    return Impl.shouldDropLSRSolutionIfLessProfitable();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
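  // Editorial note: an illustrative sketch (not from the original header) of an
  // addressing-mode legality query as LSR-style passes issue it through the
  // public interface; TTI and Int64Ty are assumed to be in scope:
  //
  //   bool OK = TTI.isLegalAddressingMode(Int64Ty, /*BaseGV=*/nullptr,
  //                                       /*BaseOffset=*/16, /*HasBaseReg=*/true,
  //                                       /*Scale=*/1);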
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedCompressStore(DataType, Alignment);
  }
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
  }
  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalStridedLoadStore(DataType, Alignment);
  }
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    unsigned AddrSpace) override {
    return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
  }
  bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
    return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
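  // Editorial note: an illustrative sketch (not from the original header) of the
  // masked-memory legality checks the vectorizers make before emitting masked
  // intrinsics; TTI and VecTy are assumed to be in scope:
  //
  //   if (TTI.isLegalMaskedLoad(VecTy, Align(16)) &&
  //       TTI.isLegalMaskedStore(VecTy, Align(16)))
  //     ; // emit llvm.masked.load / llvm.masked.store instead of scalarizing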
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override {
    return Impl.isTargetIntrinsicTriviallyScalarizable(ID);
  }
  unsigned ScalarOpdIdx) override {
    return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
  }
  int OpdIdx) override {
    return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
  }
  bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                        int RetIdx) override {
    return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           ArrayRef<Value *> VL = {}) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
    return Impl.shouldTreatInstructionLikeSelect(I);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned *Fast) override {
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  const APInt &Imm, Type *Ty,
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const override {
    return Impl.preferToKeepConstantsAttached(Inst, Fn);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
    return Impl.hasConditionalLoadStoreForType(Ty);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  std::optional<unsigned> getMinPageSize() const override {
    return Impl.getMinPageSize();
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      std::optional<unsigned> BinOp = std::nullopt) const override {
    return Impl.getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
                                        AccumType, VF, OpAExtend, OpBExtend,
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  const SmallBitVector &OpcodeMask,
    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
  }
  ArrayRef<const Value *> Args,
  const Instruction *CxtI) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     OperandValueInfo Op1Info,
                                     OperandValueInfo Op2Info,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                   Op1Info, Op2Info, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     unsigned Index) override {
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           UseMaskForCond, UseMaskForGaps);
  }
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  }
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferFixedOverScalableIfEqualCost() const override {
    return Impl.preferFixedOverScalableIfEqualCost();
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
    return Impl.getPreferredExpandedReductionShuffle(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override {
    return Impl.isProfitableToSinkOperands(I, Ops);
  }
  bool isVectorShiftByScalarCheap(Type *Ty) const override {
    return Impl.isVectorShiftByScalarCheap(Ty);
  }
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  uint64_t getFeatureMask(const Function &F) const override {
    return Impl.getFeatureMask(F);
  }
  bool isMultiversionedFunction(const Function &F) const override {
    return Impl.isMultiversionedFunction(F);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
  unsigned getNumBytesToPadGlobalArray(unsigned Size,
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
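// Editorial note: the constructor above is where a concrete, target-specific
// TTI implementation gets type-erased behind the pimpl. An illustrative sketch
// (the implementation name is hypothetical, not LLVM's):
//
//   struct MyTTIImpl { /* implements the TTI query methods */ };
//   TargetTransformInfo TTI(MyTTIImpl(/*...*/));  // wrapped in Model<MyTTIImpl>
//
// In-tree targets reach this path through TargetMachine::getTargetTransformInfo.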
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();
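// Editorial note: an illustrative sketch (not from the original header) of how
// a function pass obtains the analysis result under the new pass manager:
//
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     // ... query TTI ...
//   }
//
// The legacy pass manager reaches the same data through
// createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA).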