summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorLaszlo Agocs <laszlo.agocs@qt.io>2019-10-21 14:09:05 +0200
committerLaszlo Agocs <laszlo.agocs@qt.io>2019-10-23 10:29:49 +0000
commit53fc739e3d530a70e5371a08d001bacabc0233de (patch)
tree836910be948b6d7702b6944fcf0b7947f576d4b4
parent3ed14d7b0d539f97f2d68c83cc02d6509b24aea7 (diff)
Update SPIRV-Cross
Change-Id: I15dbf83057b5aa435b87b80e219b113987735cad
Reviewed-by: Christian Strømme <christian.stromme@qt.io>
-rw-r--r--  src/3rdparty/SPIRV-Cross/qt_attribution.json        |    2
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv.h                    |  767
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv.hpp                  |  773
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cfg.cpp              |  195
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cfg.hpp              |   13
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_common.hpp           |  430
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cpp.cpp              |    2
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross.cpp            | 1039
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross.hpp            |  209
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp          |  347
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_c.h            |  129
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp |    8
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp  |  111
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp  |   92
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_glsl.cpp             | 1902
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_glsl.hpp             |   79
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp             |  303
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp             |    9
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_msl.cpp              | 5278
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_msl.hpp              |  380
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_parser.cpp           |   65
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_reflect.cpp          |   39
22 files changed, 10145 insertions, 2027 deletions
diff --git a/src/3rdparty/SPIRV-Cross/qt_attribution.json b/src/3rdparty/SPIRV-Cross/qt_attribution.json
index 70df546..43847a0 100644
--- a/src/3rdparty/SPIRV-Cross/qt_attribution.json
+++ b/src/3rdparty/SPIRV-Cross/qt_attribution.json
@@ -7,7 +7,7 @@
"QtUsage": "Shader code generation.",
"Homepage": "https://github.com/KhronosGroup/SPIRV-Cross",
- "Version": "f647e655d489a7699305ada30cda808a7dac079f",
+ "Version": "ff1897ae0e1fc1e37c604933694477f335ca8e44",
"License": "Apache License 2.0",
"LicenseId": "Apache-2.0",
"LicenseFile": "LICENSE",
diff --git a/src/3rdparty/SPIRV-Cross/spirv.h b/src/3rdparty/SPIRV-Cross/spirv.h
index 8da27dd..1b67617 100644
--- a/src/3rdparty/SPIRV-Cross/spirv.h
+++ b/src/3rdparty/SPIRV-Cross/spirv.h
@@ -53,12 +53,12 @@
typedef unsigned int SpvId;
-#define SPV_VERSION 0x10300
-#define SPV_REVISION 6
+#define SPV_VERSION 0x10500
+#define SPV_REVISION 1
static const unsigned int SpvMagicNumber = 0x07230203;
-static const unsigned int SpvVersion = 0x00010300;
-static const unsigned int SpvRevision = 6;
+static const unsigned int SpvVersion = 0x00010400;
+static const unsigned int SpvRevision = 1;
static const unsigned int SpvOpCodeMask = 0xffff;
static const unsigned int SpvWordCountShift = 16;
@@ -95,6 +95,7 @@ typedef enum SpvAddressingModel_ {
SpvAddressingModelLogical = 0,
SpvAddressingModelPhysical32 = 1,
SpvAddressingModelPhysical64 = 2,
+ SpvAddressingModelPhysicalStorageBuffer64 = 5348,
SpvAddressingModelPhysicalStorageBuffer64EXT = 5348,
SpvAddressingModelMax = 0x7fffffff,
} SpvAddressingModel;
@@ -103,6 +104,7 @@ typedef enum SpvMemoryModel_ {
SpvMemoryModelSimple = 0,
SpvMemoryModelGLSL450 = 1,
SpvMemoryModelOpenCL = 2,
+ SpvMemoryModelVulkan = 3,
SpvMemoryModelVulkanKHR = 3,
SpvMemoryModelMax = 0x7fffffff,
} SpvMemoryModel;
@@ -158,6 +160,12 @@ typedef enum SpvExecutionMode_ {
SpvExecutionModeDerivativeGroupQuadsNV = 5289,
SpvExecutionModeDerivativeGroupLinearNV = 5290,
SpvExecutionModeOutputTrianglesNV = 5298,
+ SpvExecutionModePixelInterlockOrderedEXT = 5366,
+ SpvExecutionModePixelInterlockUnorderedEXT = 5367,
+ SpvExecutionModeSampleInterlockOrderedEXT = 5368,
+ SpvExecutionModeSampleInterlockUnorderedEXT = 5369,
+ SpvExecutionModeShadingRateInterlockOrderedEXT = 5370,
+ SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371,
SpvExecutionModeMax = 0x7fffffff,
} SpvExecutionMode;
@@ -181,6 +189,7 @@ typedef enum SpvStorageClass_ {
SpvStorageClassHitAttributeNV = 5339,
SpvStorageClassIncomingRayPayloadNV = 5342,
SpvStorageClassShaderRecordBufferNV = 5343,
+ SpvStorageClassPhysicalStorageBuffer = 5349,
SpvStorageClassPhysicalStorageBufferEXT = 5349,
SpvStorageClassMax = 0x7fffffff,
} SpvStorageClass;
@@ -309,10 +318,16 @@ typedef enum SpvImageOperandsShift_ {
SpvImageOperandsConstOffsetsShift = 5,
SpvImageOperandsSampleShift = 6,
SpvImageOperandsMinLodShift = 7,
+ SpvImageOperandsMakeTexelAvailableShift = 8,
SpvImageOperandsMakeTexelAvailableKHRShift = 8,
+ SpvImageOperandsMakeTexelVisibleShift = 9,
SpvImageOperandsMakeTexelVisibleKHRShift = 9,
+ SpvImageOperandsNonPrivateTexelShift = 10,
SpvImageOperandsNonPrivateTexelKHRShift = 10,
+ SpvImageOperandsVolatileTexelShift = 11,
SpvImageOperandsVolatileTexelKHRShift = 11,
+ SpvImageOperandsSignExtendShift = 12,
+ SpvImageOperandsZeroExtendShift = 13,
SpvImageOperandsMax = 0x7fffffff,
} SpvImageOperandsShift;
@@ -326,10 +341,16 @@ typedef enum SpvImageOperandsMask_ {
SpvImageOperandsConstOffsetsMask = 0x00000020,
SpvImageOperandsSampleMask = 0x00000040,
SpvImageOperandsMinLodMask = 0x00000080,
+ SpvImageOperandsMakeTexelAvailableMask = 0x00000100,
SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100,
+ SpvImageOperandsMakeTexelVisibleMask = 0x00000200,
SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200,
+ SpvImageOperandsNonPrivateTexelMask = 0x00000400,
SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400,
+ SpvImageOperandsVolatileTexelMask = 0x00000800,
SpvImageOperandsVolatileTexelKHRMask = 0x00000800,
+ SpvImageOperandsSignExtendMask = 0x00001000,
+ SpvImageOperandsZeroExtendMask = 0x00002000,
} SpvImageOperandsMask;
typedef enum SpvFPFastMathModeShift_ {
@@ -410,6 +431,7 @@ typedef enum SpvDecoration_ {
SpvDecorationNonWritable = 24,
SpvDecorationNonReadable = 25,
SpvDecorationUniform = 26,
+ SpvDecorationUniformId = 27,
SpvDecorationSaturatedConversion = 28,
SpvDecorationStream = 29,
SpvDecorationLocation = 30,
@@ -441,11 +463,17 @@ typedef enum SpvDecoration_ {
SpvDecorationPerViewNV = 5272,
SpvDecorationPerTaskNV = 5273,
SpvDecorationPerVertexNV = 5285,
+ SpvDecorationNonUniform = 5300,
SpvDecorationNonUniformEXT = 5300,
+ SpvDecorationRestrictPointer = 5355,
SpvDecorationRestrictPointerEXT = 5355,
+ SpvDecorationAliasedPointer = 5356,
SpvDecorationAliasedPointerEXT = 5356,
+ SpvDecorationCounterBuffer = 5634,
SpvDecorationHlslCounterBufferGOOGLE = 5634,
SpvDecorationHlslSemanticGOOGLE = 5635,
+ SpvDecorationUserSemantic = 5635,
+ SpvDecorationUserTypeGOOGLE = 5636,
SpvDecorationMax = 0x7fffffff,
} SpvDecoration;
@@ -548,6 +576,10 @@ typedef enum SpvBuiltIn_ {
SpvBuiltInHitTNV = 5332,
SpvBuiltInHitKindNV = 5333,
SpvBuiltInIncomingRayFlagsNV = 5351,
+ SpvBuiltInWarpsPerSMNV = 5374,
+ SpvBuiltInSMCountNV = 5375,
+ SpvBuiltInWarpIDNV = 5376,
+ SpvBuiltInSMIDNV = 5377,
SpvBuiltInMax = 0x7fffffff,
} SpvBuiltIn;
@@ -568,6 +600,11 @@ typedef enum SpvLoopControlShift_ {
SpvLoopControlDontUnrollShift = 1,
SpvLoopControlDependencyInfiniteShift = 2,
SpvLoopControlDependencyLengthShift = 3,
+ SpvLoopControlMinIterationsShift = 4,
+ SpvLoopControlMaxIterationsShift = 5,
+ SpvLoopControlIterationMultipleShift = 6,
+ SpvLoopControlPeelCountShift = 7,
+ SpvLoopControlPartialCountShift = 8,
SpvLoopControlMax = 0x7fffffff,
} SpvLoopControlShift;
@@ -577,6 +614,11 @@ typedef enum SpvLoopControlMask_ {
SpvLoopControlDontUnrollMask = 0x00000002,
SpvLoopControlDependencyInfiniteMask = 0x00000004,
SpvLoopControlDependencyLengthMask = 0x00000008,
+ SpvLoopControlMinIterationsMask = 0x00000010,
+ SpvLoopControlMaxIterationsMask = 0x00000020,
+ SpvLoopControlIterationMultipleMask = 0x00000040,
+ SpvLoopControlPeelCountMask = 0x00000080,
+ SpvLoopControlPartialCountMask = 0x00000100,
} SpvLoopControlMask;
typedef enum SpvFunctionControlShift_ {
@@ -606,9 +648,13 @@ typedef enum SpvMemorySemanticsShift_ {
SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
SpvMemorySemanticsAtomicCounterMemoryShift = 10,
SpvMemorySemanticsImageMemoryShift = 11,
+ SpvMemorySemanticsOutputMemoryShift = 12,
SpvMemorySemanticsOutputMemoryKHRShift = 12,
+ SpvMemorySemanticsMakeAvailableShift = 13,
SpvMemorySemanticsMakeAvailableKHRShift = 13,
+ SpvMemorySemanticsMakeVisibleShift = 14,
SpvMemorySemanticsMakeVisibleKHRShift = 14,
+ SpvMemorySemanticsVolatileShift = 15,
SpvMemorySemanticsMax = 0x7fffffff,
} SpvMemorySemanticsShift;
@@ -624,17 +670,24 @@ typedef enum SpvMemorySemanticsMask_ {
SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
SpvMemorySemanticsImageMemoryMask = 0x00000800,
+ SpvMemorySemanticsOutputMemoryMask = 0x00001000,
SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000,
+ SpvMemorySemanticsMakeAvailableMask = 0x00002000,
SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000,
+ SpvMemorySemanticsMakeVisibleMask = 0x00004000,
SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000,
+ SpvMemorySemanticsVolatileMask = 0x00008000,
} SpvMemorySemanticsMask;
typedef enum SpvMemoryAccessShift_ {
SpvMemoryAccessVolatileShift = 0,
SpvMemoryAccessAlignedShift = 1,
SpvMemoryAccessNontemporalShift = 2,
+ SpvMemoryAccessMakePointerAvailableShift = 3,
SpvMemoryAccessMakePointerAvailableKHRShift = 3,
+ SpvMemoryAccessMakePointerVisibleShift = 4,
SpvMemoryAccessMakePointerVisibleKHRShift = 4,
+ SpvMemoryAccessNonPrivatePointerShift = 5,
SpvMemoryAccessNonPrivatePointerKHRShift = 5,
SpvMemoryAccessMax = 0x7fffffff,
} SpvMemoryAccessShift;
@@ -644,8 +697,11 @@ typedef enum SpvMemoryAccessMask_ {
SpvMemoryAccessVolatileMask = 0x00000001,
SpvMemoryAccessAlignedMask = 0x00000002,
SpvMemoryAccessNontemporalMask = 0x00000004,
+ SpvMemoryAccessMakePointerAvailableMask = 0x00000008,
SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+ SpvMemoryAccessMakePointerVisibleMask = 0x00000010,
SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+ SpvMemoryAccessNonPrivatePointerMask = 0x00000020,
SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020,
} SpvMemoryAccessMask;
@@ -655,6 +711,7 @@ typedef enum SpvScope_ {
SpvScopeWorkgroup = 2,
SpvScopeSubgroup = 3,
SpvScopeInvocation = 4,
+ SpvScopeQueueFamily = 5,
SpvScopeQueueFamilyKHR = 5,
SpvScopeMax = 0x7fffffff,
} SpvScope;
@@ -755,6 +812,8 @@ typedef enum SpvCapability_ {
SpvCapabilityGroupNonUniformShuffleRelative = 66,
SpvCapabilityGroupNonUniformClustered = 67,
SpvCapabilityGroupNonUniformQuad = 68,
+ SpvCapabilityShaderLayer = 69,
+ SpvCapabilityShaderViewportIndex = 70,
SpvCapabilitySubgroupBallotKHR = 4423,
SpvCapabilityDrawParameters = 4427,
SpvCapabilitySubgroupVoteKHR = 4431,
@@ -783,6 +842,7 @@ typedef enum SpvCapability_ {
SpvCapabilityFragmentMaskAMD = 5010,
SpvCapabilityStencilExportEXT = 5013,
SpvCapabilityImageReadWriteLodAMD = 5015,
+ SpvCapabilityShaderClockKHR = 5055,
SpvCapabilitySampleMaskOverrideCoverageNV = 5249,
SpvCapabilityGeometryShaderPassthroughNV = 5251,
SpvCapabilityShaderViewportIndexLayerEXT = 5254,
@@ -798,28 +858,52 @@ typedef enum SpvCapability_ {
SpvCapabilityFragmentDensityEXT = 5291,
SpvCapabilityShadingRateNV = 5291,
SpvCapabilityGroupNonUniformPartitionedNV = 5297,
+ SpvCapabilityShaderNonUniform = 5301,
SpvCapabilityShaderNonUniformEXT = 5301,
+ SpvCapabilityRuntimeDescriptorArray = 5302,
SpvCapabilityRuntimeDescriptorArrayEXT = 5302,
+ SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303,
SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303,
+ SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304,
SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304,
+ SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305,
SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305,
+ SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306,
SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306,
+ SpvCapabilitySampledImageArrayNonUniformIndexing = 5307,
SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307,
+ SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308,
SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308,
+ SpvCapabilityStorageImageArrayNonUniformIndexing = 5309,
SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309,
+ SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310,
SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310,
+ SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311,
SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+ SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
SpvCapabilityRayTracingNV = 5340,
+ SpvCapabilityVulkanMemoryModel = 5345,
SpvCapabilityVulkanMemoryModelKHR = 5345,
+ SpvCapabilityVulkanMemoryModelDeviceScope = 5346,
SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
+ SpvCapabilityPhysicalStorageBufferAddresses = 5347,
SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347,
SpvCapabilityComputeDerivativeGroupLinearNV = 5350,
SpvCapabilityCooperativeMatrixNV = 5357,
+ SpvCapabilityFragmentShaderSampleInterlockEXT = 5363,
+ SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372,
+ SpvCapabilityShaderSMBuiltinsNV = 5373,
+ SpvCapabilityFragmentShaderPixelInterlockEXT = 5378,
+ SpvCapabilityDemoteToHelperInvocationEXT = 5379,
SpvCapabilitySubgroupShuffleINTEL = 5568,
SpvCapabilitySubgroupBufferBlockIOINTEL = 5569,
SpvCapabilitySubgroupImageBlockIOINTEL = 5570,
SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579,
+ SpvCapabilityIntegerFunctions2INTEL = 5584,
+ SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696,
+ SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
+ SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
SpvCapabilityMax = 0x7fffffff,
} SpvCapability;
@@ -1164,6 +1248,10 @@ typedef enum SpvOp_ {
SpvOpGroupNonUniformLogicalXor = 364,
SpvOpGroupNonUniformQuadBroadcast = 365,
SpvOpGroupNonUniformQuadSwap = 366,
+ SpvOpCopyLogical = 400,
+ SpvOpPtrEqual = 401,
+ SpvOpPtrNotEqual = 402,
+ SpvOpPtrDiff = 403,
SpvOpSubgroupBallotKHR = 4421,
SpvOpSubgroupFirstInvocationKHR = 4422,
SpvOpSubgroupAllKHR = 4428,
@@ -1180,6 +1268,7 @@ typedef enum SpvOp_ {
SpvOpGroupSMaxNonUniformAMD = 5007,
SpvOpFragmentMaskFetchAMD = 5011,
SpvOpFragmentFetchAMD = 5012,
+ SpvOpReadClockKHR = 5056,
SpvOpImageSampleFootprintNV = 5283,
SpvOpGroupNonUniformPartitionNV = 5296,
SpvOpWritePackedPrimitiveIndices4x8NV = 5299,
@@ -1194,6 +1283,10 @@ typedef enum SpvOp_ {
SpvOpCooperativeMatrixStoreNV = 5360,
SpvOpCooperativeMatrixMulAddNV = 5361,
SpvOpCooperativeMatrixLengthNV = 5362,
+ SpvOpBeginInvocationInterlockEXT = 5364,
+ SpvOpEndInvocationInterlockEXT = 5365,
+ SpvOpDemoteToHelperInvocationEXT = 5380,
+ SpvOpIsHelperInvocationEXT = 5381,
SpvOpSubgroupShuffleINTEL = 5571,
SpvOpSubgroupShuffleDownINTEL = 5572,
SpvOpSubgroupShuffleUpINTEL = 5573,
@@ -1204,10 +1297,676 @@ typedef enum SpvOp_ {
SpvOpSubgroupImageBlockWriteINTEL = 5578,
SpvOpSubgroupImageMediaBlockReadINTEL = 5580,
SpvOpSubgroupImageMediaBlockWriteINTEL = 5581,
+ SpvOpUCountLeadingZerosINTEL = 5585,
+ SpvOpUCountTrailingZerosINTEL = 5586,
+ SpvOpAbsISubINTEL = 5587,
+ SpvOpAbsUSubINTEL = 5588,
+ SpvOpIAddSatINTEL = 5589,
+ SpvOpUAddSatINTEL = 5590,
+ SpvOpIAverageINTEL = 5591,
+ SpvOpUAverageINTEL = 5592,
+ SpvOpIAverageRoundedINTEL = 5593,
+ SpvOpUAverageRoundedINTEL = 5594,
+ SpvOpISubSatINTEL = 5595,
+ SpvOpUSubSatINTEL = 5596,
+ SpvOpIMul32x16INTEL = 5597,
+ SpvOpUMul32x16INTEL = 5598,
+ SpvOpDecorateString = 5632,
SpvOpDecorateStringGOOGLE = 5632,
+ SpvOpMemberDecorateString = 5633,
SpvOpMemberDecorateStringGOOGLE = 5633,
+ SpvOpVmeImageINTEL = 5699,
+ SpvOpTypeVmeImageINTEL = 5700,
+ SpvOpTypeAvcImePayloadINTEL = 5701,
+ SpvOpTypeAvcRefPayloadINTEL = 5702,
+ SpvOpTypeAvcSicPayloadINTEL = 5703,
+ SpvOpTypeAvcMcePayloadINTEL = 5704,
+ SpvOpTypeAvcMceResultINTEL = 5705,
+ SpvOpTypeAvcImeResultINTEL = 5706,
+ SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707,
+ SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708,
+ SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709,
+ SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710,
+ SpvOpTypeAvcRefResultINTEL = 5711,
+ SpvOpTypeAvcSicResultINTEL = 5712,
+ SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713,
+ SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714,
+ SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715,
+ SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716,
+ SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717,
+ SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718,
+ SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719,
+ SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720,
+ SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721,
+ SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722,
+ SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723,
+ SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724,
+ SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725,
+ SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726,
+ SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727,
+ SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728,
+ SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729,
+ SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730,
+ SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731,
+ SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732,
+ SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733,
+ SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734,
+ SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735,
+ SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736,
+ SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737,
+ SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738,
+ SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739,
+ SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740,
+ SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741,
+ SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742,
+ SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743,
+ SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744,
+ SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745,
+ SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746,
+ SpvOpSubgroupAvcImeInitializeINTEL = 5747,
+ SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748,
+ SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749,
+ SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750,
+ SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751,
+ SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752,
+ SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753,
+ SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754,
+ SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755,
+ SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756,
+ SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757,
+ SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758,
+ SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759,
+ SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760,
+ SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761,
+ SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762,
+ SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763,
+ SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764,
+ SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765,
+ SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766,
+ SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767,
+ SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768,
+ SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769,
+ SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770,
+ SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771,
+ SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772,
+ SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773,
+ SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774,
+ SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775,
+ SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776,
+ SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777,
+ SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778,
+ SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779,
+ SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780,
+ SpvOpSubgroupAvcFmeInitializeINTEL = 5781,
+ SpvOpSubgroupAvcBmeInitializeINTEL = 5782,
+ SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783,
+ SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784,
+ SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785,
+ SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786,
+ SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787,
+ SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788,
+ SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789,
+ SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790,
+ SpvOpSubgroupAvcSicInitializeINTEL = 5791,
+ SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792,
+ SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793,
+ SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794,
+ SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795,
+ SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796,
+ SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797,
+ SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798,
+ SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799,
+ SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800,
+ SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801,
+ SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802,
+ SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803,
+ SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804,
+ SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805,
+ SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806,
+ SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807,
+ SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808,
+ SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809,
+ SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810,
+ SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811,
+ SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812,
+ SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813,
+ SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814,
+ SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815,
+ SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816,
SpvOpMax = 0x7fffffff,
} SpvOp;
+#ifdef SPV_ENABLE_UTILITY_CODE
+inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) {
+ *hasResult = *hasResultType = false;
+ switch (opcode) {
+ default: /* unknown opcode */ break;
+ case SpvOpNop: *hasResult = false; *hasResultType = false; break;
+ case SpvOpUndef: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break;
+ case SpvOpSource: *hasResult = false; *hasResultType = false; break;
+ case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break;
+ case SpvOpName: *hasResult = false; *hasResultType = false; break;
+ case SpvOpMemberName: *hasResult = false; *hasResultType = false; break;
+ case SpvOpString: *hasResult = true; *hasResultType = false; break;
+ case SpvOpLine: *hasResult = false; *hasResultType = false; break;
+ case SpvOpExtension: *hasResult = false; *hasResultType = false; break;
+ case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break;
+ case SpvOpExtInst: *hasResult = true; *hasResultType = true; break;
+ case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break;
+ case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break;
+ case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCapability: *hasResult = false; *hasResultType = false; break;
+ case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break;
+ case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConstant: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFunction: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break;
+ case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break;
+ case SpvOpVariable: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLoad: *hasResult = true; *hasResultType = true; break;
+ case SpvOpStore: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break;
+ case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break;
+ case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break;
+ case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+ case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break;
+ case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDecorate: *hasResult = false; *hasResultType = false; break;
+ case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break;
+ case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break;
+ case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break;
+ case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break;
+ case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break;
+ case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break;
+ case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break;
+ case SpvOpTranspose: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageGather: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageDrefGather: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageRead: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break;
+ case SpvOpImage: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUConvert: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSConvert: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFConvert: *hasResult = true; *hasResultType = true; break;
+ case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break;
+ case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitcast: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSNegate: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFNegate: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpISub: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFSub: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIMul: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFMul: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUDiv: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSDiv: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFDiv: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUMod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSRem: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSMod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFRem: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFMod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break;
+ case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break;
+ case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break;
+ case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break;
+ case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break;
+ case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDot: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break;
+ case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAny: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAll: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIsNan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIsInf: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break;
+ case SpvOpOrdered: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUnordered: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSelect: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpULessThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFOrdNotEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break;
+ case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break;
+ case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpNot: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBitCount: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDPdx: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDPdy: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFwidth: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break;
+ case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break;
+ case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break;
+ case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break;
+ case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break;
+ case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break;
+ case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break;
+ case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break;
+ case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break;
+ case SpvOpPhi: *hasResult = true; *hasResultType = true; break;
+ case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break;
+ case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break;
+ case SpvOpLabel: *hasResult = true; *hasResultType = false; break;
+ case SpvOpBranch: *hasResult = false; *hasResultType = false; break;
+ case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break;
+ case SpvOpSwitch: *hasResult = false; *hasResultType = false; break;
+ case SpvOpKill: *hasResult = false; *hasResultType = false; break;
+ case SpvOpReturn: *hasResult = false; *hasResultType = false; break;
+ case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break;
+ case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break;
+ case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break;
+ case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break;
+ case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break;
+ case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break;
+ case SpvOpWritePipe: *hasResult = true; *hasResultType = true; break;
+ case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break;
+ case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break;
+ case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+ case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+ case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+ case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+ case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break;
+ case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break;
+ case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break;
+ case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSetUserEventStatus: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break;
+ case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break;
+ case SpvOpNoLine: *hasResult = false; *hasResultType = false; break;
+ case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break;
+ case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break;
+ case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break;
+ case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break;
+ case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break;
+ case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break;
+ case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break;
+ case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break;
+ case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break;
+ case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break;
+ case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break;
+ case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break;
+ case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
+ case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break;
+ case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+ case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
+ case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
+ case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break;
+ case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break;
+ case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break;
+ case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break;
+ case SpvOpExecuteCallableNV: *hasResult = false; *hasResultType = false; break;
+ case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break;
+ case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break;
+ case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break;
+ case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break;
+ case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+ case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+ case SpvOpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break;
+ case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+ case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+ case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+ case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break;
+ case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
+ case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+ case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break;
+ }
+}
+#endif /* SPV_ENABLE_UTILITY_CODE */
+
#endif
diff --git a/src/3rdparty/SPIRV-Cross/spirv.hpp b/src/3rdparty/SPIRV-Cross/spirv.hpp
index adc13de..f5cbda1 100644
--- a/src/3rdparty/SPIRV-Cross/spirv.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv.hpp
@@ -49,12 +49,12 @@ namespace spv {
typedef unsigned int Id;
-#define SPV_VERSION 0x10300
-#define SPV_REVISION 6
+#define SPV_VERSION 0x10500
+#define SPV_REVISION 1
static const unsigned int MagicNumber = 0x07230203;
-static const unsigned int Version = 0x00010300;
-static const unsigned int Revision = 6;
+static const unsigned int Version = 0x00010400;
+static const unsigned int Revision = 1;
static const unsigned int OpCodeMask = 0xffff;
static const unsigned int WordCountShift = 16;
@@ -91,6 +91,7 @@ enum AddressingModel {
AddressingModelLogical = 0,
AddressingModelPhysical32 = 1,
AddressingModelPhysical64 = 2,
+ AddressingModelPhysicalStorageBuffer64 = 5348,
AddressingModelPhysicalStorageBuffer64EXT = 5348,
AddressingModelMax = 0x7fffffff,
};
@@ -99,6 +100,7 @@ enum MemoryModel {
MemoryModelSimple = 0,
MemoryModelGLSL450 = 1,
MemoryModelOpenCL = 2,
+ MemoryModelVulkan = 3,
MemoryModelVulkanKHR = 3,
MemoryModelMax = 0x7fffffff,
};
@@ -154,6 +156,12 @@ enum ExecutionMode {
ExecutionModeDerivativeGroupQuadsNV = 5289,
ExecutionModeDerivativeGroupLinearNV = 5290,
ExecutionModeOutputTrianglesNV = 5298,
+ ExecutionModePixelInterlockOrderedEXT = 5366,
+ ExecutionModePixelInterlockUnorderedEXT = 5367,
+ ExecutionModeSampleInterlockOrderedEXT = 5368,
+ ExecutionModeSampleInterlockUnorderedEXT = 5369,
+ ExecutionModeShadingRateInterlockOrderedEXT = 5370,
+ ExecutionModeShadingRateInterlockUnorderedEXT = 5371,
ExecutionModeMax = 0x7fffffff,
};
@@ -177,6 +185,7 @@ enum StorageClass {
StorageClassHitAttributeNV = 5339,
StorageClassIncomingRayPayloadNV = 5342,
StorageClassShaderRecordBufferNV = 5343,
+ StorageClassPhysicalStorageBuffer = 5349,
StorageClassPhysicalStorageBufferEXT = 5349,
StorageClassMax = 0x7fffffff,
};
@@ -305,10 +314,16 @@ enum ImageOperandsShift {
ImageOperandsConstOffsetsShift = 5,
ImageOperandsSampleShift = 6,
ImageOperandsMinLodShift = 7,
+ ImageOperandsMakeTexelAvailableShift = 8,
ImageOperandsMakeTexelAvailableKHRShift = 8,
+ ImageOperandsMakeTexelVisibleShift = 9,
ImageOperandsMakeTexelVisibleKHRShift = 9,
+ ImageOperandsNonPrivateTexelShift = 10,
ImageOperandsNonPrivateTexelKHRShift = 10,
+ ImageOperandsVolatileTexelShift = 11,
ImageOperandsVolatileTexelKHRShift = 11,
+ ImageOperandsSignExtendShift = 12,
+ ImageOperandsZeroExtendShift = 13,
ImageOperandsMax = 0x7fffffff,
};
@@ -322,10 +337,16 @@ enum ImageOperandsMask {
ImageOperandsConstOffsetsMask = 0x00000020,
ImageOperandsSampleMask = 0x00000040,
ImageOperandsMinLodMask = 0x00000080,
+ ImageOperandsMakeTexelAvailableMask = 0x00000100,
ImageOperandsMakeTexelAvailableKHRMask = 0x00000100,
+ ImageOperandsMakeTexelVisibleMask = 0x00000200,
ImageOperandsMakeTexelVisibleKHRMask = 0x00000200,
+ ImageOperandsNonPrivateTexelMask = 0x00000400,
ImageOperandsNonPrivateTexelKHRMask = 0x00000400,
+ ImageOperandsVolatileTexelMask = 0x00000800,
ImageOperandsVolatileTexelKHRMask = 0x00000800,
+ ImageOperandsSignExtendMask = 0x00001000,
+ ImageOperandsZeroExtendMask = 0x00002000,
};
enum FPFastMathModeShift {
@@ -406,6 +427,7 @@ enum Decoration {
DecorationNonWritable = 24,
DecorationNonReadable = 25,
DecorationUniform = 26,
+ DecorationUniformId = 27,
DecorationSaturatedConversion = 28,
DecorationStream = 29,
DecorationLocation = 30,
@@ -437,11 +459,17 @@ enum Decoration {
DecorationPerViewNV = 5272,
DecorationPerTaskNV = 5273,
DecorationPerVertexNV = 5285,
+ DecorationNonUniform = 5300,
DecorationNonUniformEXT = 5300,
+ DecorationRestrictPointer = 5355,
DecorationRestrictPointerEXT = 5355,
+ DecorationAliasedPointer = 5356,
DecorationAliasedPointerEXT = 5356,
+ DecorationCounterBuffer = 5634,
DecorationHlslCounterBufferGOOGLE = 5634,
DecorationHlslSemanticGOOGLE = 5635,
+ DecorationUserSemantic = 5635,
+ DecorationUserTypeGOOGLE = 5636,
DecorationMax = 0x7fffffff,
};
@@ -544,6 +572,10 @@ enum BuiltIn {
BuiltInHitTNV = 5332,
BuiltInHitKindNV = 5333,
BuiltInIncomingRayFlagsNV = 5351,
+ BuiltInWarpsPerSMNV = 5374,
+ BuiltInSMCountNV = 5375,
+ BuiltInWarpIDNV = 5376,
+ BuiltInSMIDNV = 5377,
BuiltInMax = 0x7fffffff,
};
@@ -564,6 +596,11 @@ enum LoopControlShift {
LoopControlDontUnrollShift = 1,
LoopControlDependencyInfiniteShift = 2,
LoopControlDependencyLengthShift = 3,
+ LoopControlMinIterationsShift = 4,
+ LoopControlMaxIterationsShift = 5,
+ LoopControlIterationMultipleShift = 6,
+ LoopControlPeelCountShift = 7,
+ LoopControlPartialCountShift = 8,
LoopControlMax = 0x7fffffff,
};
@@ -573,6 +610,11 @@ enum LoopControlMask {
LoopControlDontUnrollMask = 0x00000002,
LoopControlDependencyInfiniteMask = 0x00000004,
LoopControlDependencyLengthMask = 0x00000008,
+ LoopControlMinIterationsMask = 0x00000010,
+ LoopControlMaxIterationsMask = 0x00000020,
+ LoopControlIterationMultipleMask = 0x00000040,
+ LoopControlPeelCountMask = 0x00000080,
+ LoopControlPartialCountMask = 0x00000100,
};
enum FunctionControlShift {
@@ -602,9 +644,13 @@ enum MemorySemanticsShift {
MemorySemanticsCrossWorkgroupMemoryShift = 9,
MemorySemanticsAtomicCounterMemoryShift = 10,
MemorySemanticsImageMemoryShift = 11,
+ MemorySemanticsOutputMemoryShift = 12,
MemorySemanticsOutputMemoryKHRShift = 12,
+ MemorySemanticsMakeAvailableShift = 13,
MemorySemanticsMakeAvailableKHRShift = 13,
+ MemorySemanticsMakeVisibleShift = 14,
MemorySemanticsMakeVisibleKHRShift = 14,
+ MemorySemanticsVolatileShift = 15,
MemorySemanticsMax = 0x7fffffff,
};
@@ -620,17 +666,24 @@ enum MemorySemanticsMask {
MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
MemorySemanticsAtomicCounterMemoryMask = 0x00000400,
MemorySemanticsImageMemoryMask = 0x00000800,
+ MemorySemanticsOutputMemoryMask = 0x00001000,
MemorySemanticsOutputMemoryKHRMask = 0x00001000,
+ MemorySemanticsMakeAvailableMask = 0x00002000,
MemorySemanticsMakeAvailableKHRMask = 0x00002000,
+ MemorySemanticsMakeVisibleMask = 0x00004000,
MemorySemanticsMakeVisibleKHRMask = 0x00004000,
+ MemorySemanticsVolatileMask = 0x00008000,
};
enum MemoryAccessShift {
MemoryAccessVolatileShift = 0,
MemoryAccessAlignedShift = 1,
MemoryAccessNontemporalShift = 2,
+ MemoryAccessMakePointerAvailableShift = 3,
MemoryAccessMakePointerAvailableKHRShift = 3,
+ MemoryAccessMakePointerVisibleShift = 4,
MemoryAccessMakePointerVisibleKHRShift = 4,
+ MemoryAccessNonPrivatePointerShift = 5,
MemoryAccessNonPrivatePointerKHRShift = 5,
MemoryAccessMax = 0x7fffffff,
};
@@ -640,8 +693,11 @@ enum MemoryAccessMask {
MemoryAccessVolatileMask = 0x00000001,
MemoryAccessAlignedMask = 0x00000002,
MemoryAccessNontemporalMask = 0x00000004,
+ MemoryAccessMakePointerAvailableMask = 0x00000008,
MemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+ MemoryAccessMakePointerVisibleMask = 0x00000010,
MemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+ MemoryAccessNonPrivatePointerMask = 0x00000020,
MemoryAccessNonPrivatePointerKHRMask = 0x00000020,
};
@@ -651,6 +707,7 @@ enum Scope {
ScopeWorkgroup = 2,
ScopeSubgroup = 3,
ScopeInvocation = 4,
+ ScopeQueueFamily = 5,
ScopeQueueFamilyKHR = 5,
ScopeMax = 0x7fffffff,
};
@@ -751,6 +808,8 @@ enum Capability {
CapabilityGroupNonUniformShuffleRelative = 66,
CapabilityGroupNonUniformClustered = 67,
CapabilityGroupNonUniformQuad = 68,
+ CapabilityShaderLayer = 69,
+ CapabilityShaderViewportIndex = 70,
CapabilitySubgroupBallotKHR = 4423,
CapabilityDrawParameters = 4427,
CapabilitySubgroupVoteKHR = 4431,
@@ -779,6 +838,7 @@ enum Capability {
CapabilityFragmentMaskAMD = 5010,
CapabilityStencilExportEXT = 5013,
CapabilityImageReadWriteLodAMD = 5015,
+ CapabilityShaderClockKHR = 5055,
CapabilitySampleMaskOverrideCoverageNV = 5249,
CapabilityGeometryShaderPassthroughNV = 5251,
CapabilityShaderViewportIndexLayerEXT = 5254,
@@ -794,27 +854,52 @@ enum Capability {
CapabilityFragmentDensityEXT = 5291,
CapabilityShadingRateNV = 5291,
CapabilityGroupNonUniformPartitionedNV = 5297,
+ CapabilityShaderNonUniform = 5301,
CapabilityShaderNonUniformEXT = 5301,
+ CapabilityRuntimeDescriptorArray = 5302,
CapabilityRuntimeDescriptorArrayEXT = 5302,
+ CapabilityInputAttachmentArrayDynamicIndexing = 5303,
CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303,
+ CapabilityUniformTexelBufferArrayDynamicIndexing = 5304,
CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304,
+ CapabilityStorageTexelBufferArrayDynamicIndexing = 5305,
CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305,
+ CapabilityUniformBufferArrayNonUniformIndexing = 5306,
CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306,
+ CapabilitySampledImageArrayNonUniformIndexing = 5307,
CapabilitySampledImageArrayNonUniformIndexingEXT = 5307,
+ CapabilityStorageBufferArrayNonUniformIndexing = 5308,
CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308,
+ CapabilityStorageImageArrayNonUniformIndexing = 5309,
CapabilityStorageImageArrayNonUniformIndexingEXT = 5309,
+ CapabilityInputAttachmentArrayNonUniformIndexing = 5310,
CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310,
+ CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311,
CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+ CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
CapabilityRayTracingNV = 5340,
+ CapabilityVulkanMemoryModel = 5345,
CapabilityVulkanMemoryModelKHR = 5345,
+ CapabilityVulkanMemoryModelDeviceScope = 5346,
CapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
+ CapabilityPhysicalStorageBufferAddresses = 5347,
CapabilityPhysicalStorageBufferAddressesEXT = 5347,
CapabilityComputeDerivativeGroupLinearNV = 5350,
+ CapabilityCooperativeMatrixNV = 5357,
+ CapabilityFragmentShaderSampleInterlockEXT = 5363,
+ CapabilityFragmentShaderShadingRateInterlockEXT = 5372,
+ CapabilityShaderSMBuiltinsNV = 5373,
+ CapabilityFragmentShaderPixelInterlockEXT = 5378,
+ CapabilityDemoteToHelperInvocationEXT = 5379,
CapabilitySubgroupShuffleINTEL = 5568,
CapabilitySubgroupBufferBlockIOINTEL = 5569,
CapabilitySubgroupImageBlockIOINTEL = 5570,
CapabilitySubgroupImageMediaBlockIOINTEL = 5579,
+ CapabilityIntegerFunctions2INTEL = 5584,
+ CapabilitySubgroupAvcMotionEstimationINTEL = 5696,
+ CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697,
+ CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698,
CapabilityMax = 0x7fffffff,
};
@@ -1159,6 +1244,10 @@ enum Op {
OpGroupNonUniformLogicalXor = 364,
OpGroupNonUniformQuadBroadcast = 365,
OpGroupNonUniformQuadSwap = 366,
+ OpCopyLogical = 400,
+ OpPtrEqual = 401,
+ OpPtrNotEqual = 402,
+ OpPtrDiff = 403,
OpSubgroupBallotKHR = 4421,
OpSubgroupFirstInvocationKHR = 4422,
OpSubgroupAllKHR = 4428,
@@ -1175,6 +1264,7 @@ enum Op {
OpGroupSMaxNonUniformAMD = 5007,
OpFragmentMaskFetchAMD = 5011,
OpFragmentFetchAMD = 5012,
+ OpReadClockKHR = 5056,
OpImageSampleFootprintNV = 5283,
OpGroupNonUniformPartitionNV = 5296,
OpWritePackedPrimitiveIndices4x8NV = 5299,
@@ -1184,6 +1274,15 @@ enum Op {
OpTraceNV = 5337,
OpTypeAccelerationStructureNV = 5341,
OpExecuteCallableNV = 5344,
+ OpTypeCooperativeMatrixNV = 5358,
+ OpCooperativeMatrixLoadNV = 5359,
+ OpCooperativeMatrixStoreNV = 5360,
+ OpCooperativeMatrixMulAddNV = 5361,
+ OpCooperativeMatrixLengthNV = 5362,
+ OpBeginInvocationInterlockEXT = 5364,
+ OpEndInvocationInterlockEXT = 5365,
+ OpDemoteToHelperInvocationEXT = 5380,
+ OpIsHelperInvocationEXT = 5381,
OpSubgroupShuffleINTEL = 5571,
OpSubgroupShuffleDownINTEL = 5572,
OpSubgroupShuffleUpINTEL = 5573,
@@ -1194,11 +1293,677 @@ enum Op {
OpSubgroupImageBlockWriteINTEL = 5578,
OpSubgroupImageMediaBlockReadINTEL = 5580,
OpSubgroupImageMediaBlockWriteINTEL = 5581,
+ OpUCountLeadingZerosINTEL = 5585,
+ OpUCountTrailingZerosINTEL = 5586,
+ OpAbsISubINTEL = 5587,
+ OpAbsUSubINTEL = 5588,
+ OpIAddSatINTEL = 5589,
+ OpUAddSatINTEL = 5590,
+ OpIAverageINTEL = 5591,
+ OpUAverageINTEL = 5592,
+ OpIAverageRoundedINTEL = 5593,
+ OpUAverageRoundedINTEL = 5594,
+ OpISubSatINTEL = 5595,
+ OpUSubSatINTEL = 5596,
+ OpIMul32x16INTEL = 5597,
+ OpUMul32x16INTEL = 5598,
+ OpDecorateString = 5632,
OpDecorateStringGOOGLE = 5632,
+ OpMemberDecorateString = 5633,
OpMemberDecorateStringGOOGLE = 5633,
+ OpVmeImageINTEL = 5699,
+ OpTypeVmeImageINTEL = 5700,
+ OpTypeAvcImePayloadINTEL = 5701,
+ OpTypeAvcRefPayloadINTEL = 5702,
+ OpTypeAvcSicPayloadINTEL = 5703,
+ OpTypeAvcMcePayloadINTEL = 5704,
+ OpTypeAvcMceResultINTEL = 5705,
+ OpTypeAvcImeResultINTEL = 5706,
+ OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707,
+ OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708,
+ OpTypeAvcImeSingleReferenceStreaminINTEL = 5709,
+ OpTypeAvcImeDualReferenceStreaminINTEL = 5710,
+ OpTypeAvcRefResultINTEL = 5711,
+ OpTypeAvcSicResultINTEL = 5712,
+ OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713,
+ OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714,
+ OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715,
+ OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716,
+ OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717,
+ OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718,
+ OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719,
+ OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720,
+ OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721,
+ OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722,
+ OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723,
+ OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724,
+ OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725,
+ OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726,
+ OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727,
+ OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728,
+ OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729,
+ OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730,
+ OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731,
+ OpSubgroupAvcMceConvertToImePayloadINTEL = 5732,
+ OpSubgroupAvcMceConvertToImeResultINTEL = 5733,
+ OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734,
+ OpSubgroupAvcMceConvertToRefResultINTEL = 5735,
+ OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736,
+ OpSubgroupAvcMceConvertToSicResultINTEL = 5737,
+ OpSubgroupAvcMceGetMotionVectorsINTEL = 5738,
+ OpSubgroupAvcMceGetInterDistortionsINTEL = 5739,
+ OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740,
+ OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741,
+ OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742,
+ OpSubgroupAvcMceGetInterDirectionsINTEL = 5743,
+ OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744,
+ OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745,
+ OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746,
+ OpSubgroupAvcImeInitializeINTEL = 5747,
+ OpSubgroupAvcImeSetSingleReferenceINTEL = 5748,
+ OpSubgroupAvcImeSetDualReferenceINTEL = 5749,
+ OpSubgroupAvcImeRefWindowSizeINTEL = 5750,
+ OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751,
+ OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752,
+ OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753,
+ OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754,
+ OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755,
+ OpSubgroupAvcImeSetWeightedSadINTEL = 5756,
+ OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757,
+ OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758,
+ OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759,
+ OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760,
+ OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761,
+ OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762,
+ OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763,
+ OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764,
+ OpSubgroupAvcImeConvertToMceResultINTEL = 5765,
+ OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766,
+ OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767,
+ OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768,
+ OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769,
+ OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770,
+ OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771,
+ OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772,
+ OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773,
+ OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774,
+ OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775,
+ OpSubgroupAvcImeGetBorderReachedINTEL = 5776,
+ OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777,
+ OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778,
+ OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779,
+ OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780,
+ OpSubgroupAvcFmeInitializeINTEL = 5781,
+ OpSubgroupAvcBmeInitializeINTEL = 5782,
+ OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783,
+ OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784,
+ OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785,
+ OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786,
+ OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787,
+ OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788,
+ OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789,
+ OpSubgroupAvcRefConvertToMceResultINTEL = 5790,
+ OpSubgroupAvcSicInitializeINTEL = 5791,
+ OpSubgroupAvcSicConfigureSkcINTEL = 5792,
+ OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793,
+ OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794,
+ OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795,
+ OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796,
+ OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797,
+ OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798,
+ OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799,
+ OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800,
+ OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801,
+ OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802,
+ OpSubgroupAvcSicEvaluateIpeINTEL = 5803,
+ OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804,
+ OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805,
+ OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806,
+ OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807,
+ OpSubgroupAvcSicConvertToMceResultINTEL = 5808,
+ OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809,
+ OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810,
+ OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811,
+ OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812,
+ OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813,
+ OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814,
+ OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815,
+ OpSubgroupAvcSicGetInterRawSadsINTEL = 5816,
OpMax = 0x7fffffff,
};
+#ifdef SPV_ENABLE_UTILITY_CODE
+inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
+ *hasResult = *hasResultType = false;
+ switch (opcode) {
+ default: /* unknown opcode */ break;
+ case OpNop: *hasResult = false; *hasResultType = false; break;
+ case OpUndef: *hasResult = true; *hasResultType = true; break;
+ case OpSourceContinued: *hasResult = false; *hasResultType = false; break;
+ case OpSource: *hasResult = false; *hasResultType = false; break;
+ case OpSourceExtension: *hasResult = false; *hasResultType = false; break;
+ case OpName: *hasResult = false; *hasResultType = false; break;
+ case OpMemberName: *hasResult = false; *hasResultType = false; break;
+ case OpString: *hasResult = true; *hasResultType = false; break;
+ case OpLine: *hasResult = false; *hasResultType = false; break;
+ case OpExtension: *hasResult = false; *hasResultType = false; break;
+ case OpExtInstImport: *hasResult = true; *hasResultType = false; break;
+ case OpExtInst: *hasResult = true; *hasResultType = true; break;
+ case OpMemoryModel: *hasResult = false; *hasResultType = false; break;
+ case OpEntryPoint: *hasResult = false; *hasResultType = false; break;
+ case OpExecutionMode: *hasResult = false; *hasResultType = false; break;
+ case OpCapability: *hasResult = false; *hasResultType = false; break;
+ case OpTypeVoid: *hasResult = true; *hasResultType = false; break;
+ case OpTypeBool: *hasResult = true; *hasResultType = false; break;
+ case OpTypeInt: *hasResult = true; *hasResultType = false; break;
+ case OpTypeFloat: *hasResult = true; *hasResultType = false; break;
+ case OpTypeVector: *hasResult = true; *hasResultType = false; break;
+ case OpTypeMatrix: *hasResult = true; *hasResultType = false; break;
+ case OpTypeImage: *hasResult = true; *hasResultType = false; break;
+ case OpTypeSampler: *hasResult = true; *hasResultType = false; break;
+ case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break;
+ case OpTypeArray: *hasResult = true; *hasResultType = false; break;
+ case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break;
+ case OpTypeStruct: *hasResult = true; *hasResultType = false; break;
+ case OpTypeOpaque: *hasResult = true; *hasResultType = false; break;
+ case OpTypePointer: *hasResult = true; *hasResultType = false; break;
+ case OpTypeFunction: *hasResult = true; *hasResultType = false; break;
+ case OpTypeEvent: *hasResult = true; *hasResultType = false; break;
+ case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break;
+ case OpTypeReserveId: *hasResult = true; *hasResultType = false; break;
+ case OpTypeQueue: *hasResult = true; *hasResultType = false; break;
+ case OpTypePipe: *hasResult = true; *hasResultType = false; break;
+ case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break;
+ case OpConstantTrue: *hasResult = true; *hasResultType = true; break;
+ case OpConstantFalse: *hasResult = true; *hasResultType = true; break;
+ case OpConstant: *hasResult = true; *hasResultType = true; break;
+ case OpConstantComposite: *hasResult = true; *hasResultType = true; break;
+ case OpConstantSampler: *hasResult = true; *hasResultType = true; break;
+ case OpConstantNull: *hasResult = true; *hasResultType = true; break;
+ case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break;
+ case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break;
+ case OpSpecConstant: *hasResult = true; *hasResultType = true; break;
+ case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break;
+ case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break;
+ case OpFunction: *hasResult = true; *hasResultType = true; break;
+ case OpFunctionParameter: *hasResult = true; *hasResultType = true; break;
+ case OpFunctionEnd: *hasResult = false; *hasResultType = false; break;
+ case OpFunctionCall: *hasResult = true; *hasResultType = true; break;
+ case OpVariable: *hasResult = true; *hasResultType = true; break;
+ case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break;
+ case OpLoad: *hasResult = true; *hasResultType = true; break;
+ case OpStore: *hasResult = false; *hasResultType = false; break;
+ case OpCopyMemory: *hasResult = false; *hasResultType = false; break;
+ case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break;
+ case OpAccessChain: *hasResult = true; *hasResultType = true; break;
+ case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break;
+ case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+ case OpArrayLength: *hasResult = true; *hasResultType = true; break;
+ case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break;
+ case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break;
+ case OpDecorate: *hasResult = false; *hasResultType = false; break;
+ case OpMemberDecorate: *hasResult = false; *hasResultType = false; break;
+ case OpDecorationGroup: *hasResult = true; *hasResultType = false; break;
+ case OpGroupDecorate: *hasResult = false; *hasResultType = false; break;
+ case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break;
+ case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break;
+ case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break;
+ case OpVectorShuffle: *hasResult = true; *hasResultType = true; break;
+ case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break;
+ case OpCompositeExtract: *hasResult = true; *hasResultType = true; break;
+ case OpCompositeInsert: *hasResult = true; *hasResultType = true; break;
+ case OpCopyObject: *hasResult = true; *hasResultType = true; break;
+ case OpTranspose: *hasResult = true; *hasResultType = true; break;
+ case OpSampledImage: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageFetch: *hasResult = true; *hasResultType = true; break;
+ case OpImageGather: *hasResult = true; *hasResultType = true; break;
+ case OpImageDrefGather: *hasResult = true; *hasResultType = true; break;
+ case OpImageRead: *hasResult = true; *hasResultType = true; break;
+ case OpImageWrite: *hasResult = false; *hasResultType = false; break;
+ case OpImage: *hasResult = true; *hasResultType = true; break;
+ case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break;
+ case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break;
+ case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageQuerySize: *hasResult = true; *hasResultType = true; break;
+ case OpImageQueryLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break;
+ case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break;
+ case OpConvertFToU: *hasResult = true; *hasResultType = true; break;
+ case OpConvertFToS: *hasResult = true; *hasResultType = true; break;
+ case OpConvertSToF: *hasResult = true; *hasResultType = true; break;
+ case OpConvertUToF: *hasResult = true; *hasResultType = true; break;
+ case OpUConvert: *hasResult = true; *hasResultType = true; break;
+ case OpSConvert: *hasResult = true; *hasResultType = true; break;
+ case OpFConvert: *hasResult = true; *hasResultType = true; break;
+ case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break;
+ case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break;
+ case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break;
+ case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break;
+ case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break;
+ case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break;
+ case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break;
+ case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break;
+ case OpBitcast: *hasResult = true; *hasResultType = true; break;
+ case OpSNegate: *hasResult = true; *hasResultType = true; break;
+ case OpFNegate: *hasResult = true; *hasResultType = true; break;
+ case OpIAdd: *hasResult = true; *hasResultType = true; break;
+ case OpFAdd: *hasResult = true; *hasResultType = true; break;
+ case OpISub: *hasResult = true; *hasResultType = true; break;
+ case OpFSub: *hasResult = true; *hasResultType = true; break;
+ case OpIMul: *hasResult = true; *hasResultType = true; break;
+ case OpFMul: *hasResult = true; *hasResultType = true; break;
+ case OpUDiv: *hasResult = true; *hasResultType = true; break;
+ case OpSDiv: *hasResult = true; *hasResultType = true; break;
+ case OpFDiv: *hasResult = true; *hasResultType = true; break;
+ case OpUMod: *hasResult = true; *hasResultType = true; break;
+ case OpSRem: *hasResult = true; *hasResultType = true; break;
+ case OpSMod: *hasResult = true; *hasResultType = true; break;
+ case OpFRem: *hasResult = true; *hasResultType = true; break;
+ case OpFMod: *hasResult = true; *hasResultType = true; break;
+ case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break;
+ case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break;
+ case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break;
+ case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break;
+ case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break;
+ case OpOuterProduct: *hasResult = true; *hasResultType = true; break;
+ case OpDot: *hasResult = true; *hasResultType = true; break;
+ case OpIAddCarry: *hasResult = true; *hasResultType = true; break;
+ case OpISubBorrow: *hasResult = true; *hasResultType = true; break;
+ case OpUMulExtended: *hasResult = true; *hasResultType = true; break;
+ case OpSMulExtended: *hasResult = true; *hasResultType = true; break;
+ case OpAny: *hasResult = true; *hasResultType = true; break;
+ case OpAll: *hasResult = true; *hasResultType = true; break;
+ case OpIsNan: *hasResult = true; *hasResultType = true; break;
+ case OpIsInf: *hasResult = true; *hasResultType = true; break;
+ case OpIsFinite: *hasResult = true; *hasResultType = true; break;
+ case OpIsNormal: *hasResult = true; *hasResultType = true; break;
+ case OpSignBitSet: *hasResult = true; *hasResultType = true; break;
+ case OpLessOrGreater: *hasResult = true; *hasResultType = true; break;
+ case OpOrdered: *hasResult = true; *hasResultType = true; break;
+ case OpUnordered: *hasResult = true; *hasResultType = true; break;
+ case OpLogicalEqual: *hasResult = true; *hasResultType = true; break;
+ case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break;
+ case OpLogicalOr: *hasResult = true; *hasResultType = true; break;
+ case OpLogicalAnd: *hasResult = true; *hasResultType = true; break;
+ case OpLogicalNot: *hasResult = true; *hasResultType = true; break;
+ case OpSelect: *hasResult = true; *hasResultType = true; break;
+ case OpIEqual: *hasResult = true; *hasResultType = true; break;
+ case OpINotEqual: *hasResult = true; *hasResultType = true; break;
+ case OpUGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case OpSGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpULessThan: *hasResult = true; *hasResultType = true; break;
+ case OpSLessThan: *hasResult = true; *hasResultType = true; break;
+ case OpULessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFOrdEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFUnordEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break;
+ case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break;
+ case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break;
+ case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break;
+ case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break;
+ case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break;
+ case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break;
+ case OpBitwiseOr: *hasResult = true; *hasResultType = true; break;
+ case OpBitwiseXor: *hasResult = true; *hasResultType = true; break;
+ case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+ case OpNot: *hasResult = true; *hasResultType = true; break;
+ case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break;
+ case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break;
+ case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break;
+ case OpBitReverse: *hasResult = true; *hasResultType = true; break;
+ case OpBitCount: *hasResult = true; *hasResultType = true; break;
+ case OpDPdx: *hasResult = true; *hasResultType = true; break;
+ case OpDPdy: *hasResult = true; *hasResultType = true; break;
+ case OpFwidth: *hasResult = true; *hasResultType = true; break;
+ case OpDPdxFine: *hasResult = true; *hasResultType = true; break;
+ case OpDPdyFine: *hasResult = true; *hasResultType = true; break;
+ case OpFwidthFine: *hasResult = true; *hasResultType = true; break;
+ case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break;
+ case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break;
+ case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break;
+ case OpEmitVertex: *hasResult = false; *hasResultType = false; break;
+ case OpEndPrimitive: *hasResult = false; *hasResultType = false; break;
+ case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break;
+ case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break;
+ case OpControlBarrier: *hasResult = false; *hasResultType = false; break;
+ case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break;
+ case OpAtomicLoad: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicStore: *hasResult = false; *hasResultType = false; break;
+ case OpAtomicExchange: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicISub: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicSMin: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicUMin: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicSMax: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicUMax: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicAnd: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicOr: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicXor: *hasResult = true; *hasResultType = true; break;
+ case OpPhi: *hasResult = true; *hasResultType = true; break;
+ case OpLoopMerge: *hasResult = false; *hasResultType = false; break;
+ case OpSelectionMerge: *hasResult = false; *hasResultType = false; break;
+ case OpLabel: *hasResult = true; *hasResultType = false; break;
+ case OpBranch: *hasResult = false; *hasResultType = false; break;
+ case OpBranchConditional: *hasResult = false; *hasResultType = false; break;
+ case OpSwitch: *hasResult = false; *hasResultType = false; break;
+ case OpKill: *hasResult = false; *hasResultType = false; break;
+ case OpReturn: *hasResult = false; *hasResultType = false; break;
+ case OpReturnValue: *hasResult = false; *hasResultType = false; break;
+ case OpUnreachable: *hasResult = false; *hasResultType = false; break;
+ case OpLifetimeStart: *hasResult = false; *hasResultType = false; break;
+ case OpLifetimeStop: *hasResult = false; *hasResultType = false; break;
+ case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break;
+ case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break;
+ case OpGroupAll: *hasResult = true; *hasResultType = true; break;
+ case OpGroupAny: *hasResult = true; *hasResultType = true; break;
+ case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break;
+ case OpGroupIAdd: *hasResult = true; *hasResultType = true; break;
+ case OpGroupFAdd: *hasResult = true; *hasResultType = true; break;
+ case OpGroupFMin: *hasResult = true; *hasResultType = true; break;
+ case OpGroupUMin: *hasResult = true; *hasResultType = true; break;
+ case OpGroupSMin: *hasResult = true; *hasResultType = true; break;
+ case OpGroupFMax: *hasResult = true; *hasResultType = true; break;
+ case OpGroupUMax: *hasResult = true; *hasResultType = true; break;
+ case OpGroupSMax: *hasResult = true; *hasResultType = true; break;
+ case OpReadPipe: *hasResult = true; *hasResultType = true; break;
+ case OpWritePipe: *hasResult = true; *hasResultType = true; break;
+ case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break;
+ case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break;
+ case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+ case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+ case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+ case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+ case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break;
+ case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break;
+ case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break;
+ case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break;
+ case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break;
+ case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break;
+ case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break;
+ case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break;
+ case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break;
+ case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break;
+ case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break;
+ case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break;
+ case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break;
+ case OpRetainEvent: *hasResult = false; *hasResultType = false; break;
+ case OpReleaseEvent: *hasResult = false; *hasResultType = false; break;
+ case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break;
+ case OpIsValidEvent: *hasResult = true; *hasResultType = true; break;
+ case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break;
+ case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break;
+ case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break;
+ case OpBuildNDRange: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseGather: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break;
+ case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break;
+ case OpNoLine: *hasResult = false; *hasResultType = false; break;
+ case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break;
+ case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break;
+ case OpImageSparseRead: *hasResult = true; *hasResultType = true; break;
+ case OpSizeOf: *hasResult = true; *hasResultType = true; break;
+ case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break;
+ case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break;
+ case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break;
+ case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break;
+ case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break;
+ case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break;
+ case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break;
+ case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break;
+ case OpModuleProcessed: *hasResult = false; *hasResultType = false; break;
+ case OpExecutionModeId: *hasResult = false; *hasResultType = false; break;
+ case OpDecorateId: *hasResult = false; *hasResultType = false; break;
+ case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break;
+ case OpCopyLogical: *hasResult = true; *hasResultType = true; break;
+ case OpPtrEqual: *hasResult = true; *hasResultType = true; break;
+ case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break;
+ case OpPtrDiff: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break;
+ case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break;
+ case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break;
+ case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
+ case OpReadClockKHR: *hasResult = true; *hasResultType = true; break;
+ case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
+ case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
+ case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;
+ case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break;
+ case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break;
+ case OpTraceNV: *hasResult = false; *hasResultType = false; break;
+ case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break;
+ case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break;
+ case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break;
+ case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break;
+ case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break;
+ case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+ case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+ case OpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break;
+ case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+ case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
+ case OpDecorateString: *hasResult = false; *hasResultType = false; break;
+ case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break;
+ case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break;
+ }
+}
+#endif /* SPV_ENABLE_UTILITY_CODE */
+
// Overload operator| for mask bit combining
inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); }
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp
index 2f3cf25..463c756 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp
@@ -61,7 +61,7 @@ void CFG::build_immediate_dominators()
if (immediate_dominators[block])
{
assert(immediate_dominators[edge]);
- immediate_dominators[block] = find_common_dominator(block, edge);
+ immediate_dominators[block] = find_common_dominator(immediate_dominators[block], edge);
}
else
immediate_dominators[block] = edge;
@@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const
// We have a back edge if the visit order is set with the temporary magic value 0.
// Crossing edges will have already been recorded with a visit order.
auto itr = visit_order.find(to);
- assert(itr != end(visit_order));
- return itr->second.get() == 0;
+ return itr != end(visit_order) && itr->second.get() == 0;
+}
+
+bool CFG::has_visited_forward_edge(uint32_t to) const
+{
+ // If > 0, we have visited the edge already, and this is not a back edge branch.
+ auto itr = visit_order.find(to);
+ return itr != end(visit_order) && itr->second.get() > 0;
}
bool CFG::post_order_visit(uint32_t block_id)
@@ -83,14 +89,30 @@ bool CFG::post_order_visit(uint32_t block_id)
// If we have already branched to this block (back edge), stop recursion.
// If our branches are back-edges, we do not record them.
// We have to record crossing edges however.
- if (visit_order[block_id].get() >= 0)
- return !is_back_edge(block_id);
+ if (has_visited_forward_edge(block_id))
+ return true;
+ else if (is_back_edge(block_id))
+ return false;
// Block back-edges from recursively revisiting ourselves.
visit_order[block_id].get() = 0;
- // First visit our branch targets.
auto &block = compiler.get<SPIRBlock>(block_id);
+
+ // If this is a loop header, add an implied branch to the merge target.
+ // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
+ // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
+ // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
+ // We could use has_visited_forward_edge, but this breaks code-gen where the merge block is unreachable in the CFG.
+
+ // Make a point out of visiting merge target first. This is to make sure that post visit order outside the loop
+ // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis.
+ // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine,
+ // but for loops, only the header might end up actually branching to merge block.
+ if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
+ add_branch(block_id, block.merge_block);
+
+ // First visit our branch targets.
switch (block.terminator)
{
case SPIRBlock::Direct:
@@ -119,12 +141,56 @@ bool CFG::post_order_visit(uint32_t block_id)
break;
}
- // If this is a loop header, add an implied branch to the merge target.
- // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
- // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
- // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
- if (block.merge == SPIRBlock::MergeLoop)
- add_branch(block_id, block.merge_block);
+ // If this is a selection merge, add an implied branch to the merge target.
+ // This is needed to avoid cases where an inner branch dominates the outer branch.
+ // This can happen if one of the branches exit early, e.g.:
+ // if (cond) { ...; break; } else { var = 100 } use_var(var);
+ // We can use the variable without a Phi since there is only one possible parent here.
+ // However, in this case, we need to hoist out the inner variable to outside the branch.
+ // Use same strategy as loops.
+ if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
+ {
+ // If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
+ // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
+ // will be hoisted out to outside the selection merge.
+ // If size > 1, the variable will be automatically hoisted, so we should not mess with it.
+ // The exception here is switch blocks, where we can have multiple edges to merge block,
+ // all coming from same scope, so be more conservative in this case.
+ // Adding fake branches unconditionally breaks parameter preservation analysis,
+ // which looks at how variables are accessed through the CFG.
+ auto pred_itr = preceding_edges.find(block.next_block);
+ if (pred_itr != end(preceding_edges))
+ {
+ auto &pred = pred_itr->second;
+ auto succ_itr = succeeding_edges.find(block_id);
+ size_t num_succeeding_edges = 0;
+ if (succ_itr != end(succeeding_edges))
+ num_succeeding_edges = succ_itr->second.size();
+
+ if (block.terminator == SPIRBlock::MultiSelect && num_succeeding_edges == 1)
+ {
+ // Multiple branches can come from the same scope due to "break;", so we need to assume that all branches
+ // come from same case scope in worst case, even if there are multiple preceding edges.
+ // If we have more than one succeeding edge from the block header, it should be impossible
+ // to have a dominator be inside the block.
+ // The only case where this can go wrong is if we have 2 or more edges from the block header and
+ // 2 or more edges to the merge block, and still have the dominator be inside a case label.
+ if (!pred.empty())
+ add_branch(block_id, block.next_block);
+ }
+ else
+ {
+ if (pred.size() == 1 && *pred.begin() != block_id)
+ add_branch(block_id, block.next_block);
+ }
+ }
+ else
+ {
+ // If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it.
+ // We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge.
+ add_branch(block_id, block.next_block);
+ }
+ }
// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
visit_order[block_id].get() = ++visit_count;
@@ -152,6 +218,111 @@ void CFG::add_branch(uint32_t from, uint32_t to)
add_unique(succeeding_edges[from], to);
}
+uint32_t CFG::find_loop_dominator(uint32_t block_id) const
+{
+ while (block_id != SPIRBlock::NoDominator)
+ {
+ auto itr = preceding_edges.find(block_id);
+ if (itr == end(preceding_edges))
+ return SPIRBlock::NoDominator;
+ if (itr->second.empty())
+ return SPIRBlock::NoDominator;
+
+ uint32_t pred_block_id = SPIRBlock::NoDominator;
+ bool ignore_loop_header = false;
+
+ // If we are a merge block, go directly to the header block.
+ // Only consider a loop dominator if we are branching from inside a block to a loop header.
+ // NOTE: In the CFG we forced an edge from header to merge block always to support variable scopes properly.
+ for (auto &pred : itr->second)
+ {
+ auto &pred_block = compiler.get<SPIRBlock>(pred);
+ if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id))
+ {
+ pred_block_id = pred;
+ ignore_loop_header = true;
+ break;
+ }
+ else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id))
+ {
+ pred_block_id = pred;
+ break;
+ }
+ }
+
+ // No merge block means we can just pick any edge. Loop headers dominate the inner loop, so any path we
+ // take will lead there.
+ if (pred_block_id == SPIRBlock::NoDominator)
+ pred_block_id = itr->second.front();
+
+ block_id = pred_block_id;
+
+ if (!ignore_loop_header && block_id)
+ {
+ auto &block = compiler.get<SPIRBlock>(block_id);
+ if (block.merge == SPIRBlock::MergeLoop)
+ return block_id;
+ }
+ }
+
+ return block_id;
+}
+
+bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const
+{
+ // Walk backwards, starting from "to" block.
+ // Only follow pred edges if they have a 1:1 relationship, or a merge relationship.
+ // If we cannot find a path to "from", we must assume that to is inside control flow in some way.
+
+ auto &from_block = compiler.get<SPIRBlock>(from);
+ BlockID ignore_block_id = 0;
+ if (from_block.merge == SPIRBlock::MergeLoop)
+ ignore_block_id = from_block.merge_block;
+
+ while (to != from)
+ {
+ auto pred_itr = preceding_edges.find(to);
+ if (pred_itr == end(preceding_edges))
+ return false;
+
+ DominatorBuilder builder(*this);
+ for (auto &edge : pred_itr->second)
+ builder.add_block(edge);
+
+ uint32_t dominator = builder.get_dominator();
+ if (dominator == 0)
+ return false;
+
+ auto &dom = compiler.get<SPIRBlock>(dominator);
+
+ bool true_path_ignore = false;
+ bool false_path_ignore = false;
+ if (ignore_block_id && dom.terminator == SPIRBlock::Select)
+ {
+ auto &true_block = compiler.get<SPIRBlock>(dom.true_block);
+ auto &false_block = compiler.get<SPIRBlock>(dom.false_block);
+ auto &ignore_block = compiler.get<SPIRBlock>(ignore_block_id);
+ true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block);
+ false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block);
+ }
+
+ if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) ||
+ (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) ||
+ (dom.terminator == SPIRBlock::Direct && dom.next_block == to) ||
+ (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) ||
+ (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore))
+ {
+ // Allow walking selection constructs if the other branch reaches out of a loop construct.
+ // It cannot be in-scope anymore.
+ to = dominator;
+ }
+ else
+ return false;
+ }
+
+ return true;
+}
+
DominatorBuilder::DominatorBuilder(const CFG &cfg_)
: cfg(cfg_)
{
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp
index be10371..5f62cca 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp
@@ -88,11 +88,17 @@ public:
return;
seen_blocks.insert(block);
- op(block);
- for (auto b : get_succeeding_edges(block))
- walk_from(seen_blocks, b, op);
+ if (op(block))
+ {
+ for (auto b : get_succeeding_edges(block))
+ walk_from(seen_blocks, b, op);
+ }
}
+ uint32_t find_loop_dominator(uint32_t block) const;
+
+ bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const;
+
private:
struct VisitOrder
{
@@ -125,6 +131,7 @@ private:
uint32_t visit_count = 0;
bool is_back_edge(uint32_t to) const;
+ bool has_visited_forward_edge(uint32_t to) const;
};
class DominatorBuilder
diff --git a/src/3rdparty/SPIRV-Cross/spirv_common.hpp b/src/3rdparty/SPIRV-Cross/spirv_common.hpp
index 0cf1f56..c1c6fc8 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_common.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_common.hpp
@@ -20,6 +20,7 @@
#include "spirv.hpp"
#include "spirv_cross_containers.hpp"
#include "spirv_cross_error_handling.hpp"
+#include <functional>
// A bit crude, but allows projects which embed SPIRV-Cross statically to
// effectively hide all the symbols from other projects.
@@ -183,14 +184,14 @@ std::string join(Ts &&... ts)
return stream.str();
}
-inline std::string merge(const SmallVector<std::string> &list)
+inline std::string merge(const SmallVector<std::string> &list, const char *between = ", ")
{
StringStream<> stream;
for (auto &elem : list)
{
stream << elem;
if (&elem != &list.back())
- stream << ", ";
+ stream << between;
}
return stream.str();
}
@@ -270,20 +271,6 @@ struct Instruction
uint32_t length = 0;
};
-// Helper for Variant interface.
-struct IVariant
-{
- virtual ~IVariant() = default;
- virtual IVariant *clone(ObjectPoolBase *pool) = 0;
- uint32_t self = 0;
-};
-
-#define SPIRV_CROSS_DECLARE_CLONE(T) \
- IVariant *clone(ObjectPoolBase *pool) override \
- { \
- return static_cast<ObjectPool<T> *>(pool)->allocate(*this); \
- }
-
enum Types
{
TypeNone,
@@ -299,9 +286,140 @@ enum Types
TypeCombinedImageSampler,
TypeAccessChain,
TypeUndef,
+ TypeString,
TypeCount
};
+template <Types type>
+class TypedID;
+
+template <>
+class TypedID<TypeNone>
+{
+public:
+ TypedID() = default;
+ TypedID(uint32_t id_)
+ : id(id_)
+ {
+ }
+
+ template <Types U>
+ TypedID(const TypedID<U> &other)
+ {
+ *this = other;
+ }
+
+ template <Types U>
+ TypedID &operator=(const TypedID<U> &other)
+ {
+ id = uint32_t(other);
+ return *this;
+ }
+
+ // Implicit conversion to u32 is desired here.
+ // As long as we block implicit conversion between TypedID<A> and TypedID<B> we're good.
+ operator uint32_t() const
+ {
+ return id;
+ }
+
+ template <Types U>
+ operator TypedID<U>() const
+ {
+ return TypedID<U>(*this);
+ }
+
+ bool operator==(const TypedID &other) const
+ {
+ return id == other.id;
+ }
+
+ bool operator!=(const TypedID &other) const
+ {
+ return id != other.id;
+ }
+
+ template <Types type>
+ bool operator==(const TypedID<type> &other) const
+ {
+ return id == uint32_t(other);
+ }
+
+ template <Types type>
+ bool operator!=(const TypedID<type> &other) const
+ {
+ return id != uint32_t(other);
+ }
+
+private:
+ uint32_t id = 0;
+};
+
+template <Types type>
+class TypedID
+{
+public:
+ TypedID() = default;
+ TypedID(uint32_t id_)
+ : id(id_)
+ {
+ }
+
+ explicit TypedID(const TypedID<TypeNone> &other)
+ : id(uint32_t(other))
+ {
+ }
+
+ operator uint32_t() const
+ {
+ return id;
+ }
+
+ bool operator==(const TypedID &other) const
+ {
+ return id == other.id;
+ }
+
+ bool operator!=(const TypedID &other) const
+ {
+ return id != other.id;
+ }
+
+ bool operator==(const TypedID<TypeNone> &other) const
+ {
+ return id == uint32_t(other);
+ }
+
+ bool operator!=(const TypedID<TypeNone> &other) const
+ {
+ return id != uint32_t(other);
+ }
+
+private:
+ uint32_t id = 0;
+};
+
+using VariableID = TypedID<TypeVariable>;
+using TypeID = TypedID<TypeType>;
+using ConstantID = TypedID<TypeConstant>;
+using FunctionID = TypedID<TypeFunction>;
+using BlockID = TypedID<TypeBlock>;
+using ID = TypedID<TypeNone>;
+
+// Helper for Variant interface.
+struct IVariant
+{
+ virtual ~IVariant() = default;
+ virtual IVariant *clone(ObjectPoolBase *pool) = 0;
+ ID self = 0;
+};
+
+#define SPIRV_CROSS_DECLARE_CLONE(T) \
+ IVariant *clone(ObjectPoolBase *pool) override \
+ { \
+ return static_cast<ObjectPool<T> *>(pool)->allocate(*this); \
+ }
+
struct SPIRUndef : IVariant
{
enum
@@ -309,15 +427,32 @@ struct SPIRUndef : IVariant
type = TypeUndef
};
- explicit SPIRUndef(uint32_t basetype_)
+ explicit SPIRUndef(TypeID basetype_)
: basetype(basetype_)
{
}
- uint32_t basetype;
+ TypeID basetype;
SPIRV_CROSS_DECLARE_CLONE(SPIRUndef)
};
+struct SPIRString : IVariant
+{
+ enum
+ {
+ type = TypeString
+ };
+
+ explicit SPIRString(std::string str_)
+ : str(std::move(str_))
+ {
+ }
+
+ std::string str;
+
+ SPIRV_CROSS_DECLARE_CLONE(SPIRString)
+};
+
// This type is only used by backends which need to access the combined image and sampler IDs separately after
// the OpSampledImage opcode.
struct SPIRCombinedImageSampler : IVariant
@@ -326,15 +461,15 @@ struct SPIRCombinedImageSampler : IVariant
{
type = TypeCombinedImageSampler
};
- SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_)
+ SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_)
: combined_type(type_)
, image(image_)
, sampler(sampler_)
{
}
- uint32_t combined_type;
- uint32_t image;
- uint32_t sampler;
+ TypeID combined_type;
+ VariableID image;
+ VariableID sampler;
SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler)
};
@@ -346,16 +481,18 @@ struct SPIRConstantOp : IVariant
type = TypeConstantOp
};
- SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length)
+ SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length)
: opcode(op)
- , arguments(args, args + length)
, basetype(result_type)
{
+ arguments.reserve(length);
+ for (uint32_t i = 0; i < length; i++)
+ arguments.push_back(args[i]);
}
spv::Op opcode;
SmallVector<uint32_t> arguments;
- uint32_t basetype;
+ TypeID basetype;
SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp)
};
@@ -418,11 +555,11 @@ struct SPIRType : IVariant
spv::StorageClass storage = spv::StorageClassGeneric;
- SmallVector<uint32_t> member_types;
+ SmallVector<TypeID> member_types;
struct ImageType
{
- uint32_t type;
+ TypeID type;
spv::Dim dim;
bool depth;
bool arrayed;
@@ -435,11 +572,11 @@ struct SPIRType : IVariant
// Structs can be declared multiple times if they are used as part of interface blocks.
// We want to detect this so that we only emit the struct definition once.
// Since we cannot rely on OpName to be equal, we need to figure out aliases.
- uint32_t type_alias = 0;
+ TypeID type_alias = 0;
// Denotes the type which this type is based on.
// Allows the backend to traverse how a complex type is built up during access chains.
- uint32_t parent_type = 0;
+ TypeID parent_type = 0;
// Used in backends to avoid emitting members with conflicting names.
std::unordered_set<std::string> member_name_cache;
@@ -458,6 +595,7 @@ struct SPIRExtension : IVariant
{
Unsupported,
GLSL,
+ SPV_debug_info,
SPV_AMD_shader_ballot,
SPV_AMD_shader_explicit_vertex_parameter,
SPV_AMD_shader_trinary_minmax,
@@ -477,7 +615,7 @@ struct SPIRExtension : IVariant
// so in order to avoid conflicts, we can't stick them in the ids array.
struct SPIREntryPoint
{
- SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name)
+ SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name)
: self(self_)
, name(entry_name)
, orig_name(entry_name)
@@ -486,10 +624,10 @@ struct SPIREntryPoint
}
SPIREntryPoint() = default;
- uint32_t self = 0;
+ FunctionID self = 0;
std::string name;
std::string orig_name;
- SmallVector<uint32_t> interface_variables;
+ SmallVector<VariableID> interface_variables;
Bitset flags;
struct
@@ -510,7 +648,7 @@ struct SPIRExpression : IVariant
};
// Only created by the backend target to avoid creating tons of temporaries.
- SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_)
+ SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_)
: expression(move(expr))
, expression_type(expression_type_)
, immutable(immutable_)
@@ -520,14 +658,14 @@ struct SPIRExpression : IVariant
// If non-zero, prepend expression with to_expression(base_expression).
// Used in amortizing multiple calls to to_expression()
// where in certain cases that would quickly force a temporary when not needed.
- uint32_t base_expression = 0;
+ ID base_expression = 0;
std::string expression;
- uint32_t expression_type = 0;
+ TypeID expression_type = 0;
// If this expression is a forwarded load,
// allow us to reference the original variable.
- uint32_t loaded_from = 0;
+ ID loaded_from = 0;
// If this expression will never change, we can avoid lots of temporaries
// in high level source.
@@ -543,11 +681,11 @@ struct SPIRExpression : IVariant
bool access_chain = false;
// A list of expressions which this expression depends on.
- SmallVector<uint32_t> expression_dependencies;
+ SmallVector<ID> expression_dependencies;
// By reading this expression, we implicitly read these expressions as well.
// Used by access chain Store and Load since we read multiple expressions in this case.
- SmallVector<uint32_t> implied_read_expressions;
+ SmallVector<ID> implied_read_expressions;
SPIRV_CROSS_DECLARE_CLONE(SPIRExpression)
};
@@ -559,12 +697,12 @@ struct SPIRFunctionPrototype : IVariant
type = TypeFunctionPrototype
};
- explicit SPIRFunctionPrototype(uint32_t return_type_)
+ explicit SPIRFunctionPrototype(TypeID return_type_)
: return_type(return_type_)
{
}
- uint32_t return_type;
+ TypeID return_type;
SmallVector<uint32_t> parameter_types;
SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype)
@@ -639,23 +777,23 @@ struct SPIRBlock : IVariant
Terminator terminator = Unknown;
Merge merge = MergeNone;
Hints hint = HintNone;
- uint32_t next_block = 0;
- uint32_t merge_block = 0;
- uint32_t continue_block = 0;
+ BlockID next_block = 0;
+ BlockID merge_block = 0;
+ BlockID continue_block = 0;
- uint32_t return_value = 0; // If 0, return nothing (void).
- uint32_t condition = 0;
- uint32_t true_block = 0;
- uint32_t false_block = 0;
- uint32_t default_block = 0;
+ ID return_value = 0; // If 0, return nothing (void).
+ ID condition = 0;
+ BlockID true_block = 0;
+ BlockID false_block = 0;
+ BlockID default_block = 0;
SmallVector<Instruction> ops;
struct Phi
{
- uint32_t local_variable; // flush local variable ...
- uint32_t parent; // If we're in from_block and want to branch into this block ...
- uint32_t function_variable; // to this function-global "phi" variable first.
+ ID local_variable; // flush local variable ...
+ BlockID parent; // If we're in from_block and want to branch into this block ...
+ VariableID function_variable; // to this function-global "phi" variable first.
};
// Before entering this block flush out local variables to magical "phi" variables.
@@ -663,16 +801,16 @@ struct SPIRBlock : IVariant
// Declare these temporaries before beginning the block.
// Used for handling complex continue blocks which have side effects.
- SmallVector<std::pair<uint32_t, uint32_t>> declare_temporary;
+ SmallVector<std::pair<TypeID, ID>> declare_temporary;
// Declare these temporaries, but only conditionally if this block turns out to be
// a complex loop header.
- SmallVector<std::pair<uint32_t, uint32_t>> potential_declare_temporary;
+ SmallVector<std::pair<TypeID, ID>> potential_declare_temporary;
struct Case
{
uint32_t value;
- uint32_t block;
+ BlockID block;
};
SmallVector<Case> cases;
@@ -686,23 +824,27 @@ struct SPIRBlock : IVariant
// Do we need a ladder variable to defer breaking out of a loop construct after a switch block?
bool need_ladder_break = false;
+ // If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch.
+ // Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi.
+ BlockID ignore_phi_from_block = 0;
+
// The dominating block which this block might be within.
// Used in continue; blocks to determine if we really need to write continue.
- uint32_t loop_dominator = 0;
+ BlockID loop_dominator = 0;
// All access to these variables are dominated by this block,
// so before branching anywhere we need to make sure that we declare these variables.
- SmallVector<uint32_t> dominated_variables;
+ SmallVector<VariableID> dominated_variables;
// These are variables which should be declared in a for loop header, if we
// fail to use a classic for-loop,
// we remove these variables, and fall back to regular variables outside the loop.
- SmallVector<uint32_t> loop_variables;
+ SmallVector<VariableID> loop_variables;
// Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or
// sub-group-like operations.
// Make sure that we only use these expressions in the original block.
- SmallVector<uint32_t> invalidate_expressions;
+ SmallVector<ID> invalidate_expressions;
SPIRV_CROSS_DECLARE_CLONE(SPIRBlock)
};
@@ -714,7 +856,7 @@ struct SPIRFunction : IVariant
type = TypeFunction
};
- SPIRFunction(uint32_t return_type_, uint32_t function_type_)
+ SPIRFunction(TypeID return_type_, TypeID function_type_)
: return_type(return_type_)
, function_type(function_type_)
{
@@ -722,8 +864,8 @@ struct SPIRFunction : IVariant
struct Parameter
{
- uint32_t type;
- uint32_t id;
+ TypeID type;
+ ID id;
uint32_t read_count;
uint32_t write_count;
@@ -745,33 +887,40 @@ struct SPIRFunction : IVariant
// or a global ID.
struct CombinedImageSamplerParameter
{
- uint32_t id;
- uint32_t image_id;
- uint32_t sampler_id;
+ VariableID id;
+ VariableID image_id;
+ VariableID sampler_id;
bool global_image;
bool global_sampler;
bool depth;
};
- uint32_t return_type;
- uint32_t function_type;
+ TypeID return_type;
+ TypeID function_type;
SmallVector<Parameter> arguments;
// Can be used by backends to add magic arguments.
// Currently used by combined image/sampler implementation.
SmallVector<Parameter> shadow_arguments;
- SmallVector<uint32_t> local_variables;
- uint32_t entry_block = 0;
- SmallVector<uint32_t> blocks;
+ SmallVector<VariableID> local_variables;
+ BlockID entry_block = 0;
+ SmallVector<BlockID> blocks;
SmallVector<CombinedImageSamplerParameter> combined_parameters;
- void add_local_variable(uint32_t id)
+ struct EntryLine
+ {
+ uint32_t file_id = 0;
+ uint32_t line_literal = 0;
+ };
+ EntryLine entry_line;
+
+ void add_local_variable(VariableID id)
{
local_variables.push_back(id);
}
- void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false)
+ void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false)
{
// Arguments are read-only until proven otherwise.
arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable });
@@ -792,7 +941,7 @@ struct SPIRFunction : IVariant
// On function entry, make sure to copy a constant array into thread addr space to work around
// the case where we are passing a constant array by value to a function on backends which do not
// consider arrays value types.
- SmallVector<uint32_t> constant_arrays_needed_on_stack;
+ SmallVector<ID> constant_arrays_needed_on_stack;
bool active = false;
bool flush_undeclared = true;
@@ -808,7 +957,7 @@ struct SPIRAccessChain : IVariant
type = TypeAccessChain
};
- SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_,
+ SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_,
int32_t static_index_)
: basetype(basetype_)
, storage(storage_)
@@ -823,20 +972,20 @@ struct SPIRAccessChain : IVariant
// which has no usable buffer type ala GLSL SSBOs.
// StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses.
- uint32_t basetype;
+ TypeID basetype;
spv::StorageClass storage;
std::string base;
std::string dynamic_index;
int32_t static_index;
- uint32_t loaded_from = 0;
+ VariableID loaded_from = 0;
uint32_t matrix_stride = 0;
bool row_major_matrix = false;
bool immutable = false;
// By reading this expression, we implicitly read these expressions as well.
// Used by access chain Store and Load since we read multiple expressions in this case.
- SmallVector<uint32_t> implied_read_expressions;
+ SmallVector<ID> implied_read_expressions;
SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain)
};
@@ -849,7 +998,7 @@ struct SPIRVariable : IVariant
};
SPIRVariable() = default;
- SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0)
+ SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0)
: basetype(basetype_)
, storage(storage_)
, initializer(initializer_)
@@ -857,11 +1006,11 @@ struct SPIRVariable : IVariant
{
}
- uint32_t basetype = 0;
+ TypeID basetype = 0;
spv::StorageClass storage = spv::StorageClassGeneric;
uint32_t decoration = 0;
- uint32_t initializer = 0;
- uint32_t basevariable = 0;
+ ID initializer = 0;
+ VariableID basevariable = 0;
SmallVector<uint32_t> dereference_chain;
bool compat_builtin = false;
@@ -871,10 +1020,10 @@ struct SPIRVariable : IVariant
// When we read the variable as an expression, just forward
// shadowed_id as the expression.
bool statically_assigned = false;
- uint32_t static_expression = 0;
+ ID static_expression = 0;
// Temporaries which can remain forwarded as long as this variable is not modified.
- SmallVector<uint32_t> dependees;
+ SmallVector<ID> dependees;
bool forwardable = true;
bool deferred_declaration = false;
@@ -887,7 +1036,7 @@ struct SPIRVariable : IVariant
uint32_t remapped_components = 0;
// The block which dominates all access to this variable.
- uint32_t dominator = 0;
+ BlockID dominator = 0;
// If true, this variable is a loop variable, when accessing the variable
// outside a loop,
// we should statically forward it.
@@ -907,7 +1056,8 @@ struct SPIRConstant : IVariant
type = TypeConstant
};
- union Constant {
+ union Constant
+ {
uint32_t u32;
int32_t i32;
float f32;
@@ -921,15 +1071,12 @@ struct SPIRConstant : IVariant
{
Constant r[4];
// If != 0, this element is a specialization constant, and we should keep track of it as such.
- uint32_t id[4];
+ ID id[4];
uint32_t vecsize = 1;
- // Workaround for MSVC 2013, initializing an array breaks.
ConstantVector()
{
memset(r, 0, sizeof(r));
- for (unsigned i = 0; i < 4; i++)
- id[i] = 0;
}
};
@@ -937,15 +1084,8 @@ struct SPIRConstant : IVariant
{
ConstantVector c[4];
// If != 0, this column is a specialization constant, and we should keep track of it as such.
- uint32_t id[4];
+ ID id[4];
uint32_t columns = 1;
-
- // Workaround for MSVC 2013, initializing an array breaks.
- ConstantMatrix()
- {
- for (unsigned i = 0; i < 4; i++)
- id[i] = 0;
- }
};
static inline float f16_to_f32(uint16_t u16_value)
@@ -955,7 +1095,8 @@ struct SPIRConstant : IVariant
int e = (u16_value >> 10) & 0x1f;
int m = (u16_value >> 0) & 0x3ff;
- union {
+ union
+ {
float f32;
uint32_t u32;
} u;
@@ -1109,16 +1250,18 @@ struct SPIRConstant : IVariant
SPIRConstant() = default;
- SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
+ SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
: constant_type(constant_type_)
, specialization(specialized)
{
- subconstants.insert(std::end(subconstants), elements, elements + num_elements);
+ subconstants.reserve(num_elements);
+ for (uint32_t i = 0; i < num_elements; i++)
+ subconstants.push_back(elements[i]);
specialization = specialized;
}
// Construct scalar (32-bit).
- SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized)
+ SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized)
: constant_type(constant_type_)
, specialization(specialized)
{
@@ -1128,7 +1271,7 @@ struct SPIRConstant : IVariant
}
// Construct scalar (64-bit).
- SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized)
+ SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized)
: constant_type(constant_type_)
, specialization(specialized)
{
@@ -1138,7 +1281,7 @@ struct SPIRConstant : IVariant
}
// Construct vectors and matrices.
- SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements,
+ SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements,
bool specialized)
: constant_type(constant_type_)
, specialization(specialized)
@@ -1170,7 +1313,7 @@ struct SPIRConstant : IVariant
}
}
- uint32_t constant_type = 0;
+ TypeID constant_type = 0;
ConstantMatrix m;
// If this constant is a specialization constant (i.e. created with OpSpecConstant*).
@@ -1182,7 +1325,7 @@ struct SPIRConstant : IVariant
bool is_used_as_lut = false;
// For composites which are constant arrays, etc.
- SmallVector<uint32_t> subconstants;
+ SmallVector<ConstantID> subconstants;
// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
// and uses them to initialize the constant. This allows the user
@@ -1317,9 +1460,9 @@ public:
return type;
}
- uint32_t get_id() const
+ ID get_id() const
{
- return holder ? holder->self : 0;
+ return holder ? holder->self : ID(0);
}
bool empty() const
@@ -1368,12 +1511,57 @@ T &variant_set(Variant &var, P &&... args)
struct AccessChainMeta
{
- uint32_t storage_packed_type = 0;
+ uint32_t storage_physical_type = 0;
bool need_transpose = false;
bool storage_is_packed = false;
bool storage_is_invariant = false;
};
+enum ExtendedDecorations
+{
+ // Marks if a buffer block is re-packed, i.e. member declaration might be subject to PhysicalTypeID remapping and padding.
+ SPIRVCrossDecorationBufferBlockRepacked = 0,
+
+ // A type in a buffer block might be declared with a different physical type than the logical type.
+ // If this is not set, PhysicalTypeID == the SPIR-V type as declared.
+ SPIRVCrossDecorationPhysicalTypeID,
+
+ // Marks if the physical type is to be declared with tight packing rules, i.e. packed_floatN on MSL and friends.
+ // If this is set, PhysicalTypeID might also be set. It can be set to same as logical type if all we're doing
+ // is converting float3 to packed_float3 for example.
+ // If this is marked on a struct, it means the struct itself must use only Packed types for all its members.
+ SPIRVCrossDecorationPhysicalTypePacked,
+
+ // The padding in bytes before declaring this struct member.
+ // If used on a struct type, marks the target size of a struct.
+ SPIRVCrossDecorationPaddingTarget,
+
+ SPIRVCrossDecorationInterfaceMemberIndex,
+ SPIRVCrossDecorationInterfaceOrigID,
+ SPIRVCrossDecorationResourceIndexPrimary,
+ // Used for decorations like resource indices for samplers when part of combined image samplers.
+ // A variable might need to hold two resource indices in this case.
+ SPIRVCrossDecorationResourceIndexSecondary,
+ // Used for resource indices for multiplanar images when part of combined image samplers.
+ SPIRVCrossDecorationResourceIndexTertiary,
+ SPIRVCrossDecorationResourceIndexQuaternary,
+
+ // Marks a buffer block for using explicit offsets (GLSL/HLSL).
+ SPIRVCrossDecorationExplicitOffset,
+
+ // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase().
+ // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables.
+ SPIRVCrossDecorationBuiltInDispatchBase,
+
+ // Apply to a variable that is a function parameter; marks it as being a "dynamic"
+ // combined image-sampler. In MSL, this is used when a function parameter might hold
+ // either a regular combined image-sampler or one that has an attached sampler
+ // Y'CbCr conversion.
+ SPIRVCrossDecorationDynamicImageSampler,
+
+ SPIRVCrossDecorationCount
+};
+
struct Meta
{
struct Decoration
@@ -1396,13 +1584,17 @@ struct Meta
spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
bool builtin = false;
- struct
+ struct Extended
{
- uint32_t packed_type = 0;
- bool packed = false;
- uint32_t ib_member_index = ~(0u);
- uint32_t ib_orig_id = 0;
- uint32_t argument_buffer_id = ~(0u);
+ Extended()
+ {
+ // MSVC 2013 workaround to init like this.
+ for (auto &v : values)
+ v = 0;
+ }
+
+ Bitset flags;
+ uint32_t values[SPIRVCrossDecorationCount];
} extended;
};
@@ -1510,4 +1702,16 @@ static inline bool opcode_is_sign_invariant(spv::Op opcode)
}
} // namespace SPIRV_CROSS_NAMESPACE
+namespace std
+{
+template <SPIRV_CROSS_NAMESPACE::Types type>
+struct hash<SPIRV_CROSS_NAMESPACE::TypedID<type>>
+{
+ size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID<type> &value) const
+ {
+ return std::hash<uint32_t>()(value);
+ }
+};
+} // namespace std
+
#endif
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp
index 90566c1..25966b3 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp
@@ -321,6 +321,8 @@ string CompilerCPP::compile()
backend.explicit_struct_type = true;
backend.use_initializer_list = true;
+ fixup_type_alias();
+ reorder_type_alias();
build_function_control_flow_graphs_and_analyze();
update_active_builtins();
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp
index 6b66b74..c73ecdf 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp
@@ -17,6 +17,7 @@
#include "spirv_cross.hpp"
#include "GLSL.std.450.h"
#include "spirv_cfg.hpp"
+#include "spirv_common.hpp"
#include "spirv_parser.hpp"
#include <algorithm>
#include <cstring>
@@ -87,6 +88,10 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
bool Compiler::block_is_pure(const SPIRBlock &block)
{
+ // This is a global side effect of the function.
+ if (block.terminator == SPIRBlock::Kill)
+ return false;
+
for (auto &i : block.ops)
{
auto ops = stream(i);
@@ -155,6 +160,10 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
+ case OpDemoteToHelperInvocationEXT:
+ // This is a global side effect of the function.
+ return false;
+
default:
break;
}
@@ -176,7 +185,7 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const
{
// If the alias master has been specially packed, we will have emitted a clean variant as well,
// so skip the name aliasing here.
- if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
+ if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
return to_name(type.type_alias);
}
}
@@ -264,6 +273,15 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
return var;
}
+StorageClass Compiler::get_backing_variable_storage(uint32_t ptr)
+{
+ auto *var = maybe_get_backing_variable(ptr);
+ if (var)
+ return var->storage;
+ else
+ return expression_type(ptr).storage;
+}
+
void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
{
auto &e = get<SPIRExpression>(expr);
@@ -568,7 +586,7 @@ ShaderResources Compiler::get_shader_resources() const
return get_shader_resources(nullptr);
}
-ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> &active_variables) const
+ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
{
return get_shader_resources(&active_variables);
}
@@ -708,6 +726,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
case OpAtomicAnd:
case OpAtomicOr:
case OpAtomicXor:
+ case OpArrayLength:
// Invalid SPIR-V.
if (length < 3)
return false;
@@ -724,16 +743,16 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
return true;
}
-unordered_set<uint32_t> Compiler::get_active_interface_variables() const
+unordered_set<VariableID> Compiler::get_active_interface_variables() const
{
// Traverse the call graph and find all interface variables which are in use.
- unordered_set<uint32_t> variables;
+ unordered_set<VariableID> variables;
InterfaceVariableAccessHandler handler(*this, variables);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
// Make sure we preserve output variables which are only initialized, but never accessed by any code.
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
- if (var.storage == StorageClassOutput && var.initializer != 0)
+ if (var.storage == StorageClassOutput && var.initializer != ID(0))
variables.insert(var.self);
});
@@ -744,16 +763,18 @@ unordered_set<uint32_t> Compiler::get_active_interface_variables() const
return variables;
}
-void Compiler::set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables)
+void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
{
active_interface_variables = move(active_variables);
check_active_interface_variables = true;
}
-ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *active_variables) const
+ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
{
ShaderResources res;
+ bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();
+
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
@@ -771,7 +792,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
if (has_decoration(type.self, DecorationBlock))
{
res.stage_inputs.push_back(
- { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+ { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
}
else
res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -787,7 +808,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
if (has_decoration(type.self, DecorationBlock))
{
res.stage_outputs.push_back(
- { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+ { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
}
else
res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
@@ -796,19 +817,19 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock))
{
res.uniform_buffers.push_back(
- { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+ { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
}
// Old way to declare SSBOs.
else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock))
{
res.storage_buffers.push_back(
- { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+ { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
}
// Modern way to declare SSBOs.
else if (type.storage == StorageClassStorageBuffer)
{
res.storage_buffers.push_back(
- { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
+ { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
}
// Push constant blocks
else if (type.storage == StorageClassPushConstant)
@@ -872,65 +893,6 @@ bool Compiler::type_is_block_like(const SPIRType &type) const
return false;
}
-void Compiler::fixup_type_alias()
-{
- // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
- // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type
- // alias if the slave type is a block type.
- ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
- if (type.type_alias && type_is_block_like(type))
- {
- // Become the master.
- ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
- if (other_id == type.self)
- return;
-
- if (other_type.type_alias == type.type_alias)
- other_type.type_alias = type.self;
- });
-
- this->get<SPIRType>(type.type_alias).type_alias = self;
- type.type_alias = 0;
- }
- });
-
- ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
- if (type.type_alias && type_is_block_like(type))
- {
- // This is not allowed, drop the type_alias.
- type.type_alias = 0;
- }
- });
-
- // Reorder declaration of types so that the master of the type alias is always emitted first.
- // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which
- // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
- auto &type_ids = ir.ids_for_type[TypeType];
- for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
- {
- auto &type = get<SPIRType>(*alias_itr);
- if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
- {
- // We will skip declaring this type, so make sure the type_alias type comes before.
- auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias);
- assert(master_itr != end(type_ids));
-
- if (alias_itr < master_itr)
- {
- // Must also swap the type order for the constant-type joined array.
- auto &joined_types = ir.ids_for_constant_or_type;
- auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
- auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
- assert(alt_alias_itr != end(joined_types));
- assert(alt_master_itr != end(joined_types));
-
- swap(*alias_itr, *master_itr);
- swap(*alt_alias_itr, *alt_master_itr);
- }
- }
- }
-}
-
void Compiler::parse_fixup()
{
// Figure out specialization constants for work group sizes.
@@ -964,8 +926,6 @@ void Compiler::parse_fixup()
aliased_variables.push_back(var.self);
}
}
-
- fixup_type_alias();
}
void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
@@ -1026,17 +986,17 @@ void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
update_name_cache(cache, cache, name);
}
-void Compiler::set_name(uint32_t id, const std::string &name)
+void Compiler::set_name(ID id, const std::string &name)
{
ir.set_name(id, name);
}
-const SPIRType &Compiler::get_type(uint32_t id) const
+const SPIRType &Compiler::get_type(TypeID id) const
{
return get<SPIRType>(id);
}
-const SPIRType &Compiler::get_type_from_variable(uint32_t id) const
+const SPIRType &Compiler::get_type_from_variable(VariableID id) const
{
return get<SPIRType>(get<SPIRVariable>(id).basetype);
}
@@ -1107,23 +1067,23 @@ bool Compiler::is_sampled_image_type(const SPIRType &type)
type.image.dim != DimBuffer;
}
-void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
const std::string &argument)
{
ir.set_member_decoration_string(id, index, decoration, argument);
}
-void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument)
+void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
{
ir.set_member_decoration(id, index, decoration, argument);
}
-void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name)
+void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
{
ir.set_member_name(id, index, name);
}
-const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const
+const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
{
return ir.get_member_name(id, index);
}
@@ -1139,7 +1099,7 @@ void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const
ir.meta[type_id].members[index].qualified_alias = name;
}
-const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const
+const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
{
auto *m = ir.find_meta(type_id);
if (m && index < m->members.size())
@@ -1148,32 +1108,32 @@ const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t ind
return ir.get_empty_string();
}
-uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
return ir.get_member_decoration(id, index, decoration);
}
-const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const
+const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
{
return ir.get_member_decoration_bitset(id, index);
}
-bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
return ir.has_member_decoration(id, index, decoration);
}
-void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration)
+void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
{
ir.unset_member_decoration(id, index, decoration);
}
-void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument)
+void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
{
ir.set_decoration_string(id, decoration, argument);
}
-void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument)
+void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
{
ir.set_decoration(id, decoration, argument);
}
@@ -1181,28 +1141,8 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
{
auto &dec = ir.meta[id].decoration;
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- dec.extended.packed = true;
- break;
-
- case SPIRVCrossDecorationPackedType:
- dec.extended.packed_type = value;
- break;
-
- case SPIRVCrossDecorationInterfaceMemberIndex:
- dec.extended.ib_member_index = value;
- break;
-
- case SPIRVCrossDecorationInterfaceOrigID:
- dec.extended.ib_orig_id = value;
- break;
-
- case SPIRVCrossDecorationArgumentBufferID:
- dec.extended.argument_buffer_id = value;
- break;
- }
+ dec.extended.flags.set(decoration);
+ dec.extended.values[decoration] = value;
}
void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
@@ -1210,28 +1150,23 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext
{
ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
auto &dec = ir.meta[type].members[index];
+ dec.extended.flags.set(decoration);
+ dec.extended.values[decoration] = value;
+}
+static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
+{
switch (decoration)
{
- case SPIRVCrossDecorationPacked:
- dec.extended.packed = true;
- break;
-
- case SPIRVCrossDecorationPackedType:
- dec.extended.packed_type = value;
- break;
-
+ case SPIRVCrossDecorationResourceIndexPrimary:
+ case SPIRVCrossDecorationResourceIndexSecondary:
+ case SPIRVCrossDecorationResourceIndexTertiary:
+ case SPIRVCrossDecorationResourceIndexQuaternary:
case SPIRVCrossDecorationInterfaceMemberIndex:
- dec.extended.ib_member_index = value;
- break;
+ return ~(0u);
- case SPIRVCrossDecorationInterfaceOrigID:
- dec.extended.ib_orig_id = value;
- break;
-
- case SPIRVCrossDecorationArgumentBufferID:
- dec.extended.argument_buffer_id = value;
- break;
+ default:
+ return 0;
}
}
@@ -1242,25 +1177,11 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco
return 0;
auto &dec = m->decoration;
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- return uint32_t(dec.extended.packed);
- case SPIRVCrossDecorationPackedType:
- return dec.extended.packed_type;
+ if (!dec.extended.flags.get(decoration))
+ return get_default_extended_decoration(decoration);
- case SPIRVCrossDecorationInterfaceMemberIndex:
- return dec.extended.ib_member_index;
-
- case SPIRVCrossDecorationInterfaceOrigID:
- return dec.extended.ib_orig_id;
-
- case SPIRVCrossDecorationArgumentBufferID:
- return dec.extended.argument_buffer_id;
- }
-
- return 0;
+ return dec.extended.values[decoration];
}
uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
@@ -1273,25 +1194,9 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index,
return 0;
auto &dec = m->members[index];
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- return uint32_t(dec.extended.packed);
-
- case SPIRVCrossDecorationPackedType:
- return dec.extended.packed_type;
-
- case SPIRVCrossDecorationInterfaceMemberIndex:
- return dec.extended.ib_member_index;
-
- case SPIRVCrossDecorationInterfaceOrigID:
- return dec.extended.ib_orig_id;
-
- case SPIRVCrossDecorationArgumentBufferID:
- return dec.extended.argument_buffer_id;
- }
-
- return 0;
+ if (!dec.extended.flags.get(decoration))
+ return get_default_extended_decoration(decoration);
+ return dec.extended.values[decoration];
}
bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
@@ -1301,25 +1206,7 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati
return false;
auto &dec = m->decoration;
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- return dec.extended.packed;
-
- case SPIRVCrossDecorationPackedType:
- return dec.extended.packed_type != 0;
-
- case SPIRVCrossDecorationInterfaceMemberIndex:
- return dec.extended.ib_member_index != uint32_t(-1);
-
- case SPIRVCrossDecorationInterfaceOrigID:
- return dec.extended.ib_orig_id != 0;
-
- case SPIRVCrossDecorationArgumentBufferID:
- return dec.extended.argument_buffer_id != 0;
- }
-
- return false;
+ return dec.extended.flags.get(decoration);
}
bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
@@ -1332,99 +1219,40 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext
return false;
auto &dec = m->members[index];
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- return dec.extended.packed;
-
- case SPIRVCrossDecorationPackedType:
- return dec.extended.packed_type != 0;
-
- case SPIRVCrossDecorationInterfaceMemberIndex:
- return dec.extended.ib_member_index != uint32_t(-1);
-
- case SPIRVCrossDecorationInterfaceOrigID:
- return dec.extended.ib_orig_id != 0;
-
- case SPIRVCrossDecorationArgumentBufferID:
- return dec.extended.argument_buffer_id != uint32_t(-1);
- }
-
- return false;
+ return dec.extended.flags.get(decoration);
}
void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
{
auto &dec = ir.meta[id].decoration;
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- dec.extended.packed = false;
- break;
-
- case SPIRVCrossDecorationPackedType:
- dec.extended.packed_type = 0;
- break;
-
- case SPIRVCrossDecorationInterfaceMemberIndex:
- dec.extended.ib_member_index = ~(0u);
- break;
-
- case SPIRVCrossDecorationInterfaceOrigID:
- dec.extended.ib_orig_id = 0;
- break;
-
- case SPIRVCrossDecorationArgumentBufferID:
- dec.extended.argument_buffer_id = 0;
- break;
- }
+ dec.extended.flags.clear(decoration);
+ dec.extended.values[decoration] = 0;
}
void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
{
ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
auto &dec = ir.meta[type].members[index];
-
- switch (decoration)
- {
- case SPIRVCrossDecorationPacked:
- dec.extended.packed = false;
- break;
-
- case SPIRVCrossDecorationPackedType:
- dec.extended.packed_type = 0;
- break;
-
- case SPIRVCrossDecorationInterfaceMemberIndex:
- dec.extended.ib_member_index = ~(0u);
- break;
-
- case SPIRVCrossDecorationInterfaceOrigID:
- dec.extended.ib_orig_id = 0;
- break;
-
- case SPIRVCrossDecorationArgumentBufferID:
- dec.extended.argument_buffer_id = 0;
- break;
- }
+ dec.extended.flags.clear(decoration);
+ dec.extended.values[decoration] = 0;
}
-StorageClass Compiler::get_storage_class(uint32_t id) const
+StorageClass Compiler::get_storage_class(VariableID id) const
{
return get<SPIRVariable>(id).storage;
}
-const std::string &Compiler::get_name(uint32_t id) const
+const std::string &Compiler::get_name(ID id) const
{
return ir.get_name(id);
}
-const std::string Compiler::get_fallback_name(uint32_t id) const
+const std::string Compiler::get_fallback_name(ID id) const
{
return join("_", id);
}
-const std::string Compiler::get_block_fallback_name(uint32_t id) const
+const std::string Compiler::get_block_fallback_name(VariableID id) const
{
auto &var = get<SPIRVariable>(id);
if (get_name(id).empty())
@@ -1433,37 +1261,37 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const
return get_name(id);
}
-const Bitset &Compiler::get_decoration_bitset(uint32_t id) const
+const Bitset &Compiler::get_decoration_bitset(ID id) const
{
return ir.get_decoration_bitset(id);
}
-bool Compiler::has_decoration(uint32_t id, Decoration decoration) const
+bool Compiler::has_decoration(ID id, Decoration decoration) const
{
return ir.has_decoration(id, decoration);
}
-const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const
+const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
{
return ir.get_decoration_string(id, decoration);
}
-const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const
+const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
{
return ir.get_member_decoration_string(id, index, decoration);
}
-uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
+uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
{
return ir.get_decoration(id, decoration);
}
-void Compiler::unset_decoration(uint32_t id, Decoration decoration)
+void Compiler::unset_decoration(ID id, Decoration decoration)
{
ir.unset_decoration(id, decoration);
}
-bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const
+bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
{
auto *m = ir.find_meta(id);
if (!m)
@@ -1587,38 +1415,6 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method
return false;
}
-bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to)
-{
- auto *start = &from;
-
- if (start->self == to.self)
- return true;
-
- // Break cycles.
- if (is_continue(start->self))
- return false;
-
- // If our select block doesn't merge, we must break or continue in these blocks,
- // so if continues occur branchless within these blocks, consider them branchless as well.
- // This is typically used for loop control.
- if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone &&
- (block_is_outside_flow_control_from_block(get<SPIRBlock>(start->true_block), to) ||
- block_is_outside_flow_control_from_block(get<SPIRBlock>(start->false_block), to)))
- {
- return true;
- }
- else if (start->merge_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->merge_block), to))
- {
- return true;
- }
- else if (start->next_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->next_block), to))
- {
- return true;
- }
- else
- return false;
-}
-
bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
{
if (!execution_is_branchless(from, to))
@@ -1658,6 +1454,11 @@ bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &t
}
}
+bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
+{
+ return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
+}
+
SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
{
// The block was deemed too complex during code emit, pick conservative fallback paths.
@@ -1669,6 +1470,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
if (block.merge == SPIRBlock::MergeLoop)
return SPIRBlock::WhileLoop;
+ if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
+ {
+ // Continue block is never reached from CFG.
+ return SPIRBlock::ComplexLoop;
+ }
+
auto &dominator = get<SPIRBlock>(block.loop_dominator);
if (execution_is_noop(block, dominator))
@@ -1681,6 +1488,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);
+ // If we need to flush Phi in this block, we cannot have a DoWhile loop.
+ bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
+ bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
+ if (flush_phi_to_false || flush_phi_to_true)
+ return SPIRBlock::ComplexLoop;
+
bool positive_do_while = block.true_block == dominator.self &&
(block.false_block == dominator.merge_block ||
(false_block && merge_block && execution_is_noop(*false_block, *merge_block)));
@@ -1702,6 +1515,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
handler.set_current_block(block);
+ handler.rearm_current_block(block);
// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
@@ -1725,6 +1539,8 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand
return false;
if (!handler.end_function_scope(ops, i.length))
return false;
+
+ handler.rearm_current_block(block);
}
}
}
@@ -1921,7 +1737,7 @@ bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint
return true;
}
-SmallVector<BufferRange> Compiler::get_active_buffer_ranges(uint32_t id) const
+SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
{
SmallVector<BufferRange> ranges;
BufferAccessHandler handler(*this, ranges, id);
@@ -2014,19 +1830,19 @@ uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationCo
{
auto &c = get<SPIRConstant>(execution.workgroup_size.constant);
- if (c.m.c[0].id[0] != 0)
+ if (c.m.c[0].id[0] != ID(0))
{
x.id = c.m.c[0].id[0];
x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId);
}
- if (c.m.c[0].id[1] != 0)
+ if (c.m.c[0].id[1] != ID(0))
{
y.id = c.m.c[0].id[1];
y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId);
}
- if (c.m.c[0].id[2] != 0)
+ if (c.m.c[0].id[2] != ID(0))
{
z.id = c.m.c[0].id[2];
z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId);
@@ -2081,36 +1897,36 @@ bool Compiler::is_tessellation_shader() const
return is_tessellation_shader(get_execution_model());
}
-void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable)
+void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
{
get<SPIRVariable>(id).remapped_variable = remap_enable;
}
-bool Compiler::get_remapped_variable_state(uint32_t id) const
+bool Compiler::get_remapped_variable_state(VariableID id) const
{
return get<SPIRVariable>(id).remapped_variable;
}
-void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components)
+void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
{
get<SPIRVariable>(id).remapped_components = components;
}
-uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const
+uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
{
return get<SPIRVariable>(id).remapped_components;
}
void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
{
- auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source);
+ auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
if (itr == end(e.implied_read_expressions))
e.implied_read_expressions.push_back(source);
}
void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
{
- auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source);
+ auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
if (itr == end(e.implied_read_expressions))
e.implied_read_expressions.push_back(source);
}
@@ -2251,7 +2067,7 @@ bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
return true;
auto &execution = get_entry_point();
- return find(begin(execution.interface_variables), end(execution.interface_variables), id) !=
+ return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) !=
end(execution.interface_variables);
}
@@ -2331,8 +2147,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a
{
for (auto &param : params)
{
- uint32_t image_id = param.global_image ? param.image_id : args[param.image_id];
- uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id];
+ VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
+ VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);
auto *i = compiler.maybe_get_backing_variable(image_id);
auto *s = compiler.maybe_get_backing_variable(sampler_id);
@@ -2341,15 +2157,17 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a
if (s)
sampler_id = s->self;
- register_combined_image_sampler(caller, image_id, sampler_id, param.depth);
+ register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
}
}
return true;
}
-void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id,
- uint32_t sampler_id, bool depth)
+void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
+ VariableID combined_module_id,
+ VariableID image_id, VariableID sampler_id,
+ bool depth)
{
// We now have a texture ID and a sampler ID which will either be found as a global
// or a parameter in our own function. If both are global, they will not need a parameter,
@@ -2409,12 +2227,15 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
// Build new variable.
compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0);
- // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
- auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags;
- auto &old_flags = compiler.ir.meta[sampler_id].decoration.decoration_flags;
- new_flags.reset();
- if (old_flags.get(DecorationRelaxedPrecision))
- new_flags.set(DecorationRelaxedPrecision);
+ // Inherit RelaxedPrecision.
+ // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
+ bool relaxed_precision =
+ compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) ||
+ compiler.has_decoration(image_id, DecorationRelaxedPrecision) ||
+ (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));
+
+ if (relaxed_precision)
+ compiler.set_decoration(combined_id, DecorationRelaxedPrecision);
param.id = combined_id;
@@ -2621,8 +2442,10 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
if (sampler)
sampler_id = sampler->self;
+ uint32_t combined_id = args[1];
+
auto &combined_type = compiler.get<SPIRType>(args[0]);
- register_combined_image_sampler(callee, image_id, sampler_id, combined_type.image.depth);
+ register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth);
}
}
@@ -2630,8 +2453,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
// This information is statically known from the current place in the call stack.
// Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
// which backing variable the image/sample came from.
- uint32_t image_id = remap_parameter(args[2]);
- uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);
+ VariableID image_id = remap_parameter(args[2]);
+ VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);
auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
[image_id, sampler_id](const CombinedImageSampler &combined) {
@@ -2641,6 +2464,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
if (itr == end(compiler.combined_image_samplers))
{
uint32_t sampled_type;
+ uint32_t combined_module_id;
if (is_fetch)
{
// Have to invent the sampled image type.
@@ -2650,10 +2474,12 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
type.self = sampled_type;
type.basetype = SPIRType::SampledImage;
type.image.depth = false;
+ combined_module_id = 0;
}
else
{
sampled_type = args[0];
+ combined_module_id = args[1];
}
auto id = compiler.ir.increase_bound_by(2);
@@ -2673,12 +2499,14 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0);
// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
- auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags;
- // Fetch inherits precision from the image, not sampler (there is no sampler).
- auto &old_flags = compiler.ir.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags;
- new_flags.reset();
- if (old_flags.get(DecorationRelaxedPrecision))
- new_flags.set(DecorationRelaxedPrecision);
+ // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
+ bool relaxed_precision =
+ (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) ||
+ (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) ||
+ (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));
+
+ if (relaxed_precision)
+ compiler.set_decoration(combined_id, DecorationRelaxedPrecision);
// Propagate the array type for the original image as well.
auto *var = compiler.maybe_get_backing_variable(image_id);
@@ -2695,7 +2523,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
return true;
}
-uint32_t Compiler::build_dummy_sampler_for_combined_images()
+VariableID Compiler::build_dummy_sampler_for_combined_images()
{
DummySamplerForCombinedImageHandler handler(*this);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
@@ -2749,12 +2577,12 @@ SmallVector<SpecializationConstant> Compiler::get_specialization_constants() con
return spec_consts;
}
-SPIRConstant &Compiler::get_constant(uint32_t id)
+SPIRConstant &Compiler::get_constant(ConstantID id)
{
return get<SPIRConstant>(id);
}
-const SPIRConstant &Compiler::get_constant(uint32_t id) const
+const SPIRConstant &Compiler::get_constant(ConstantID id) const
{
return get<SPIRConstant>(id);
}
@@ -2938,7 +2766,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
if (length < 2)
return false;
- uint32_t ptr = args[0];
+ ID ptr = args[0];
auto *var = compiler.maybe_get_backing_variable(ptr);
// If we store through an access chain, we have a partial write.
@@ -2983,7 +2811,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
// The result of an access chain is a fixed expression and is not really considered a temporary.
auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
- e.loaded_from = backing_variable ? backing_variable->self : 0;
+ e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);
// Other backends might use SPIRAccessChain for this later.
compiler.ir.ids[args[1]].set_allow_type_rewrite();
@@ -2996,8 +2824,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
if (length < 2)
return false;
- uint32_t lhs = args[0];
- uint32_t rhs = args[1];
+ ID lhs = args[0];
+ ID rhs = args[1];
auto *var = compiler.maybe_get_backing_variable(lhs);
// If we store through an access chain, we have a partial write.
@@ -3096,6 +2924,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
}
case OpArrayLength:
+ case OpLine:
+ case OpNoLine:
// Uses literals, but cannot be a phi variable or temporary, so ignore.
break;
@@ -3334,11 +3164,34 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
unordered_map<uint32_t, uint32_t> potential_loop_variables;
+ // Find the loop dominator block for each block.
+ for (auto &block_id : entry.blocks)
+ {
+ auto &block = get<SPIRBlock>(block_id);
+
+ auto itr = ir.continue_block_to_loop_header.find(block_id);
+ if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id)
+ {
+ // Continue block might be unreachable in the CFG, but we still like to know the loop dominator.
+ // Edge case is when continue block is also the loop header, don't set the dominator in this case.
+ block.loop_dominator = itr->second;
+ }
+ else
+ {
+ uint32_t loop_dominator = cfg.find_loop_dominator(block_id);
+ if (loop_dominator != block_id)
+ block.loop_dominator = loop_dominator;
+ else
+ block.loop_dominator = SPIRBlock::NoDominator;
+ }
+ }
+
// For each variable which is statically accessed.
for (auto &var : handler.accessed_variables_to_block)
{
// Only deal with variables which are considered local variables in this function.
- if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables))
+ if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
+ end(entry.local_variables))
continue;
DominatorBuilder builder(cfg);
@@ -3379,7 +3232,35 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
builder.lift_continue_block_dominator();
// Add it to a per-block list of variables.
- uint32_t dominating_block = builder.get_dominator();
+ BlockID dominating_block = builder.get_dominator();
+
+ // For variables whose dominating block is inside a loop, there is a risk that these variables
+ // actually need to be preserved across loop iterations. We can express this by adding
+ // a "read" access to the loop header.
+ // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
+ // Should that fail, we look for the outermost loop header and tack on an access there.
+ // Phi nodes cannot have this problem.
+ if (dominating_block)
+ {
+ auto &variable = get<SPIRVariable>(var.first);
+ if (!variable.phi_variable)
+ {
+ auto *block = &get<SPIRBlock>(dominating_block);
+ bool preserve = may_read_undefined_variable_in_block(*block, var.first);
+ if (preserve)
+ {
+ // Find the outermost loop scope.
+ while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
+ block = &get<SPIRBlock>(block->loop_dominator);
+
+ if (block->self != dominating_block)
+ {
+ builder.add_block(block->self);
+ dominating_block = builder.get_dominator();
+ }
+ }
+ }
+ }
// If all blocks here are dead code, this will be 0, so the variable in question
// will be completely eliminated.
@@ -3416,10 +3297,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
{
builder.add_block(block);
- // If a temporary is used in more than one block, we might have to lift continue block
- // access up to loop header like we did for variables.
if (blocks.size() != 1 && is_continue(block))
{
+ // The risk here is that inner loop can dominate the continue block.
+ // Any temporary we access in the continue block must be declared before the loop.
+ // This is moot for complex loops however.
auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
assert(loop_header_block.merge == SPIRBlock::MergeLoop);
@@ -3427,14 +3309,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
if (!loop_header_block.complex_continue)
builder.add_block(loop_header_block.self);
}
- else if (blocks.size() != 1 && is_single_block_loop(block))
- {
- // Awkward case, because the loop header is also the continue block.
- force_temporary = true;
- }
}
uint32_t dominating_block = builder.get_dominator();
+
+ if (blocks.size() != 1 && is_single_block_loop(dominating_block))
+ {
+ // Awkward case, because the loop header is also the continue block,
+ // so hoisting to loop header does not help.
+ force_temporary = true;
+ }
+
if (dominating_block)
{
// If we touch a variable in the dominating block, this is the expected setup.
@@ -3485,17 +3370,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
{
auto &var = get<SPIRVariable>(loop_variable.first);
auto dominator = var.dominator;
- auto block = loop_variable.second;
+ BlockID block = loop_variable.second;
// The variable was accessed in multiple continue blocks, ignore.
- if (block == ~(0u) || block == 0)
+ if (block == BlockID(~(0u)) || block == BlockID(0))
continue;
// Dead code.
- if (dominator == 0)
+ if (dominator == ID(0))
continue;
- uint32_t header = 0;
+ BlockID header = 0;
// Find the loop header for this block if we are a continue block.
{
@@ -3554,10 +3439,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
// merge can occur. Walk the CFG to see if we find anything.
seen_blocks.clear();
- cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) {
+ cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
// We found a block which accesses the variable outside the loop.
if (blocks.find(walk_block) != end(blocks))
static_loop_init = false;
+ return true;
});
if (!static_loop_init)
@@ -3572,7 +3458,80 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA
}
}
-Bitset Compiler::get_buffer_block_flags(uint32_t id) const
+bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
+{
+ for (auto &op : block.ops)
+ {
+ auto *ops = stream(op);
+ switch (op.op)
+ {
+ case OpStore:
+ case OpCopyMemory:
+ if (ops[0] == var)
+ return false;
+ break;
+
+ case OpAccessChain:
+ case OpInBoundsAccessChain:
+ case OpPtrAccessChain:
+ // Access chains are generally used to partially read and write. It's too hard to analyze
+ // if all constituents are written fully before continuing, so just assume it's preserved.
+ // This is the same as the parameter preservation analysis.
+ if (ops[2] == var)
+ return true;
+ break;
+
+ case OpSelect:
+ // Variable pointers.
+ // We might read before writing.
+ if (ops[3] == var || ops[4] == var)
+ return true;
+ break;
+
+ case OpPhi:
+ {
+ // Variable pointers.
+ // We might read before writing.
+ if (op.length < 2)
+ break;
+
+ uint32_t count = op.length - 2;
+ for (uint32_t i = 0; i < count; i += 2)
+ if (ops[i + 2] == var)
+ return true;
+ break;
+ }
+
+ case OpCopyObject:
+ case OpLoad:
+ if (ops[2] == var)
+ return true;
+ break;
+
+ case OpFunctionCall:
+ {
+ if (op.length < 3)
+ break;
+
+ // May read before writing.
+ uint32_t count = op.length - 3;
+ for (uint32_t i = 0; i < count; i++)
+ if (ops[i + 3] == var)
+ return true;
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // Not accessed somehow, at least not in a usual fashion.
+ // It's likely accessed in a branch, so assume we must preserve.
+ return true;
+}
+
+Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
return ir.get_buffer_block_flags(get<SPIRVariable>(id));
}
@@ -3848,6 +3807,20 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin
return true;
}
+const CFG &Compiler::get_cfg_for_current_function() const
+{
+ assert(current_function);
+ return get_cfg_for_function(current_function->self);
+}
+
+const CFG &Compiler::get_cfg_for_function(uint32_t id) const
+{
+ auto cfg_itr = function_cfgs.find(id);
+ assert(cfg_itr != end(function_cfgs));
+ assert(cfg_itr->second);
+ return *cfg_itr->second;
+}
+
void Compiler::build_function_control_flow_graphs_and_analyze()
{
CFGBuilder handler(*this);
@@ -3997,13 +3970,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
return true;
}
-bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const
+bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
{
auto *m = ir.find_meta(id);
return m && m->hlsl_is_magic_counter_buffer;
}
-bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const
+bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
{
auto *m = ir.find_meta(id);
@@ -4068,20 +4041,69 @@ const SmallVector<std::string> &Compiler::get_declared_extensions() const
return ir.declared_extensions;
}
-std::string Compiler::get_remapped_declared_block_name(uint32_t id) const
+std::string Compiler::get_remapped_declared_block_name(VariableID id) const
+{
+ return get_remapped_declared_block_name(id, false);
+}
+
+std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
{
auto itr = declared_block_names.find(id);
if (itr != end(declared_block_names))
+ {
return itr->second;
+ }
else
{
auto &var = get<SPIRVariable>(id);
- auto &type = get<SPIRType>(var.basetype);
- auto *type_meta = ir.find_meta(type.self);
- auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
- return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
+ if (fallback_prefer_instance_name)
+ {
+ return to_name(var.self);
+ }
+ else
+ {
+ auto &type = get<SPIRType>(var.basetype);
+ auto *type_meta = ir.find_meta(type.self);
+ auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
+ return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
+ }
+ }
+}
+
+bool Compiler::reflection_ssbo_instance_name_is_significant() const
+{
+ if (ir.source.known)
+ {
+ // UAVs from HLSL source tend to be declared in a way where the type is reused
+ // but the instance name is significant, and that's the name we should report.
+ // For GLSL, SSBOs each have their own block type as that's how GLSL is written.
+ return ir.source.hlsl;
}
+
+ unordered_set<uint32_t> ssbo_type_ids;
+ bool aliased_ssbo_types = false;
+
+ // If we don't have any OpSource information, we need to perform some shaky heuristics.
+ ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
+ auto &type = this->get<SPIRType>(var.basetype);
+ if (!type.pointer || var.storage == StorageClassFunction)
+ return;
+
+ bool ssbo = var.storage == StorageClassStorageBuffer ||
+ (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));
+
+ if (ssbo)
+ {
+ if (ssbo_type_ids.count(type.self))
+ aliased_ssbo_types = true;
+ else
+ ssbo_type_ids.insert(type.self);
+ }
+ });
+
+ // If the block name is aliased, assume we have HLSL-style UAV declarations.
+ return aliased_ssbo_types;
}
bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args,
@@ -4110,6 +4132,8 @@ bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &resul
case OpCommitWritePipe:
case OpGroupCommitReadPipe:
case OpGroupCommitWritePipe:
+ case OpLine:
+ case OpNoLine:
return false;
default:
@@ -4242,6 +4266,316 @@ void Compiler::analyze_non_block_pointer_types()
sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
}
+bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
+{
+ if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
+ {
+ if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
+ {
+ // Most complex case, we have no sensible way of dealing with this
+ // other than taking the 100% conservative approach, exit early.
+ split_function_case = true;
+ return false;
+ }
+ else
+ {
+ interlock_function_id = call_stack.back();
+ // If this call is performed inside control flow we have a problem.
+ auto &cfg = compiler.get_cfg_for_function(interlock_function_id);
+
+ uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
+ bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
+ if (!outside_control_flow)
+ control_flow_interlock = true;
+ }
+ }
+ return true;
+}
+
+void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
+{
+ current_block_id = block.self;
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+ if (length < 3)
+ return false;
+ call_stack.push_back(args[2]);
+ return true;
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+ call_stack.pop_back();
+ return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+ if (length < 3)
+ return false;
+
+ if (args[2] == interlock_function_id)
+ call_stack_is_interlocked = true;
+
+ call_stack.push_back(args[2]);
+ return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+ if (call_stack.back() == interlock_function_id)
+ call_stack_is_interlocked = false;
+
+ call_stack.pop_back();
+ return true;
+}
+
+void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
+{
+ if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
+ split_function_case)
+ {
+ compiler.interlocked_resources.insert(id);
+ }
+}
+
+bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
+{
+ // Only care about critical section analysis if we have simple case.
+ if (use_critical_section)
+ {
+ if (opcode == OpBeginInvocationInterlockEXT)
+ {
+ in_crit_sec = true;
+ return true;
+ }
+
+ if (opcode == OpEndInvocationInterlockEXT)
+ {
+ // End critical section--nothing more to do.
+ return false;
+ }
+ }
+
+ // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
+ switch (opcode)
+ {
+ case OpLoad:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t ptr = args[2];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+
+ // We're only concerned with buffer and image memory here.
+ if (!var)
+ break;
+
+ switch (var->storage)
+ {
+ default:
+ break;
+
+ case StorageClassUniformConstant:
+ {
+ uint32_t result_type = args[0];
+ uint32_t id = args[1];
+ compiler.set<SPIRExpression>(id, "", result_type, true);
+ compiler.register_read(id, ptr, true);
+ break;
+ }
+
+ case StorageClassUniform:
+ // Must have BufferBlock; we only care about SSBOs.
+ if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+ break;
+ // fallthrough
+ case StorageClassStorageBuffer:
+ access_potential_resource(var->self);
+ break;
+ }
+ break;
+ }
+
+ case OpInBoundsAccessChain:
+ case OpAccessChain:
+ case OpPtrAccessChain:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t result_type = args[0];
+
+ auto &type = compiler.get<SPIRType>(result_type);
+ if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
+ type.storage == StorageClassStorageBuffer)
+ {
+ uint32_t id = args[1];
+ uint32_t ptr = args[2];
+ compiler.set<SPIRExpression>(id, "", result_type, true);
+ compiler.register_read(id, ptr, true);
+ compiler.ir.ids[id].set_allow_type_rewrite();
+ }
+ break;
+ }
+
+ case OpImageTexelPointer:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t result_type = args[0];
+ uint32_t id = args[1];
+ uint32_t ptr = args[2];
+ auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+ if (var)
+ e.loaded_from = var->self;
+ break;
+ }
+
+ case OpStore:
+ case OpImageWrite:
+ case OpAtomicStore:
+ {
+ if (length < 1)
+ return false;
+
+ uint32_t ptr = args[0];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+ if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+ var->storage == StorageClassStorageBuffer))
+ {
+ access_potential_resource(var->self);
+ }
+
+ break;
+ }
+
+ case OpCopyMemory:
+ {
+ if (length < 2)
+ return false;
+
+ uint32_t dst = args[0];
+ uint32_t src = args[1];
+ auto *dst_var = compiler.maybe_get_backing_variable(dst);
+ auto *src_var = compiler.maybe_get_backing_variable(src);
+
+ if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
+ access_potential_resource(dst_var->self);
+
+ if (src_var)
+ {
+ if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
+ break;
+
+ if (src_var->storage == StorageClassUniform &&
+ !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
+ {
+ break;
+ }
+
+ access_potential_resource(src_var->self);
+ }
+
+ break;
+ }
+
+ case OpImageRead:
+ case OpAtomicLoad:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t ptr = args[2];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+
+ // We're only concerned with buffer and image memory here.
+ if (!var)
+ break;
+
+ switch (var->storage)
+ {
+ default:
+ break;
+
+ case StorageClassUniform:
+ // Must have BufferBlock; we only care about SSBOs.
+ if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+ break;
+ // fallthrough
+ case StorageClassUniformConstant:
+ case StorageClassStorageBuffer:
+ access_potential_resource(var->self);
+ break;
+ }
+ break;
+ }
+
+ case OpAtomicExchange:
+ case OpAtomicCompareExchange:
+ case OpAtomicIIncrement:
+ case OpAtomicIDecrement:
+ case OpAtomicIAdd:
+ case OpAtomicISub:
+ case OpAtomicSMin:
+ case OpAtomicUMin:
+ case OpAtomicSMax:
+ case OpAtomicUMax:
+ case OpAtomicAnd:
+ case OpAtomicOr:
+ case OpAtomicXor:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t ptr = args[2];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+ if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+ var->storage == StorageClassStorageBuffer))
+ {
+ access_potential_resource(var->self);
+ }
+
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+void Compiler::analyze_interlocked_resource_usage()
+{
+ if (get_execution_model() == ExecutionModelFragment &&
+ (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+ get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+ get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+ get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
+ {
+ InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
+ traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);
+
+ InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
+ handler.interlock_function_id = prepass_handler.interlock_function_id;
+ handler.split_function_case = prepass_handler.split_function_case;
+ handler.control_flow_interlock = prepass_handler.control_flow_interlock;
+ handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;
+
+ traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
+
+ // For GLSL. If we hit any of these cases, we have to fall back to conservative approach.
+ interlocked_is_complex =
+ !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
+ }
+}
+
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
if (!type.pointer)
@@ -4250,3 +4584,12 @@ bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
// If parent type has same pointer depth, we must have an array of pointers.
return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth;
}
+
+bool Compiler::flush_phi_required(BlockID from, BlockID to) const
+{
+ auto &child = get<SPIRBlock>(to);
+ for (auto &phi : child.phi_variables)
+ if (phi.parent == from)
+ return true;
+ return false;
+}
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp
index 4129e81..7385a6c 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp
@@ -27,18 +27,18 @@ struct Resource
{
// Resources are identified with their SPIR-V ID.
// This is the ID of the OpVariable.
- uint32_t id;
+ ID id;
// The type ID of the variable which includes arrays and all type modifications.
// This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general
// since these modifications typically happen on the base_type_id.
- uint32_t type_id;
+ TypeID type_id;
// The base type of the declared resource.
// This type is the base type which ignores pointers and arrays of the type_id.
// This is mostly useful to parse decorations of the underlying type.
// base_type_id can also be obtained with get_type(get_type(type_id).self).
- uint32_t base_type_id;
+ TypeID base_type_id;
// The declared name (OpName) of the resource.
// For Buffer blocks, the name actually reflects the externally
@@ -77,17 +77,17 @@ struct ShaderResources
struct CombinedImageSampler
{
// The ID of the sampler2D variable.
- uint32_t combined_id;
+ VariableID combined_id;
// The ID of the texture2D variable.
- uint32_t image_id;
+ VariableID image_id;
// The ID of the sampler variable.
- uint32_t sampler_id;
+ VariableID sampler_id;
};
struct SpecializationConstant
{
// The ID of the specialization constant.
- uint32_t id;
+ ConstantID id;
// The constant ID of the constant, used in Vulkan during pipeline creation.
uint32_t constant_id;
};
@@ -117,15 +117,6 @@ struct EntryPoint
spv::ExecutionModel execution_model;
};
-enum ExtendedDecorations
-{
- SPIRVCrossDecorationPacked,
- SPIRVCrossDecorationPackedType,
- SPIRVCrossDecorationInterfaceMemberIndex,
- SPIRVCrossDecorationInterfaceOrigID,
- SPIRVCrossDecorationArgumentBufferID
-};
-
class Compiler
{
public:
@@ -151,81 +142,81 @@ public:
virtual std::string compile();
// Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned.
- const std::string &get_name(uint32_t id) const;
+ const std::string &get_name(ID id) const;
// Applies a decoration to an ID. Effectively injects OpDecorate.
- void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0);
- void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument);
+ void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0);
+ void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument);
// Overrides the identifier OpName of an ID.
// Identifiers beginning with underscores or identifiers which contain double underscores
// are reserved by the implementation.
- void set_name(uint32_t id, const std::string &name);
+ void set_name(ID id, const std::string &name);
// Gets a bitmask for the decorations which are applied to ID.
// I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar)
- const Bitset &get_decoration_bitset(uint32_t id) const;
+ const Bitset &get_decoration_bitset(ID id) const;
// Returns whether the decoration has been applied to the ID.
- bool has_decoration(uint32_t id, spv::Decoration decoration) const;
+ bool has_decoration(ID id, spv::Decoration decoration) const;
// Gets the value for decorations which take arguments.
// If the decoration is a boolean (i.e. spv::DecorationNonWritable),
// 1 will be returned.
// If decoration doesn't exist or decoration is not recognized,
// 0 will be returned.
- uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
- const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const;
+ uint32_t get_decoration(ID id, spv::Decoration decoration) const;
+ const std::string &get_decoration_string(ID id, spv::Decoration decoration) const;
// Removes the decoration for an ID.
- void unset_decoration(uint32_t id, spv::Decoration decoration);
+ void unset_decoration(ID id, spv::Decoration decoration);
// Gets the SPIR-V type associated with ID.
// Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource.
- const SPIRType &get_type(uint32_t id) const;
+ const SPIRType &get_type(TypeID id) const;
// Gets the SPIR-V type of a variable.
- const SPIRType &get_type_from_variable(uint32_t id) const;
+ const SPIRType &get_type_from_variable(VariableID id) const;
// Gets the underlying storage class for an OpVariable.
- spv::StorageClass get_storage_class(uint32_t id) const;
+ spv::StorageClass get_storage_class(VariableID id) const;
// If get_name() is an empty string, get the fallback name which will be used
// instead in the disassembled source.
- virtual const std::string get_fallback_name(uint32_t id) const;
+ virtual const std::string get_fallback_name(ID id) const;
// If get_name() of a Block struct is an empty string, get the fallback name.
// This needs to be per-variable as multiple variables can use the same block type.
- virtual const std::string get_block_fallback_name(uint32_t id) const;
+ virtual const std::string get_block_fallback_name(VariableID id) const;
// Given an OpTypeStruct in ID, obtain the identifier for member number "index".
// This may be an empty string.
- const std::string &get_member_name(uint32_t id, uint32_t index) const;
+ const std::string &get_member_name(TypeID id, uint32_t index) const;
// Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index".
- uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
- const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+ uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+ const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const;
// Sets the member identifier for OpTypeStruct ID, member number "index".
- void set_member_name(uint32_t id, uint32_t index, const std::string &name);
+ void set_member_name(TypeID id, uint32_t index, const std::string &name);
// Returns the qualified member identifier for OpTypeStruct ID, member number "index",
// or an empty string if no qualified alias exists
- const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const;
+ const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const;
// Gets the decoration mask for a member of a struct, similar to get_decoration_mask.
- const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const;
+ const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const;
// Returns whether the decoration has been applied to a member of a struct.
- bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+ bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
// Similar to set_decoration, but for struct members.
- void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
- void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+ void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+ void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
const std::string &argument);
// Unsets a member decoration, similar to unset_decoration.
- void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
+ void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration);
// Gets the fallback name for a member, similar to get_fallback_name.
virtual const std::string get_fallback_member_name(uint32_t index) const
@@ -237,7 +228,7 @@ public:
// SPIR-V shader. The granularity of this analysis is per-member of a struct.
// This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks.
// ID is the Resource::id obtained from get_shader_resources().
- SmallVector<BufferRange> get_active_buffer_ranges(uint32_t id) const;
+ SmallVector<BufferRange> get_active_buffer_ranges(VariableID id) const;
// Returns the effective size of a buffer block.
size_t get_declared_struct_size(const SPIRType &struct_type) const;
@@ -255,7 +246,7 @@ public:
size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const;
// Returns the effective size of a buffer block struct member.
- virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
+ size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
// Returns a set of all global variables which are statically accessed
// by the control flow graph from the current entry point.
@@ -265,12 +256,12 @@ public:
//
// To use the returned set as the filter for which variables are used during compilation,
// this set can be moved to set_enabled_interface_variables().
- std::unordered_set<uint32_t> get_active_interface_variables() const;
+ std::unordered_set<VariableID> get_active_interface_variables() const;
// Sets the interface variables which are used during compilation.
// By default, all variables are used.
// Once set, compile() will only consider the set in active_variables.
- void set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables);
+ void set_enabled_interface_variables(std::unordered_set<VariableID> active_variables);
// Query shader resources, use ids with reflection interface to modify or query binding points, etc.
ShaderResources get_shader_resources() const;
@@ -278,19 +269,19 @@ public:
// Query shader resources, but only return the variables which are part of active_variables.
// E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically
// accessed.
- ShaderResources get_shader_resources(const std::unordered_set<uint32_t> &active_variables) const;
+ ShaderResources get_shader_resources(const std::unordered_set<VariableID> &active_variables) const;
// Remapped variables are considered built-in variables and a backend will
// not emit a declaration for this variable.
// This is mostly useful for making use of builtins which are dependent on extensions.
- void set_remapped_variable_state(uint32_t id, bool remap_enable);
- bool get_remapped_variable_state(uint32_t id) const;
+ void set_remapped_variable_state(VariableID id, bool remap_enable);
+ bool get_remapped_variable_state(VariableID id) const;
// For subpassInput variables which are remapped to plain variables,
// the number of components in the remapped
// variable must be specified as the backing type of subpass inputs are opaque.
- void set_subpass_input_remapped_components(uint32_t id, uint32_t components);
- uint32_t get_subpass_input_remapped_components(uint32_t id) const;
+ void set_subpass_input_remapped_components(VariableID id, uint32_t components);
+ uint32_t get_subpass_input_remapped_components(VariableID id) const;
// All operations work on the current entry point.
// Entry points can be swapped out with set_entry_point().
@@ -371,7 +362,7 @@ public:
// If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile().
// Calling this function also invalidates get_active_interface_variables(), so this should be called
// before that function.
- uint32_t build_dummy_sampler_for_combined_images();
+ VariableID build_dummy_sampler_for_combined_images();
// Analyzes all separate image and samplers used from the currently selected entry point,
// and re-routes them all to a combined image sampler instead.
@@ -420,8 +411,8 @@ public:
// constant_type is the SPIRType for the specialization constant,
// which can be queried to determine which fields in the unions should be poked at.
SmallVector<SpecializationConstant> get_specialization_constants() const;
- SPIRConstant &get_constant(uint32_t id);
- const SPIRConstant &get_constant(uint32_t id) const;
+ SPIRConstant &get_constant(ConstantID id);
+ const SPIRConstant &get_constant(ConstantID id) const;
uint32_t get_current_id_bound() const
{
@@ -444,7 +435,7 @@ public:
// If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true,
// otherwise, returns false.
// If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false.
- bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const;
+ bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const;
// HLSL counter buffer reflection interface.
// Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where
@@ -459,7 +450,7 @@ public:
// only return true if OpSource was reported HLSL.
// To rely on this functionality, ensure that the SPIR-V module is not stripped.
- bool buffer_is_hlsl_counter_buffer(uint32_t id) const;
+ bool buffer_is_hlsl_counter_buffer(VariableID id) const;
// Queries if a buffer object has a neighbor "counter" buffer.
// If so, the ID of that counter buffer will be returned in counter_id.
@@ -467,7 +458,7 @@ public:
// Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will
// only return true if OpSource was reported HLSL.
// To rely on this functionality, ensure that the SPIR-V module is not stripped.
- bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const;
+ bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const;
// Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module.
const SmallVector<spv::Capability> &get_declared_capabilities() const;
@@ -488,13 +479,13 @@ public:
// ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type.
//
// This also applies to HLSL cbuffers.
- std::string get_remapped_declared_block_name(uint32_t id) const;
+ std::string get_remapped_declared_block_name(VariableID id) const;
// For buffer block variables, get the decorations for that variable.
// Sometimes, decorations for buffer blocks are found in member decorations instead
// of direct decorations on the variable itself.
// The most common use here is to check if a buffer is readonly or writeonly.
- Bitset get_buffer_block_flags(uint32_t id) const;
+ Bitset get_buffer_block_flags(VariableID id) const;
protected:
const uint32_t *stream(const Instruction &instr) const
@@ -518,7 +509,7 @@ protected:
SPIRFunction *current_function = nullptr;
SPIRBlock *current_block = nullptr;
- std::unordered_set<uint32_t> active_interface_variables;
+ std::unordered_set<VariableID> active_interface_variables;
bool check_active_interface_variables = false;
// If our IDs are out of range here as part of opcodes, throw instead of
@@ -558,7 +549,9 @@ protected:
template <typename T>
const T *maybe_get(uint32_t id) const
{
- if (ir.ids[id].get_type() == static_cast<Types>(T::type))
+ if (id >= ir.ids.size())
+ return nullptr;
+ else if (ir.ids[id].get_type() == static_cast<Types>(T::type))
return &get<T>(id);
else
return nullptr;
@@ -614,6 +607,7 @@ protected:
bool expression_is_lvalue(uint32_t id) const;
bool variable_storage_is_aliased(const SPIRVariable &var);
SPIRVariable *maybe_get_backing_variable(uint32_t chain);
+ spv::StorageClass get_backing_variable_storage(uint32_t ptr);
void register_read(uint32_t expr, uint32_t chain, bool forwarded);
void register_write(uint32_t chain);
@@ -626,7 +620,7 @@ protected:
inline bool is_single_block_loop(uint32_t next) const
{
auto &block = get<SPIRBlock>(next);
- return block.merge == SPIRBlock::MergeLoop && block.continue_block == next;
+ return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next);
}
inline bool is_break(uint32_t next) const
@@ -666,9 +660,9 @@ protected:
bool function_is_pure(const SPIRFunction &func);
bool block_is_pure(const SPIRBlock &block);
- bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to);
bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const;
+ bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const;
bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const;
SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const;
@@ -718,6 +712,13 @@ protected:
{
}
+ // Called after returning from a function or when entering a block,
+ // can be called multiple times per block,
+ // while set_current_block is only called on block entry.
+ virtual void rearm_current_block(const SPIRBlock &)
+ {
+ }
+
virtual bool begin_function_scope(const uint32_t *, uint32_t)
{
return true;
@@ -749,7 +750,7 @@ protected:
struct InterfaceVariableAccessHandler : OpcodeHandler
{
- InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<uint32_t> &variables_)
+ InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<VariableID> &variables_)
: compiler(compiler_)
, variables(variables_)
{
@@ -758,7 +759,7 @@ protected:
bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
const Compiler &compiler;
- std::unordered_set<uint32_t> &variables;
+ std::unordered_set<VariableID> &variables;
};
struct CombinedImageSamplerHandler : OpcodeHandler
@@ -780,8 +781,8 @@ protected:
uint32_t remap_parameter(uint32_t id);
void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length);
void pop_remap_parameters();
- void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id,
- bool depth);
+ void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id,
+ VariableID sampler_id, bool depth);
};
struct DummySamplerForCombinedImageHandler : OpcodeHandler
@@ -814,7 +815,7 @@ protected:
// This must be an ordered data structure so we always pick the same type aliases.
SmallVector<uint32_t> global_struct_cache;
- ShaderResources get_shader_resources(const std::unordered_set<uint32_t> *active_variables) const;
+ ShaderResources get_shader_resources(const std::unordered_set<VariableID> *active_variables) const;
VariableTypeRemapCallback variable_remap_callback;
@@ -822,7 +823,9 @@ protected:
std::unordered_set<uint32_t> forced_temporaries;
std::unordered_set<uint32_t> forwarded_temporaries;
+ std::unordered_set<uint32_t> suppressed_usage_tracking;
std::unordered_set<uint32_t> hoisted_temporaries;
+ std::unordered_set<uint32_t> forced_invariant_temporaries;
Bitset active_input_builtins;
Bitset active_output_builtins;
@@ -889,9 +892,12 @@ protected:
void build_function_control_flow_graphs_and_analyze();
std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs;
+ const CFG &get_cfg_for_current_function() const;
+ const CFG &get_cfg_for_function(uint32_t id) const;
+
struct CFGBuilder : OpcodeHandler
{
- CFGBuilder(Compiler &compiler_);
+ explicit CFGBuilder(Compiler &compiler_);
bool follow_function_call(const SPIRFunction &func) override;
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
@@ -936,7 +942,7 @@ protected:
struct PhysicalStorageBufferPointerHandler : OpcodeHandler
{
- PhysicalStorageBufferPointerHandler(Compiler &compiler_);
+ explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_);
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
Compiler &compiler;
std::unordered_set<uint32_t> types;
@@ -947,6 +953,62 @@ protected:
void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler);
void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler,
bool single_function);
+ bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
+
+ // Finds all resources that are written to from inside the critical section, if present.
+ // The critical section is delimited by OpBeginInvocationInterlockEXT and
+ // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written
+ // while inside the critical section must be placed in a raster order group.
+ struct InterlockedResourceAccessHandler : OpcodeHandler
+ {
+ InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id)
+ : compiler(compiler_)
+ {
+ call_stack.push_back(entry_point_id);
+ }
+
+ bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+ bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+ bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+ Compiler &compiler;
+ bool in_crit_sec = false;
+
+ uint32_t interlock_function_id = 0;
+ bool split_function_case = false;
+ bool control_flow_interlock = false;
+ bool use_critical_section = false;
+ bool call_stack_is_interlocked = false;
+ SmallVector<uint32_t> call_stack;
+
+ void access_potential_resource(uint32_t id);
+ };
+
+ struct InterlockedResourceAccessPrepassHandler : OpcodeHandler
+ {
+ InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id)
+ : compiler(compiler_)
+ {
+ call_stack.push_back(entry_point_id);
+ }
+
+ void rearm_current_block(const SPIRBlock &block) override;
+ bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+ bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+ bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+ Compiler &compiler;
+ uint32_t interlock_function_id = 0;
+ uint32_t current_block_id = 0;
+ bool split_function_case = false;
+ bool control_flow_interlock = false;
+ SmallVector<uint32_t> call_stack;
+ };
+
+ void analyze_interlocked_resource_usage();
+ // The set of all resources written while inside the critical section, if present.
+ std::unordered_set<uint32_t> interlocked_resources;
+ bool interlocked_is_complex = false;
void make_constant_null(uint32_t id, uint32_t type);
@@ -972,15 +1034,18 @@ protected:
void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration);
bool type_is_array_of_pointers(const SPIRType &type) const;
+ bool type_is_block_like(const SPIRType &type) const;
+ bool type_is_opaque_value(const SPIRType &type) const;
+
+ bool reflection_ssbo_instance_name_is_significant() const;
+ std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const;
+
+ bool flush_phi_required(BlockID from, BlockID to) const;
private:
// Used only to implement the old deprecated get_entry_point() interface.
const SPIREntryPoint &get_first_entry_point(const std::string &name) const;
SPIREntryPoint &get_first_entry_point(const std::string &name);
-
- void fixup_type_alias();
- bool type_is_block_like(const SPIRType &type) const;
- bool type_is_opaque_value(const SPIRType &type) const;
};
} // namespace SPIRV_CROSS_NAMESPACE
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp
index d3352d9..f6e63b4 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp
@@ -33,6 +33,11 @@
#if SPIRV_CROSS_C_API_REFLECT
#include "spirv_reflect.hpp"
#endif
+
+#ifdef HAVE_SPIRV_CROSS_GIT_VERSION
+#include "gitversion.h"
+#endif
+
#include "spirv_parser.hpp"
#include <memory>
#include <new>
@@ -157,7 +162,7 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation
struct spvc_set_s : ScratchMemoryAllocation
{
- std::unordered_set<uint32_t> set;
+ std::unordered_set<VariableID> set;
};
// Dummy-inherit to we can keep our opaque type handle type safe in C-land as well,
@@ -412,6 +417,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_FLIP_VERTEX_Y:
options->glsl.vertex.flip_vert_y = value != 0;
break;
+ case SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES:
+ options->glsl.emit_line_directives = value != 0;
+ break;
case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE:
options->glsl.vertex.support_nonzero_base_instance = value != 0;
@@ -474,8 +482,8 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
options->msl.texel_buffer_texture_width = value;
break;
- case SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX:
- options->msl.aux_buffer_index = value;
+ case SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX:
+ options->msl.swizzle_buffer_index = value;
break;
case SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX:
@@ -533,6 +541,34 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE:
options->msl.texture_buffer_native = value != 0;
break;
+
+ case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX:
+ options->msl.buffer_size_buffer_index = value;
+ break;
+
+ case SPVC_COMPILER_OPTION_MSL_MULTIVIEW:
+ options->msl.multiview = value != 0;
+ break;
+
+ case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX:
+ options->msl.view_mask_buffer_index = value;
+ break;
+
+ case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX:
+ options->msl.device_index = value;
+ break;
+
+ case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX:
+ options->msl.view_index_from_device_index = value != 0;
+ break;
+
+ case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE:
+ options->msl.dispatch_base = value != 0;
+ break;
+
+ case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX:
+ options->msl.dynamic_offsets_buffer_index = value;
+ break;
#endif
default:
@@ -726,7 +762,7 @@ spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler)
#endif
}
-spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
+spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler)
{
#if SPIRV_CROSS_C_API_MSL
if (compiler->backend != SPVC_BACKEND_MSL)
@@ -736,13 +772,35 @@ spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
}
auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
- return msl.needs_aux_buffer() ? SPVC_TRUE : SPVC_FALSE;
+ return msl.needs_swizzle_buffer() ? SPVC_TRUE : SPVC_FALSE;
#else
compiler->context->report_error("MSL function used on a non-MSL backend.");
return SPVC_FALSE;
#endif
}
+spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_FALSE;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ return msl.needs_buffer_size_buffer() ? SPVC_TRUE : SPVC_FALSE;
+#else
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_FALSE;
+#endif
+}
+
+spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler)
+{
+ return spvc_compiler_msl_needs_swizzle_buffer(compiler);
+}
+
spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler)
{
#if SPIRV_CROSS_C_API_MSL
@@ -811,7 +869,7 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const
attr.msl_stride = va->msl_stride;
attr.format = static_cast<MSLVertexFormat>(va->format);
attr.builtin = static_cast<spv::BuiltIn>(va->builtin);
- attr.per_instance = va->per_instance;
+ attr.per_instance = va->per_instance != 0;
msl.add_msl_vertex_attribute(attr);
return SPVC_SUCCESS;
#else
@@ -848,6 +906,27 @@ spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
#endif
}
+spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ msl.add_dynamic_buffer(desc_set, binding, index);
+ return SPVC_SUCCESS;
+#else
+ (void)binding;
+ (void)desc_set;
+ (void)index;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set)
{
#if SPIRV_CROSS_C_API_MSL
@@ -867,6 +946,26 @@ spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler
#endif
}
+spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ msl.set_argument_buffer_device_address_space(desc_set, bool(device_address));
+ return SPVC_SUCCESS;
+#else
+ (void)desc_set;
+ (void)device_address;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location)
{
#if SPIRV_CROSS_C_API_MSL
@@ -907,38 +1006,140 @@ spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutio
#endif
}
-spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id,
- const spvc_msl_constexpr_sampler *sampler)
-{
#if SPIRV_CROSS_C_API_MSL
- if (compiler->backend != SPVC_BACKEND_MSL)
- {
- compiler->context->report_error("MSL function used on a non-MSL backend.");
- return SPVC_ERROR_INVALID_ARGUMENT;
- }
-
- auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
- MSLConstexprSampler samp;
+static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_constexpr_sampler *sampler)
+{
samp.s_address = static_cast<MSLSamplerAddress>(sampler->s_address);
samp.t_address = static_cast<MSLSamplerAddress>(sampler->t_address);
samp.r_address = static_cast<MSLSamplerAddress>(sampler->r_address);
samp.lod_clamp_min = sampler->lod_clamp_min;
samp.lod_clamp_max = sampler->lod_clamp_max;
- samp.lod_clamp_enable = sampler->lod_clamp_enable;
+ samp.lod_clamp_enable = sampler->lod_clamp_enable != 0;
samp.min_filter = static_cast<MSLSamplerFilter>(sampler->min_filter);
samp.mag_filter = static_cast<MSLSamplerFilter>(sampler->mag_filter);
samp.mip_filter = static_cast<MSLSamplerMipFilter>(sampler->mip_filter);
- samp.compare_enable = sampler->compare_enable;
- samp.anisotropy_enable = sampler->anisotropy_enable;
+ samp.compare_enable = sampler->compare_enable != 0;
+ samp.anisotropy_enable = sampler->anisotropy_enable != 0;
samp.max_anisotropy = sampler->max_anisotropy;
samp.compare_func = static_cast<MSLSamplerCompareFunc>(sampler->compare_func);
samp.coord = static_cast<MSLSamplerCoord>(sampler->coord);
samp.border_color = static_cast<MSLSamplerBorderColor>(sampler->border_color);
+}
+
+static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv)
+{
+ samp.ycbcr_conversion_enable = conv != nullptr;
+ if (conv == nullptr) return;
+ samp.planes = conv->planes;
+ samp.resolution = static_cast<MSLFormatResolution>(conv->resolution);
+ samp.chroma_filter = static_cast<MSLSamplerFilter>(conv->chroma_filter);
+ samp.x_chroma_offset = static_cast<MSLChromaLocation>(conv->x_chroma_offset);
+ samp.y_chroma_offset = static_cast<MSLChromaLocation>(conv->y_chroma_offset);
+ for (int i = 0; i < 4; i++)
+ samp.swizzle[i] = static_cast<MSLComponentSwizzle>(conv->swizzle[i]);
+ samp.ycbcr_model = static_cast<MSLSamplerYCbCrModelConversion>(conv->ycbcr_model);
+ samp.ycbcr_range = static_cast<MSLSamplerYCbCrRange>(conv->ycbcr_range);
+ samp.bpc = conv->bpc;
+}
+#endif
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id,
+ const spvc_msl_constexpr_sampler *sampler)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ MSLConstexprSampler samp;
+ spvc_convert_msl_sampler(samp, sampler);
+ msl.remap_constexpr_sampler(id, samp);
+ return SPVC_SUCCESS;
+#else
+ (void)id;
+ (void)sampler;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler,
+ unsigned desc_set, unsigned binding,
+ const spvc_msl_constexpr_sampler *sampler)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ MSLConstexprSampler samp;
+ spvc_convert_msl_sampler(samp, sampler);
+ msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp);
+ return SPVC_SUCCESS;
+#else
+ (void)desc_set;
+ (void)binding;
+ (void)sampler;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id,
+ const spvc_msl_constexpr_sampler *sampler,
+ const spvc_msl_sampler_ycbcr_conversion *conv)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ MSLConstexprSampler samp;
+ spvc_convert_msl_sampler(samp, sampler);
+ spvc_convert_msl_sampler_ycbcr_conversion(samp, conv);
msl.remap_constexpr_sampler(id, samp);
return SPVC_SUCCESS;
#else
(void)id;
(void)sampler;
+ (void)conv;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+#endif
+}
+
+spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler,
+ unsigned desc_set, unsigned binding,
+ const spvc_msl_constexpr_sampler *sampler,
+ const spvc_msl_sampler_ycbcr_conversion *conv)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return SPVC_ERROR_INVALID_ARGUMENT;
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ MSLConstexprSampler samp;
+ spvc_convert_msl_sampler(samp, sampler);
+ spvc_convert_msl_sampler_ycbcr_conversion(samp, conv);
+ msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp);
+ return SPVC_SUCCESS;
+#else
+ (void)desc_set;
+ (void)binding;
+ (void)sampler;
+ (void)conv;
compiler->context->report_error("MSL function used on a non-MSL backend.");
return SPVC_ERROR_INVALID_ARGUMENT;
#endif
@@ -965,6 +1166,42 @@ spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compi
#endif
}
+unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return uint32_t(-1);
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ return msl.get_automatic_msl_resource_binding(id);
+#else
+ (void)id;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return uint32_t(-1);
+#endif
+}
+
+unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id)
+{
+#if SPIRV_CROSS_C_API_MSL
+ if (compiler->backend != SPVC_BACKEND_MSL)
+ {
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return uint32_t(-1);
+ }
+
+ auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
+ return msl.get_automatic_msl_resource_binding_secondary(id);
+#else
+ (void)id;
+ compiler->context->report_error("MSL function used on a non-MSL backend.");
+ return uint32_t(-1);
+#endif
+}
+
spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source)
{
SPVC_BEGIN_SAFE_SCOPE
@@ -1276,6 +1513,11 @@ const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, s
.c_str();
}
+const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index)
+{
+ return compiler->compiler->get_member_name(id, member_index).c_str();
+}
+
spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, const spvc_entry_point **entry_points,
size_t *num_entry_points)
{
@@ -1413,7 +1655,7 @@ unsigned spvc_type_get_bit_width(spvc_type type)
return type->width;
}
-unsigned spvc_type_get_SmallVector_size(spvc_type type)
+unsigned spvc_type_get_vector_size(spvc_type type)
{
return type->vecsize;
}
@@ -1516,6 +1758,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c
return SPVC_SUCCESS;
}
+spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size)
+{
+ SPVC_BEGIN_SAFE_SCOPE
+ {
+ *size = compiler->compiler->get_declared_struct_member_size(*static_cast<const SPIRType *>(struct_type), index);
+ }
+ SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
+ return SPVC_SUCCESS;
+}
+
spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset)
{
SPVC_BEGIN_SAFE_SCOPE
@@ -1643,6 +1895,32 @@ spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc
return ret;
}
+spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
+ spvc_variable_id id,
+ const spvc_buffer_range **ranges,
+ size_t *num_ranges)
+{
+ SPVC_BEGIN_SAFE_SCOPE
+ {
+ auto active_ranges = compiler->compiler->get_active_buffer_ranges(id);
+ SmallVector<spvc_buffer_range> translated;
+ translated.reserve(active_ranges.size());
+ for (auto &r : active_ranges)
+ {
+ spvc_buffer_range trans = { r.index, r.offset, r.range };
+ translated.push_back(trans);
+ }
+
+ auto ptr = spvc_allocate<TemporaryBuffer<spvc_buffer_range>>();
+ ptr->buffer = std::move(translated);
+ *ranges = ptr->buffer.data();
+ *num_ranges = ptr->buffer.size();
+ compiler->context->allocations.push_back(std::move(ptr));
+ }
+ SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY)
+ return SPVC_SUCCESS;
+}
+
float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row)
{
return constant->scalar_f16(column, row);
@@ -1854,6 +2132,24 @@ void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler)
#endif
}
+void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv)
+{
+#if SPIRV_CROSS_C_API_MSL
+ MSLConstexprSampler defaults;
+ conv->planes = defaults.planes;
+ conv->resolution = static_cast<spvc_msl_format_resolution>(defaults.resolution);
+ conv->chroma_filter = static_cast<spvc_msl_sampler_filter>(defaults.chroma_filter);
+ conv->x_chroma_offset = static_cast<spvc_msl_chroma_location>(defaults.x_chroma_offset);
+ conv->y_chroma_offset = static_cast<spvc_msl_chroma_location>(defaults.y_chroma_offset);
+ for (int i = 0; i < 4; i++)
+ conv->swizzle[i] = static_cast<spvc_msl_component_swizzle>(defaults.swizzle[i]);
+ conv->ycbcr_model = static_cast<spvc_msl_sampler_ycbcr_model_conversion>(defaults.ycbcr_model);
+ conv->ycbcr_range = static_cast<spvc_msl_sampler_ycbcr_range>(defaults.ycbcr_range);
+#else
+ memset(conv, 0, sizeof(*conv));
+#endif
+}
+
unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler)
{
return compiler->compiler->get_current_id_bound();
@@ -1866,6 +2162,15 @@ void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch)
*patch = SPVC_C_API_VERSION_PATCH;
}
+const char *spvc_get_commit_revision_and_timestamp(void)
+{
+#ifdef HAVE_SPIRV_CROSS_GIT_VERSION
+ return SPIRV_CROSS_GIT_REVISION;
+#else
+ return "";
+#endif
+}
+
#ifdef _MSC_VER
#pragma warning(pop)
#endif
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h
index 9e10d07..f950803 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h
@@ -33,7 +33,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 7
+#define SPVC_C_API_VERSION_MINOR 19
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@@ -58,6 +58,9 @@ extern "C" {
*/
SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch);
+/* Gets a human readable version string to identify which commit a particular binary was created from. */
+SPVC_PUBLIC_API const char *spvc_get_commit_revision_and_timestamp(void);
+
/* These types are opaque to the user. */
typedef struct spvc_context_s *spvc_context;
typedef struct spvc_parsed_ir_s *spvc_parsed_ir;
@@ -112,6 +115,14 @@ typedef struct spvc_specialization_constant
} spvc_specialization_constant;
/* See C++ API. */
+typedef struct spvc_buffer_range
+{
+ unsigned index;
+ size_t offset;
+ size_t range;
+} spvc_buffer_range;
+
+/* See C++ API. */
typedef struct spvc_hlsl_root_constants
{
unsigned start;
@@ -290,9 +301,14 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b
#define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u))
#define SPVC_MSL_PUSH_CONSTANT_BINDING (0)
+#define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u))
+#define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u))
+#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u))
+
+/* Obsolete. Sticks around for backwards compatibility. */
#define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1
-/* Runtime check for incompatibility. */
+/* Runtime check for incompatibility. Obsolete. */
SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void);
/* Maps to C++ API. */
@@ -355,6 +371,55 @@ typedef enum spvc_msl_sampler_border_color
} spvc_msl_sampler_border_color;
/* Maps to C++ API. */
+typedef enum spvc_msl_format_resolution
+{
+ SPVC_MSL_FORMAT_RESOLUTION_444 = 0,
+ SPVC_MSL_FORMAT_RESOLUTION_422,
+ SPVC_MSL_FORMAT_RESOLUTION_420,
+ SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff
+} spvc_msl_format_resolution;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_chroma_location
+{
+ SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0,
+ SPVC_MSL_CHROMA_LOCATION_MIDPOINT,
+ SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff
+} spvc_msl_chroma_location;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_component_swizzle
+{
+ SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0,
+ SPVC_MSL_COMPONENT_SWIZZLE_ZERO,
+ SPVC_MSL_COMPONENT_SWIZZLE_ONE,
+ SPVC_MSL_COMPONENT_SWIZZLE_R,
+ SPVC_MSL_COMPONENT_SWIZZLE_G,
+ SPVC_MSL_COMPONENT_SWIZZLE_B,
+ SPVC_MSL_COMPONENT_SWIZZLE_A,
+ SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff
+} spvc_msl_component_swizzle;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_sampler_ycbcr_model_conversion
+{
+ SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0,
+ SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY,
+ SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709,
+ SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601,
+ SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020,
+ SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff
+} spvc_msl_sampler_ycbcr_model_conversion;
+
+/* Maps to C++ API. */
+typedef enum spvc_msl_sampler_ycbcr_range
+{
+ SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0,
+ SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+ SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff
+} spvc_msl_sampler_ycbcr_range;
+
+/* Maps to C++ API. */
typedef struct spvc_msl_constexpr_sampler
{
spvc_msl_sampler_coord coord;
@@ -381,6 +446,26 @@ typedef struct spvc_msl_constexpr_sampler
*/
SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler);
+/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. */
+typedef struct spvc_msl_sampler_ycbcr_conversion
+{
+ unsigned planes;
+ spvc_msl_format_resolution resolution;
+ spvc_msl_sampler_filter chroma_filter;
+ spvc_msl_chroma_location x_chroma_offset;
+ spvc_msl_chroma_location y_chroma_offset;
+ spvc_msl_component_swizzle swizzle[4];
+ spvc_msl_sampler_ycbcr_model_conversion ycbcr_model;
+ spvc_msl_sampler_ycbcr_range ycbcr_range;
+ unsigned bpc;
+} spvc_msl_sampler_ycbcr_conversion;
+
+/*
+ * Initializes the sampler Y'CbCr conversion struct.
+ * The defaults are non-zero.
+ */
+SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv);
+
/* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */
typedef enum spvc_compiler_option
{
@@ -407,7 +492,11 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT,
+
+ /* Obsolete, use SWIZZLE_BUFFER_INDEX instead. */
SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT,
+
SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT,
@@ -428,6 +517,17 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX = 36 | SPVC_COMPILER_OPTION_MSL_BIT,
+
+ SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT,
+
+ SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT,
+ SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT,
+
SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
} spvc_compiler_option;
@@ -505,7 +605,12 @@ SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin
* Maps to C++ API.
*/
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler);
+
+/* Obsolete. Renamed to needs_swizzle_buffer. */
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler);
+
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler);
@@ -514,14 +619,23 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
const spvc_msl_resource_binding *binding);
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location);
SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler,
SpvExecutionModel model,
unsigned set,
unsigned binding);
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv);
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components);
+SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id);
+SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index);
+
/*
* Reflect resources.
* Maps almost 1:1 to C++ API.
@@ -567,6 +681,7 @@ SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compi
unsigned member_index, SpvDecoration decoration);
SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id,
unsigned member_index, SpvDecoration decoration);
+SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index);
/*
* Entry points.
@@ -626,6 +741,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ
SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size);
SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler,
spvc_type struct_type, size_t array_size, size_t *size);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size);
SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler,
spvc_type type, unsigned index, unsigned *offset);
@@ -660,6 +776,15 @@ SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specializatio
spvc_specialization_constant *z);
/*
+ * Buffer ranges
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
+ spvc_variable_id id,
+ const spvc_buffer_range **ranges,
+ size_t *num_ranges);
+
+/*
* No stdint.h until C99, sigh :(
* For smaller types, the result is sign or zero-extended as appropriate.
* Maps to C++ API.
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp
index 393f461..7b507e1 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_containers.hpp
@@ -61,7 +61,8 @@ public:
private:
#if defined(_MSC_VER) && _MSC_VER < 1900
// MSVC 2013 workarounds, sigh ...
- union {
+ union
+ {
char aligned_char[sizeof(T) * N];
double dummy_aligner;
} u;
@@ -447,6 +448,11 @@ public:
}
}
+ void insert(T *itr, const T &value)
+ {
+ insert(itr, &value, &value + 1);
+ }
+
T *erase(T *itr)
{
std::move(itr + 1, this->end(), itr);
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp
index 108000c..2387267 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp
@@ -41,6 +41,7 @@ ParsedIR::ParsedIR()
pool_group->pools[TypeCombinedImageSampler].reset(new ObjectPool<SPIRCombinedImageSampler>);
pool_group->pools[TypeAccessChain].reset(new ObjectPool<SPIRAccessChain>);
pool_group->pools[TypeUndef].reset(new ObjectPool<SPIRUndef>);
+ pool_group->pools[TypeString].reset(new ObjectPool<SPIRString>);
}
// Should have been default-implemented, but need this on MSVC 2013.
@@ -71,7 +72,8 @@ ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT
default_entry_point = other.default_entry_point;
source = other.source;
- loop_iteration_depth = other.loop_iteration_depth;
+ loop_iteration_depth_hard = other.loop_iteration_depth_hard;
+ loop_iteration_depth_soft = other.loop_iteration_depth_soft;
}
return *this;
}
@@ -99,7 +101,8 @@ ParsedIR &ParsedIR::operator=(const ParsedIR &other)
entry_points = other.entry_points;
default_entry_point = other.default_entry_point;
source = other.source;
- loop_iteration_depth = other.loop_iteration_depth;
+ loop_iteration_depth_hard = other.loop_iteration_depth_hard;
+ loop_iteration_depth_soft = other.loop_iteration_depth_soft;
addressing_model = other.addressing_model;
memory_model = other.memory_model;
@@ -159,7 +162,7 @@ static string ensure_valid_identifier(const string &name, bool member)
return str;
}
-const string &ParsedIR::get_name(uint32_t id) const
+const string &ParsedIR::get_name(ID id) const
{
auto *m = find_meta(id);
if (m)
@@ -168,7 +171,7 @@ const string &ParsedIR::get_name(uint32_t id) const
return empty_string;
}
-const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const
+const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const
{
auto *m = find_meta(id);
if (m)
@@ -181,7 +184,7 @@ const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const
return empty_string;
}
-void ParsedIR::set_name(uint32_t id, const string &name)
+void ParsedIR::set_name(ID id, const string &name)
{
auto &str = meta[id].decoration.alias;
str.clear();
@@ -196,7 +199,7 @@ void ParsedIR::set_name(uint32_t id, const string &name)
str = ensure_valid_identifier(name, false);
}
-void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name)
+void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name)
{
meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
@@ -212,7 +215,7 @@ void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name)
str = ensure_valid_identifier(name, true);
}
-void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument)
+void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument)
{
auto &dec = meta[id].decoration;
dec.decoration_flags.set(decoration);
@@ -228,7 +231,7 @@ void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const s
}
}
-void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argument)
+void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
{
auto &dec = meta[id].decoration;
dec.decoration_flags.set(decoration);
@@ -294,7 +297,7 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
}
}
-void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument)
+void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
{
meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
auto &dec = meta[id].members[index];
@@ -342,7 +345,7 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec
// Recursively marks any constants referenced by the specified constant instruction as being used
// as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp).
-void ParsedIR::mark_used_as_array_length(uint32_t id)
+void ParsedIR::mark_used_as_array_length(ID id)
{
switch (ids[id].get_type())
{
@@ -353,8 +356,16 @@ void ParsedIR::mark_used_as_array_length(uint32_t id)
case TypeConstantOp:
{
auto &cop = get<SPIRConstantOp>(id);
- for (uint32_t arg_id : cop.arguments)
- mark_used_as_array_length(arg_id);
+ if (cop.opcode == OpCompositeExtract)
+ mark_used_as_array_length(cop.arguments[0]);
+ else if (cop.opcode == OpCompositeInsert)
+ {
+ mark_used_as_array_length(cop.arguments[0]);
+ mark_used_as_array_length(cop.arguments[1]);
+ }
+ else
+ for (uint32_t arg_id : cop.arguments)
+ mark_used_as_array_length(arg_id);
break;
}
@@ -390,7 +401,7 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const
return base_flags;
}
-const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const
+const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const
{
auto *m = find_meta(id);
if (m)
@@ -403,12 +414,12 @@ const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index
return cleared_bitset;
}
-bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const
+bool ParsedIR::has_decoration(ID id, Decoration decoration) const
{
return get_decoration_bitset(id).get(decoration);
}
-uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const
+uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
{
auto *m = find_meta(id);
if (!m)
@@ -449,7 +460,7 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const
}
}
-const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const
+const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const
{
auto *m = find_meta(id);
if (!m)
@@ -470,7 +481,7 @@ const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration
}
}
-void ParsedIR::unset_decoration(uint32_t id, Decoration decoration)
+void ParsedIR::unset_decoration(ID id, Decoration decoration)
{
auto &dec = meta[id].decoration;
dec.decoration_flags.clear(decoration);
@@ -532,12 +543,12 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration)
}
}
-bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+bool ParsedIR::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
return get_member_decoration_bitset(id, index).get(decoration);
}
-uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
+uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
auto *m = find_meta(id);
if (!m)
@@ -571,7 +582,7 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration
}
}
-const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const
+const Bitset &ParsedIR::get_decoration_bitset(ID id) const
{
auto *m = find_meta(id);
if (m)
@@ -583,7 +594,7 @@ const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const
return cleared_bitset;
}
-void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument)
+void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument)
{
meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
auto &dec = meta[id].members[index];
@@ -600,7 +611,7 @@ void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decorat
}
}
-const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const
+const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
{
auto *m = find_meta(id);
if (m)
@@ -623,7 +634,7 @@ const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index
return empty_string;
}
-void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration)
+void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
{
auto &m = meta[id];
if (index >= m.members.size())
@@ -676,7 +687,7 @@ uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount)
return uint32_t(curr_bound);
}
-void ParsedIR::remove_typed_id(Types type, uint32_t id)
+void ParsedIR::remove_typed_id(Types type, ID id)
{
auto &type_ids = ids_for_type[type];
type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids));
@@ -691,11 +702,18 @@ void ParsedIR::reset_all_of_type(Types type)
ids_for_type[type].clear();
}
-void ParsedIR::add_typed_id(Types type, uint32_t id)
+void ParsedIR::add_typed_id(Types type, ID id)
{
- if (loop_iteration_depth)
+ if (loop_iteration_depth_hard != 0)
SPIRV_CROSS_THROW("Cannot add typed ID while looping over it.");
+ if (loop_iteration_depth_soft != 0)
+ {
+ if (!ids[id].empty())
+ SPIRV_CROSS_THROW("Cannot override IDs when loop is soft locked.");
+ return;
+ }
+
if (ids[id].empty() || ids[id].get_type() != type)
{
switch (type)
@@ -730,7 +748,7 @@ void ParsedIR::add_typed_id(Types type, uint32_t id)
}
}
-const Meta *ParsedIR::find_meta(uint32_t id) const
+const Meta *ParsedIR::find_meta(ID id) const
{
auto itr = meta.find(id);
if (itr != end(meta))
@@ -739,7 +757,7 @@ const Meta *ParsedIR::find_meta(uint32_t id) const
return nullptr;
}
-Meta *ParsedIR::find_meta(uint32_t id)
+Meta *ParsedIR::find_meta(ID id)
{
auto itr = meta.find(id);
if (itr != end(meta))
@@ -748,4 +766,41 @@ Meta *ParsedIR::find_meta(uint32_t id)
return nullptr;
}
+ParsedIR::LoopLock ParsedIR::create_loop_hard_lock() const
+{
+ return ParsedIR::LoopLock(&loop_iteration_depth_hard);
+}
+
+ParsedIR::LoopLock ParsedIR::create_loop_soft_lock() const
+{
+ return ParsedIR::LoopLock(&loop_iteration_depth_soft);
+}
+
+ParsedIR::LoopLock::~LoopLock()
+{
+ if (lock)
+ (*lock)--;
+}
+
+ParsedIR::LoopLock::LoopLock(uint32_t *lock_)
+ : lock(lock_)
+{
+ if (lock)
+ (*lock)++;
+}
+
+ParsedIR::LoopLock::LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT
+{
+ *this = move(other);
+}
+
+ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT
+{
+ if (lock)
+ (*lock)--;
+ lock = other.lock;
+ other.lock = nullptr;
+ return *this;
+}
+
} // namespace SPIRV_CROSS_NAMESPACE
diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp
index 79e9e15..97bc9ea 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp
@@ -57,19 +57,19 @@ public:
SmallVector<Variant> ids;
// Various meta data for IDs, decorations, names, etc.
- std::unordered_map<uint32_t, Meta> meta;
+ std::unordered_map<ID, Meta> meta;
// Holds all IDs which have a certain type.
// This is needed so we can iterate through a specific kind of resource quickly,
// and in-order of module declaration.
- SmallVector<uint32_t> ids_for_type[TypeCount];
+ SmallVector<ID> ids_for_type[TypeCount];
// Special purpose lists which contain a union of types.
// This is needed so we can declare specialization constants and structs in an interleaved fashion,
// among other things.
// Constants can be of struct type, and struct array sizes can use specialization constants.
- SmallVector<uint32_t> ids_for_constant_or_type;
- SmallVector<uint32_t> ids_for_constant_or_variable;
+ SmallVector<ID> ids_for_constant_or_type;
+ SmallVector<ID> ids_for_constant_or_variable;
// Declared capabilities and extensions in the SPIR-V module.
// Not really used except for reflection at the moment.
@@ -88,12 +88,12 @@ public:
};
using BlockMetaFlags = uint8_t;
SmallVector<BlockMetaFlags> block_meta;
- std::unordered_map<uint32_t, uint32_t> continue_block_to_loop_header;
+ std::unordered_map<BlockID, BlockID> continue_block_to_loop_header;
// Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction.
// Entry points can therefore be seen as some sort of meta structure.
- std::unordered_map<uint32_t, SPIREntryPoint> entry_points;
- uint32_t default_entry_point = 0;
+ std::unordered_map<FunctionID, SPIREntryPoint> entry_points;
+ FunctionID default_entry_point = 0;
struct Source
{
@@ -114,50 +114,75 @@ public:
// Can be useful for simple "raw" reflection.
// However, most members are here because the Parser needs most of these,
// and might as well just have the whole suite of decoration/name handling in one place.
- void set_name(uint32_t id, const std::string &name);
- const std::string &get_name(uint32_t id) const;
- void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0);
- void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument);
- bool has_decoration(uint32_t id, spv::Decoration decoration) const;
- uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
- const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const;
- const Bitset &get_decoration_bitset(uint32_t id) const;
- void unset_decoration(uint32_t id, spv::Decoration decoration);
+ void set_name(ID id, const std::string &name);
+ const std::string &get_name(ID id) const;
+ void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0);
+ void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument);
+ bool has_decoration(ID id, spv::Decoration decoration) const;
+ uint32_t get_decoration(ID id, spv::Decoration decoration) const;
+ const std::string &get_decoration_string(ID id, spv::Decoration decoration) const;
+ const Bitset &get_decoration_bitset(ID id) const;
+ void unset_decoration(ID id, spv::Decoration decoration);
// Decoration handling methods (for members of a struct).
- void set_member_name(uint32_t id, uint32_t index, const std::string &name);
- const std::string &get_member_name(uint32_t id, uint32_t index) const;
- void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
- void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+ void set_member_name(TypeID id, uint32_t index, const std::string &name);
+ const std::string &get_member_name(TypeID id, uint32_t index) const;
+ void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+ void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
const std::string &argument);
- uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
- const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const;
- bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
- const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const;
- void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
+ uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+ const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const;
+ bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+ const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const;
+ void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration);
- void mark_used_as_array_length(uint32_t id);
+ void mark_used_as_array_length(ID id);
uint32_t increase_bound_by(uint32_t count);
Bitset get_buffer_block_flags(const SPIRVariable &var) const;
- void add_typed_id(Types type, uint32_t id);
- void remove_typed_id(Types type, uint32_t id);
+ void add_typed_id(Types type, ID id);
+ void remove_typed_id(Types type, ID id);
+
+ class LoopLock
+ {
+ public:
+ explicit LoopLock(uint32_t *counter);
+ LoopLock(const LoopLock &) = delete;
+ void operator=(const LoopLock &) = delete;
+ LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT;
+ LoopLock &operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT;
+ ~LoopLock();
+
+ private:
+ uint32_t *lock;
+ };
+
+ // This must be held while iterating over a type ID array.
+ // It is undefined if someone calls set<>() while we're iterating over a data structure, so we must
+ // make sure that this case is avoided.
+
+ // If we have a hard lock, it is an error to call set<>(), and an exception is thrown.
+ // If we have a soft lock, we silently ignore any additions to the typed arrays.
+ // This should only be used for physical ID remapping where we need to create an ID, but we will never
+ // care about iterating over them.
+ LoopLock create_loop_hard_lock() const;
+ LoopLock create_loop_soft_lock() const;
template <typename T, typename Op>
void for_each_typed_id(const Op &op)
{
- loop_iteration_depth++;
+ auto loop_lock = create_loop_hard_lock();
for (auto &id : ids_for_type[T::type])
{
if (ids[id].get_type() == static_cast<Types>(T::type))
op(id, get<T>(id));
}
- loop_iteration_depth--;
}
template <typename T, typename Op>
void for_each_typed_id(const Op &op) const
{
+ auto loop_lock = create_loop_hard_lock();
for (auto &id : ids_for_type[T::type])
{
if (ids[id].get_type() == static_cast<Types>(T::type))
@@ -173,8 +198,8 @@ public:
void reset_all_of_type(Types type);
- Meta *find_meta(uint32_t id);
- const Meta *find_meta(uint32_t id) const;
+ Meta *find_meta(ID id);
+ const Meta *find_meta(ID id) const;
const std::string &get_empty_string() const
{
@@ -194,7 +219,8 @@ private:
return variant_get<T>(ids[id]);
}
- uint32_t loop_iteration_depth = 0;
+ mutable uint32_t loop_iteration_depth_hard = 0;
+ mutable uint32_t loop_iteration_depth_soft = 0;
std::string empty_string;
Bitset cleared_bitset;
};
diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp
index 32582fb..bcdcd5f 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp
@@ -288,8 +288,17 @@ static uint32_t pls_format_to_components(PlsFormat format)
static const char *vector_swizzle(int vecsize, int index)
{
- static const char *swizzle[4][4] = {
- { ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" }
+ static const char *const swizzle[4][4] = {
+ { ".x", ".y", ".z", ".w" },
+ { ".xy", ".yz", ".zw", nullptr },
+ { ".xyz", ".yzw", nullptr, nullptr },
+#if defined(__GNUC__) && (__GNUC__ == 9)
+ // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
+ // This array ends up being compiled as all nullptrs, tripping the assertions below.
+ { "", nullptr, nullptr, "$" },
+#else
+ { "", nullptr, nullptr, nullptr },
+#endif
};
assert(vecsize >= 1 && vecsize <= 4);
@@ -313,6 +322,7 @@ void CompilerGLSL::reset()
// Clear temporary usage tracking.
expression_usage_counts.clear();
forwarded_temporaries.clear();
+ suppressed_usage_tracking.clear();
reset_name_caches();
@@ -494,11 +504,14 @@ string CompilerGLSL::compile()
backend.supports_extensions = true;
// Scan the SPIR-V to find trivial uses of extensions.
+ fixup_type_alias();
+ reorder_type_alias();
build_function_control_flow_graphs_and_analyze();
find_static_extensions();
fixup_image_load_store_access();
update_active_builtins();
analyze_image_and_sampler_usage();
+ analyze_interlocked_resource_usage();
// Shaders might cast unrelated data to pointers of non-block types.
// Find all such instances and make sure we can cast the pointers to a synthesized block type.
@@ -523,6 +536,25 @@ string CompilerGLSL::compile()
pass_count++;
} while (is_forcing_recompilation());
+ // Implement the interlocked wrapper function at the end.
+ // The body was implemented in lieu of main().
+ if (interlocked_is_complex)
+ {
+ statement("void main()");
+ begin_scope();
+ statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
+ if (options.es)
+ statement("beginInvocationInterlockNV();");
+ else
+ statement("beginInvocationInterlockARB();");
+ statement("spvMainInterlockedBody();");
+ if (options.es)
+ statement("endInvocationInterlockNV();");
+ else
+ statement("endInvocationInterlockARB();");
+ end_scope();
+ }
+
// Entry point in GLSL is always main().
get_entry_point().name = "main";
@@ -589,6 +621,30 @@ void CompilerGLSL::emit_header()
require_extension_internal("GL_ARB_shader_image_load_store");
}
+ // Needed for: layout(post_depth_coverage) in;
+ if (execution.flags.get(ExecutionModePostDepthCoverage))
+ require_extension_internal("GL_ARB_post_depth_coverage");
+
+ // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
+ if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+ execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+ execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+ execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+ {
+ if (options.es)
+ {
+ if (options.version < 310)
+ SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
+ require_extension_internal("GL_NV_fragment_shader_interlock");
+ }
+ else
+ {
+ if (options.version < 420)
+ require_extension_internal("GL_ARB_shader_image_load_store");
+ require_extension_internal("GL_ARB_fragment_shader_interlock");
+ }
+ }
+
for (auto &ext : forced_extensions)
{
if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
@@ -624,6 +680,19 @@ void CompilerGLSL::emit_header()
statement("#endif");
}
}
+ else if (ext == "GL_ARB_post_depth_coverage")
+ {
+ if (options.es)
+ statement("#extension GL_EXT_post_depth_coverage : require");
+ else
+ {
+ statement("#if defined(GL_ARB_post_depth_coverage)");
+ statement("#extension GL_ARB_post_depth_coverage : require");
+ statement("#else");
+ statement("#extension GL_EXT_post_depth_coverage : require");
+ statement("#endif");
+ }
+ }
else
statement("#extension ", ext, " : require");
}
@@ -698,7 +767,8 @@ void CompilerGLSL::emit_header()
// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
// declarations before we can emit the work group size.
- if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0)))
+ if (options.vulkan_semantics ||
+ ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
build_workgroup_size(inputs, wg_x, wg_y, wg_z);
}
else
@@ -752,6 +822,17 @@ void CompilerGLSL::emit_header()
if (execution.flags.get(ExecutionModeEarlyFragmentTests))
inputs.push_back("early_fragment_tests");
+ if (execution.flags.get(ExecutionModePostDepthCoverage))
+ inputs.push_back("post_depth_coverage");
+
+ if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
+ inputs.push_back("pixel_interlock_ordered");
+ else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
+ inputs.push_back("pixel_interlock_unordered");
+ else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
+ inputs.push_back("sample_interlock_ordered");
+ else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+ inputs.push_back("sample_interlock_unordered");
if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
statement("layout(depth_greater) out float gl_FragDepth;");
@@ -784,7 +865,8 @@ void CompilerGLSL::emit_struct(SPIRType &type)
// Type-punning with these types is legal, which complicates things
// when we are storing struct and array types in an SSBO for example.
// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
- if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
+ if (type.type_alias != TypeID(0) &&
+ !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
return;
add_resource_name(type.self);
@@ -812,6 +894,9 @@ void CompilerGLSL::emit_struct(SPIRType &type)
emitted = true;
}
+ if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
+ emit_struct_padding_target(type);
+
end_scope_decl();
if (emitted)
@@ -821,8 +906,6 @@ void CompilerGLSL::emit_struct(SPIRType &type)
string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
string res;
- if (flags.get(DecorationNonUniformEXT))
- res += "nonuniformEXT ";
//if (flags & (1ull << DecorationSmooth))
// res += "smooth ";
if (flags.get(DecorationFlat))
@@ -902,7 +985,8 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
// This is only done selectively in GLSL as needed.
- if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset))
+ if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
+ dec.decoration_flags.get(DecorationOffset))
attr.push_back(join("offset = ", dec.offset));
if (attr.empty())
@@ -1322,14 +1406,20 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
// Only care about packing if we are in the given range
if (offset >= start_offset)
{
+ uint32_t actual_offset = type_struct_member_offset(type, i);
+
// We only care about offsets in std140, std430, etc ...
// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
if (!packing_has_flexible_offset(packing))
{
- uint32_t actual_offset = type_struct_member_offset(type, i);
if (actual_offset != offset) // This cannot be the packing we're looking for.
return false;
}
+ else if ((actual_offset & (alignment - 1)) != 0)
+ {
+ // We still need to verify that alignment rules are observed, even if we have explicit offset.
+ return false;
+ }
// Verify array stride rules.
if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
@@ -1408,6 +1498,8 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
attr.push_back("push_constant");
+ else if (var.storage == StorageClassShaderRecordBufferNV)
+ attr.push_back("shaderRecordNV");
if (flags.get(DecorationRowMajor))
attr.push_back("row_major");
@@ -1453,14 +1545,14 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
// Do not emit set = decoration in regular GLSL output, but
// we need to preserve it in Vulkan GLSL mode.
- if (var.storage != StorageClassPushConstant)
+ if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV)
{
if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
attr.push_back(join("set = ", dec.set));
}
bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
- bool ssbo_block = var.storage == StorageClassStorageBuffer ||
+ bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
(var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
@@ -1482,6 +1574,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
can_use_binding = false;
+ if (var.storage == StorageClassShaderRecordBufferNV)
+ can_use_binding = false;
+
if (can_use_binding && flags.get(DecorationBinding))
attr.push_back(join("binding = ", dec.binding));
@@ -1517,9 +1612,9 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
return res;
}
-string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check_std430)
+string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
{
- if (check_std430 && buffer_is_packing_standard(type, BufferPackingStd430))
+ if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
return "std430";
else if (buffer_is_packing_standard(type, BufferPackingStd140))
return "std140";
@@ -1528,7 +1623,8 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check
require_extension_internal("GL_EXT_scalar_block_layout");
return "scalar";
}
- else if (check_std430 && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
+ else if (support_std430_without_scalar_layout &&
+ buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
{
if (options.es && !options.vulkan_semantics)
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
@@ -1536,7 +1632,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension_internal("GL_ARB_enhanced_layouts");
- set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+ set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
return "std430";
}
else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
@@ -1550,15 +1646,30 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool check
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension_internal("GL_ARB_enhanced_layouts");
- set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+ set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
return "std140";
}
else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
{
- set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+ set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
require_extension_internal("GL_EXT_scalar_block_layout");
return "scalar";
}
+ else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
+ buffer_is_packing_standard(type, BufferPackingStd430))
+ {
+ // UBOs can support std430 with GL_EXT_scalar_block_layout.
+ require_extension_internal("GL_EXT_scalar_block_layout");
+ return "std430";
+ }
+ else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
+ buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
+ {
+ // UBOs can support std430 with GL_EXT_scalar_block_layout.
+ set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
+ require_extension_internal("GL_EXT_scalar_block_layout");
+ return "std430";
+ }
else
{
SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
@@ -1727,7 +1838,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
auto &type = get<SPIRType>(var.basetype);
Bitset flags = ir.get_buffer_block_flags(var);
- bool ssbo = var.storage == StorageClassStorageBuffer ||
+ bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV ||
ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
bool is_restrict = ssbo && flags.get(DecorationRestrict);
bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
@@ -1844,6 +1955,14 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
return "hitAttributeNV ";
}
+ else if (var.storage == StorageClassCallableDataNV)
+ {
+ return "callableDataNV ";
+ }
+ else if (var.storage == StorageClassIncomingCallableDataNV)
+ {
+ return "callableDataInNV ";
+ }
return "";
}
@@ -2015,7 +2134,7 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant)
auto name = to_name(constant.self);
SpecializationConstant wg_x, wg_y, wg_z;
- uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+ ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
// This specialization constant is implicitly declared by emitting layout() in;
if (constant.self == workgroup_size_id)
@@ -2024,7 +2143,8 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant)
// These specialization constants are implicitly declared by emitting layout() in;
// In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
// later can use macro overrides for work group size.
- bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id;
+ bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
+ ConstantID(constant.self) == wg_z.id;
if (options.vulkan_semantics && is_workgroup_size_constant)
{
@@ -2374,7 +2494,7 @@ void CompilerGLSL::declare_undefined_values()
bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
{
- bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable;
+ bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
if (statically_assigned)
{
@@ -2446,44 +2566,47 @@ void CompilerGLSL::emit_resources()
// emit specialization constants as actual floats,
// spec op expressions will redirect to the constant name.
//
- for (auto &id_ : ir.ids_for_constant_or_type)
{
- auto &id = ir.ids[id_];
-
- if (id.get_type() == TypeConstant)
+ auto loop_lock = ir.create_loop_hard_lock();
+ for (auto &id_ : ir.ids_for_constant_or_type)
{
- auto &c = id.get<SPIRConstant>();
-
- bool needs_declaration = c.specialization || c.is_used_as_lut;
+ auto &id = ir.ids[id_];
- if (needs_declaration)
+ if (id.get_type() == TypeConstant)
{
- if (!options.vulkan_semantics && c.specialization)
+ auto &c = id.get<SPIRConstant>();
+
+ bool needs_declaration = c.specialization || c.is_used_as_lut;
+
+ if (needs_declaration)
{
- c.specialization_constant_macro_name =
- constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
+ if (!options.vulkan_semantics && c.specialization)
+ {
+ c.specialization_constant_macro_name =
+ constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
+ }
+ emit_constant(c);
+ emitted = true;
}
- emit_constant(c);
+ }
+ else if (id.get_type() == TypeConstantOp)
+ {
+ emit_specialization_constant_op(id.get<SPIRConstantOp>());
emitted = true;
}
- }
- else if (id.get_type() == TypeConstantOp)
- {
- emit_specialization_constant_op(id.get<SPIRConstantOp>());
- emitted = true;
- }
- else if (id.get_type() == TypeType)
- {
- auto &type = id.get<SPIRType>();
- if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
- (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
- !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
+ else if (id.get_type() == TypeType)
{
- if (emitted)
- statement("");
- emitted = false;
+ auto &type = id.get<SPIRType>();
+ if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
+ (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
+ !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
+ {
+ if (emitted)
+ statement("");
+ emitted = false;
- emit_struct(type);
+ emit_struct(type);
+ }
}
}
}
@@ -2500,7 +2623,7 @@ void CompilerGLSL::emit_resources()
SpecializationConstant wg_x, wg_y, wg_z;
get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
- if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0))
+ if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
{
SmallVector<string> inputs;
build_workgroup_size(inputs, wg_x, wg_y, wg_z);
@@ -2545,7 +2668,8 @@ void CompilerGLSL::emit_resources()
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
- bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
+ bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
+ type.storage == StorageClassShaderRecordBufferNV;
bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
@@ -2585,8 +2709,9 @@ void CompilerGLSL::emit_resources()
if (var.storage != StorageClassFunction && type.pointer &&
(type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
- type.storage == StorageClassRayPayloadNV || type.storage == StorageClassHitAttributeNV ||
- type.storage == StorageClassIncomingRayPayloadNV) &&
+ type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV ||
+ type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV ||
+ type.storage == StorageClassHitAttributeNV) &&
!is_hidden_variable(var))
{
emit_uniform(var);
@@ -2646,7 +2771,7 @@ void CompilerGLSL::emit_resources()
// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation fo the arg ID.
// Subclasses may override to modify the return value.
-string CompilerGLSL::to_func_call_arg(uint32_t id)
+string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
{
// Make sure that we use the name of the original variable, and not the parameter alias.
uint32_t name_id = id;
@@ -2667,8 +2792,8 @@ void CompilerGLSL::handle_invalid_expression(uint32_t id)
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so simply return the expression.
-// Subclasses that do will override
-string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t)
+// Subclasses that do will override.
+string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
{
return expr_str;
}
@@ -2762,13 +2887,22 @@ string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std
string CompilerGLSL::address_of_expression(const std::string &expr)
{
- // If this expression starts with a dereference operator ('*'), then
- // just return the part after the operator.
- // TODO: Strip parens if unnecessary?
- if (expr.front() == '*')
+ if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
+ {
+ // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
+ // the first two and last characters. We might have to enclose the expression.
+ // This doesn't work for cases like (*foo + 10),
+ // but this is an r-value expression which we cannot take the address of anyways.
+ return enclose_expression(expr.substr(2, expr.size() - 3));
+ }
+ else if (expr.front() == '*')
+ {
+ // If this expression starts with a dereference operator ('*'), then
+ // just return the part after the operator.
return expr.substr(1);
+ }
else
- return join('&', expr);
+ return join('&', enclose_expression(expr));
}
// Just like to_expression except that we enclose the expression inside parentheses if needed.
@@ -2777,14 +2911,30 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expressio
return enclose_expression(to_expression(id, register_expression_read));
}
+// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
+// need_transpose must be forced to false.
+string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
+{
+ return unpack_expression_type(to_expression(id), expression_type(id),
+ get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
+ has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
+}
+
string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
{
// If we need to transpose, it will also take care of unpacking rules.
auto *e = maybe_get<SPIRExpression>(id);
bool need_transpose = e && e->need_transpose;
- if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked))
- return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
- get_extended_decoration(id, SPIRVCrossDecorationPackedType));
+ bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
+ bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+
+ if (!need_transpose && (is_remapped || is_packed))
+ {
+ return unpack_expression_type(to_expression(id, register_expression_read),
+ get_pointee_type(expression_type_id(id)),
+ get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
+ has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
+ }
else
return to_expression(id, register_expression_read);
}
@@ -2794,9 +2944,14 @@ string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_
// If we need to transpose, it will also take care of unpacking rules.
auto *e = maybe_get<SPIRExpression>(id);
bool need_transpose = e && e->need_transpose;
- if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked))
+ bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
+ bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+ if (!need_transpose && (is_remapped || is_packed))
+ {
return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
- get_extended_decoration(id, SPIRVCrossDecorationPackedType));
+ get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
+ has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
+ }
else
return to_enclosed_expression(id, register_expression_read);
}
@@ -2831,12 +2986,55 @@ string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_e
string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
{
auto expr = to_enclosed_expression(id);
- if (has_extended_decoration(id, SPIRVCrossDecorationPacked))
+ if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
return join(expr, "[", index, "]");
else
return join(expr, ".", index_to_swizzle(index));
}
+string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
+{
+ uint32_t size = to_array_size_literal(type);
+ auto &parent = get<SPIRType>(type.parent_type);
+ string expr = "{ ";
+
+ for (uint32_t i = 0; i < size; i++)
+ {
+ auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
+ if (parent.array.empty())
+ expr += subexpr;
+ else
+ expr += to_rerolled_array_expression(subexpr, parent);
+
+ if (i + 1 < size)
+ expr += ", ";
+ }
+
+ expr += " }";
+ return expr;
+}
+
+string CompilerGLSL::to_composite_constructor_expression(uint32_t id)
+{
+ auto &type = expression_type(id);
+ if (!backend.array_is_value_type && !type.array.empty())
+ {
+ // For this case, we need to "re-roll" an array initializer from a temporary.
+ // We cannot simply pass the array directly, since it decays to a pointer and it cannot
+ // participate in a struct initializer. E.g.
+ // float arr[2] = { 1.0, 2.0 };
+ // Foo foo = { arr }; must be transformed to
+ // Foo foo = { { arr[0], arr[1] } };
+ // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
+
+ // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
+ // as temporaries anyways.
+ return to_rerolled_array_expression(to_enclosed_expression(id), type);
+ }
+ else
+ return to_unpacked_expression(id);
+}
+
string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
{
auto itr = invalid_expressions.find(id);
@@ -2874,8 +3072,12 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
return to_enclosed_expression(e.base_expression) + e.expression;
else if (e.need_transpose)
{
- bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked);
- return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
+ // This should not be reached for access chains, since we always deal explicitly with transpose state
+ // when consuming an access chain expression.
+ uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
+ bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+ return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
+ is_packed);
}
else
{
@@ -3675,15 +3877,6 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
if (splat)
{
res += convert_to_string(c.scalar(vector, 0));
- if (is_legacy())
- {
- // Fake unsigned constant literals with signed ones if possible.
- // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
- if (c.scalar_i16(vector, 0) < 0)
- SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
- }
- else
- res += backend.uint16_t_literal_suffix;
}
else
{
@@ -3693,17 +3886,19 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
res += to_name(c.specialization_constant_id(vector, i));
else
{
- res += convert_to_string(c.scalar(vector, i));
- if (is_legacy())
+ if (*backend.uint16_t_literal_suffix)
{
- // Fake unsigned constant literals with signed ones if possible.
- // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
- if (c.scalar_i16(vector, i) < 0)
- SPIRV_CROSS_THROW(
- "Tried to convert uint literal into int, but this made the literal negative.");
+ res += convert_to_string(c.scalar_u16(vector, i));
+ res += backend.uint16_t_literal_suffix;
}
else
- res += backend.uint16_t_literal_suffix;
+ {
+ // If backend doesn't have a literal suffix, we need to value cast.
+ res += type_to_glsl(scalar_type);
+ res += "(";
+ res += convert_to_string(c.scalar_u16(vector, i));
+ res += ")";
+ }
}
if (i + 1 < c.vector_size())
@@ -3716,7 +3911,6 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
if (splat)
{
res += convert_to_string(c.scalar_i16(vector, 0));
- res += backend.int16_t_literal_suffix;
}
else
{
@@ -3726,9 +3920,21 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
res += to_name(c.specialization_constant_id(vector, i));
else
{
- res += convert_to_string(c.scalar_i16(vector, i));
- res += backend.int16_t_literal_suffix;
+ if (*backend.int16_t_literal_suffix)
+ {
+ res += convert_to_string(c.scalar_i16(vector, i));
+ res += backend.int16_t_literal_suffix;
+ }
+ else
+ {
+ // If backend doesn't have a literal suffix, we need to value cast.
+ res += type_to_glsl(scalar_type);
+ res += "(";
+ res += convert_to_string(c.scalar_i16(vector, i));
+ res += ")";
+ }
}
+
if (i + 1 < c.vector_size())
res += ", ";
}
@@ -3883,9 +4089,14 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
}
}
-bool CompilerGLSL::expression_is_forwarded(uint32_t id)
+bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
+{
+ return forwarded_temporaries.count(id) != 0;
+}
+
+bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
{
- return forwarded_temporaries.find(id) != end(forwarded_temporaries);
+ return suppressed_usage_tracking.count(id) != 0;
}
SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
@@ -3895,8 +4106,9 @@ SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id,
{
// Just forward it without temporary.
// If the forward is trivial, we do not force flushing to temporary for this expression.
- if (!suppress_usage_tracking)
- forwarded_temporaries.insert(result_id);
+ forwarded_temporaries.insert(result_id);
+ if (suppress_usage_tracking)
+ suppressed_usage_tracking.insert(result_id);
return set<SPIRExpression>(result_id, rhs, result_type, true);
}
@@ -3947,8 +4159,18 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_
}
void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
- const char *op)
+ const char *op, bool negate, SPIRType::BaseType expected_type)
{
+ auto &type0 = expression_type(op0);
+ auto &type1 = expression_type(op1);
+
+ SPIRType target_type0 = type0;
+ SPIRType target_type1 = type1;
+ target_type0.basetype = expected_type;
+ target_type1.basetype = expected_type;
+ target_type0.vecsize = 1;
+ target_type1.vecsize = 1;
+
auto &type = get<SPIRType>(result_type);
auto expr = type_to_glsl_constructor(type);
expr += '(';
@@ -3956,11 +4178,25 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result
{
// Make sure to call to_expression multiple times to ensure
// that these expressions are properly flushed to temporaries if needed.
- expr += to_extract_component_expression(op0, i);
+ if (negate)
+ expr += "!(";
+
+ if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
+ expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
+ else
+ expr += to_extract_component_expression(op0, i);
+
expr += ' ';
expr += op;
expr += ' ';
- expr += to_extract_component_expression(op1, i);
+
+ if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
+ expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
+ else
+ expr += to_extract_component_expression(op1, i);
+
+ if (negate)
+ expr += ")";
if (i + 1 < type.vecsize)
expr += ", ";
@@ -4082,6 +4318,58 @@ void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result
inherit_expression_dependencies(result_id, op0);
}
+// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
+// and different vector sizes all at once. Need a special purpose method here.
+void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+ uint32_t op2, const char *op,
+ SPIRType::BaseType expected_result_type,
+ SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
+ SPIRType::BaseType input_type2)
+{
+ auto &out_type = get<SPIRType>(result_type);
+ auto expected_type = out_type;
+ expected_type.basetype = input_type0;
+
+ string cast_op0 =
+ expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
+
+ auto op1_expr = to_unpacked_expression(op1);
+ auto op2_expr = to_unpacked_expression(op2);
+
+ // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
+ expected_type.basetype = input_type1;
+ expected_type.vecsize = 1;
+ string cast_op1 = expression_type(op1).basetype != input_type1 ?
+ join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
+ op1_expr;
+
+ expected_type.basetype = input_type2;
+ expected_type.vecsize = 1;
+ string cast_op2 = expression_type(op2).basetype != input_type2 ?
+ join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
+ op2_expr;
+
+ string expr;
+ if (out_type.basetype != expected_result_type)
+ {
+ expected_type.vecsize = out_type.vecsize;
+ expected_type.basetype = expected_result_type;
+ expr = bitcast_glsl_op(out_type, expected_type);
+ expr += '(';
+ expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
+ expr += ')';
+ }
+ else
+ {
+ expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
+ }
+
+ emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
+ inherit_expression_dependencies(result_id, op0);
+ inherit_expression_dependencies(result_id, op1);
+ inherit_expression_dependencies(result_id, op2);
+}
+
void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
uint32_t op2, const char *op, SPIRType::BaseType input_type)
{
@@ -4170,6 +4458,44 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result
inherit_expression_dependencies(result_id, op3);
}
+void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+ uint32_t op2, uint32_t op3, const char *op,
+ SPIRType::BaseType offset_count_type)
+{
+ // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
+ // and bitfieldInsert is sign invariant.
+ bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
+
+ auto op0_expr = to_unpacked_expression(op0);
+ auto op1_expr = to_unpacked_expression(op1);
+ auto op2_expr = to_unpacked_expression(op2);
+ auto op3_expr = to_unpacked_expression(op3);
+
+ SPIRType target_type;
+ target_type.vecsize = 1;
+ target_type.basetype = offset_count_type;
+
+ if (expression_type(op2).basetype != offset_count_type)
+ {
+ // Value-cast here. Input might be 16-bit. GLSL requires int.
+ op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
+ }
+
+ if (expression_type(op3).basetype != offset_count_type)
+ {
+ // Value-cast here. Input might be 16-bit. GLSL requires int.
+ op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
+ }
+
+ emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
+ forward);
+
+ inherit_expression_dependencies(result_id, op0);
+ inherit_expression_dependencies(result_id, op1);
+ inherit_expression_dependencies(result_id, op2);
+ inherit_expression_dependencies(result_id, op3);
+}
+
// EXT_shader_texture_lod only concerns fragment shaders so lod tex functions
// are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex
// functions in vertex shaders so we revert those back to plain calls when
@@ -4401,7 +4727,7 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
}
string mix_op;
- bool has_boolean_mix = backend.boolean_mix_support &&
+ bool has_boolean_mix = *backend.boolean_mix_function &&
((options.es && options.version >= 310) || (!options.es && options.version >= 450));
bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
@@ -4431,11 +4757,13 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
inherit_expression_dependencies(id, right);
inherit_expression_dependencies(id, lerp);
}
+ else if (lerptype.basetype == SPIRType::Boolean)
+ emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
else
emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
}
-string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id)
+string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
{
// Keep track of the array indices we have used to load the image.
// We'll need to use the same array index into the combined image sampler array.
@@ -4457,18 +4785,18 @@ string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_
samp_id = samp->self;
auto image_itr = find_if(begin(args), end(args),
- [image_id](const SPIRFunction::Parameter &param) { return param.id == image_id; });
+ [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
auto sampler_itr = find_if(begin(args), end(args),
- [samp_id](const SPIRFunction::Parameter &param) { return param.id == samp_id; });
+ [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
if (image_itr != end(args) || sampler_itr != end(args))
{
// If any parameter originates from a parameter, we will find it in our argument list.
bool global_image = image_itr == end(args);
bool global_sampler = sampler_itr == end(args);
- uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args));
- uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args));
+ VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
+ VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
auto &combined = current_function->combined_parameters;
auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
@@ -4509,15 +4837,16 @@ void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i
{
emit_binary_func_op(result_type, result_id, image_id, samp_id,
type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
-
- // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
- forwarded_temporaries.erase(result_id);
}
else
{
// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
}
+
+ // Make sure to suppress usage tracking and any expression invalidation.
+ // It is illegal to create temporaries of opaque types.
+ forwarded_temporaries.erase(result_id);
}
static inline bool image_opcode_is_sample_no_dref(Op op)
@@ -4547,13 +4876,41 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
{
auto *ops = stream(i);
auto op = static_cast<Op>(i.op);
- uint32_t length = i.length;
SmallVector<uint32_t> inherited_expressions;
- uint32_t result_type = ops[0];
+ uint32_t result_type_id = ops[0];
uint32_t id = ops[1];
- uint32_t img = ops[2];
+
+ bool forward = false;
+ string expr = to_texture_op(i, &forward, inherited_expressions);
+ emit_op(result_type_id, id, expr, forward);
+ for (auto &inherit : inherited_expressions)
+ inherit_expression_dependencies(id, inherit);
+
+ switch (op)
+ {
+ case OpImageSampleDrefImplicitLod:
+ case OpImageSampleImplicitLod:
+ case OpImageSampleProjImplicitLod:
+ case OpImageSampleProjDrefImplicitLod:
+ register_control_dependent_expression(id);
+ break;
+
+ default:
+ break;
+ }
+}
+
+std::string CompilerGLSL::to_texture_op(const Instruction &i, bool *forward,
+ SmallVector<uint32_t> &inherited_expressions)
+{
+ auto *ops = stream(i);
+ auto op = static_cast<Op>(i.op);
+ uint32_t length = i.length;
+
+ uint32_t result_type_id = ops[0];
+ VariableID img = ops[2];
uint32_t coord = ops[3];
uint32_t dref = 0;
uint32_t comp = 0;
@@ -4562,8 +4919,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
bool fetch = false;
const uint32_t *opt = nullptr;
+ auto &result_type = get<SPIRType>(result_type_id);
+
inherited_expressions.push_back(coord);
+ // Make sure non-uniform decoration is back-propagated to where it needs to be.
+ if (has_decoration(img, DecorationNonUniformEXT))
+ propagate_nonuniform_qualifier(img);
+
switch (op)
{
case OpImageSampleDrefImplicitLod:
@@ -4658,6 +5021,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
uint32_t offset = 0;
uint32_t coffsets = 0;
uint32_t sample = 0;
+ uint32_t minlod = 0;
uint32_t flags = 0;
if (length)
@@ -4683,14 +5047,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
test(offset, ImageOperandsOffsetMask);
test(coffsets, ImageOperandsConstOffsetsMask);
test(sample, ImageOperandsSampleMask);
+ test(minlod, ImageOperandsMinLodMask);
string expr;
- bool forward = false;
expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset),
- (!!grad_x || !!grad_y), !!dref, lod);
+ (!!grad_x || !!grad_y), !!dref, lod, minlod);
expr += "(";
expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod,
- coffset, offset, bias, comp, sample, &forward);
+ coffset, offset, bias, comp, sample, minlod, forward);
expr += ")";
// texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
@@ -4703,7 +5067,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
{
bool image_is_depth = false;
const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
- uint32_t image_id = combined ? combined->image : img;
+ VariableID image_id = combined ? combined->image : img;
if (combined && image_is_comparison(imgtype, combined->image))
image_is_depth = true;
@@ -4718,29 +5082,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
image_is_depth = true;
if (image_is_depth)
- expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
+ expr = remap_swizzle(result_type, 1, expr);
+ }
+
+ if (!backend.support_small_type_sampling_result && result_type.width < 32)
+ {
+ // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
+ // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
+ expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
}
// Deals with reads from MSL. We might need to downconvert to fewer components.
if (op == OpImageRead)
- expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
+ expr = remap_swizzle(result_type, 4, expr);
- emit_op(result_type, id, expr, forward);
- for (auto &inherit : inherited_expressions)
- inherit_expression_dependencies(id, inherit);
-
- switch (op)
- {
- case OpImageSampleDrefImplicitLod:
- case OpImageSampleImplicitLod:
- case OpImageSampleProjImplicitLod:
- case OpImageSampleProjDrefImplicitLod:
- register_control_dependent_expression(id);
- break;
-
- default:
- break;
- }
+ return expr;
}
bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
@@ -4753,10 +5109,13 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
// For some subclasses, the function is a method on the specified image.
-string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+string CompilerGLSL::to_function_name(VariableID tex, const SPIRType &imgtype, bool is_fetch, bool is_gather,
bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool,
- uint32_t lod)
+ uint32_t lod, uint32_t minlod)
{
+ if (minlod != 0)
+ SPIRV_CROSS_THROW("Sparse texturing not yet supported.");
+
string fname;
// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
@@ -4812,10 +5171,19 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
{
if (options.vulkan_semantics)
{
- // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
if (dummy_sampler_id)
- SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining.");
- require_extension_internal("GL_EXT_samplerless_texture_functions");
+ {
+ // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
+ auto sampled_type = type;
+ sampled_type.basetype = SPIRType::SampledImage;
+ return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
+ to_expression(dummy_sampler_id), ")");
+ }
+ else
+ {
+ // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
+ require_extension_internal("GL_EXT_samplerless_texture_functions");
+ }
}
else
{
@@ -4832,10 +5200,11 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
}
// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+string CompilerGLSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset,
- uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward)
+ uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/,
+ bool *p_forward)
{
string farg_str;
if (is_fetch)
@@ -5112,7 +5481,6 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
case GLSLstd450ModfStruct:
{
- forced_temporaries.insert(id);
auto &type = get<SPIRType>(result_type);
emit_uninitialized_temporary_expression(result_type, id);
statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
@@ -5252,7 +5620,6 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
case GLSLstd450FrexpStruct:
{
- forced_temporaries.insert(id);
auto &type = get<SPIRType>(result_type);
emit_uninitialized_temporary_expression(result_type, id);
statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
@@ -5261,8 +5628,28 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
}
case GLSLstd450Ldexp:
- emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
+ {
+ bool forward = should_forward(args[0]) && should_forward(args[1]);
+
+ auto op0 = to_unpacked_expression(args[0]);
+ auto op1 = to_unpacked_expression(args[1]);
+ auto &op1_type = expression_type(args[1]);
+ if (op1_type.basetype != SPIRType::Int)
+ {
+ // Need a value cast here.
+ auto target_type = op1_type;
+ target_type.basetype = SPIRType::Int;
+ op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
+ }
+
+ auto expr = join("ldexp(", op0, ", ", op1, ")");
+
+ emit_op(result_type, id, expr, forward);
+ inherit_expression_dependencies(id, args[0]);
+ inherit_expression_dependencies(id, args[1]);
break;
+ }
+
case GLSLstd450PackSnorm4x8:
emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
break;
@@ -5326,7 +5713,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
// Bit-fiddling
case GLSLstd450FindILsb:
- emit_unary_func_op(result_type, id, args[0], "findLSB");
+ // findLSB always returns int.
+ emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
break;
case GLSLstd450FindSMsb:
@@ -5808,13 +6196,38 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
// Floating <-> Integer special casts. Just have to enumerate all cases. :(
// 16-bit, 32-bit and 64-bit floats.
if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
+ {
+ if (is_legacy_es())
+ SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
+ else if (!options.es && options.version < 330)
+ require_extension_internal("GL_ARB_shader_bit_encoding");
return "floatBitsToUint";
+ }
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
+ {
+ if (is_legacy_es())
+ SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
+ else if (!options.es && options.version < 330)
+ require_extension_internal("GL_ARB_shader_bit_encoding");
return "floatBitsToInt";
+ }
else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
+ {
+ if (is_legacy_es())
+ SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
+ else if (!options.es && options.version < 330)
+ require_extension_internal("GL_ARB_shader_bit_encoding");
return "uintBitsToFloat";
+ }
else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
+ {
+ if (is_legacy_es())
+ SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
+ else if (!options.es && options.version < 330)
+ require_extension_internal("GL_ARB_shader_bit_encoding");
return "intBitsToFloat";
+ }
+
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
return "doubleBitsToInt64";
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
@@ -6114,6 +6527,43 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
case BuiltInIncomingRayFlagsNV:
return "gl_IncomingRayFlagsNV";
+ case BuiltInBaryCoordNV:
+ {
+ if (options.es && options.version < 320)
+ SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
+ else if (!options.es && options.version < 450)
+ SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
+ require_extension_internal("GL_NV_fragment_shader_barycentric");
+ return "gl_BaryCoordNV";
+ }
+
+ case BuiltInBaryCoordNoPerspNV:
+ {
+ if (options.es && options.version < 320)
+ SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
+ else if (!options.es && options.version < 450)
+ SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
+ require_extension_internal("GL_NV_fragment_shader_barycentric");
+ return "gl_BaryCoordNoPerspNV";
+ }
+
+ case BuiltInFragStencilRefEXT:
+ {
+ if (!options.es)
+ {
+ require_extension_internal("GL_ARB_shader_stencil_export");
+ return "gl_FragStencilRefARB";
+ }
+ else
+ SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
+ }
+
+ case BuiltInDeviceIndex:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
+ require_extension_internal("GL_EXT_device_group");
+ return "gl_DeviceIndex";
+
default:
return join("gl_BuiltIn_", convert_to_string(builtin));
}
@@ -6147,7 +6597,16 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
if (!chain_only)
+ {
+ // We handle transpose explicitly, so don't resolve that here.
+ auto *e = maybe_get<SPIRExpression>(base);
+ bool old_transpose = e && e->need_transpose;
+ if (e)
+ e->need_transpose = false;
expr = to_enclosed_expression(base, register_expression_read);
+ if (e)
+ e->need_transpose = old_transpose;
+ }
// Start traversing type hierarchy at the proper non-pointer types,
// but keep type_id referencing the original pointer for use below.
@@ -6171,8 +6630,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
- bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked);
- uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType);
+ bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
+ uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
bool is_invariant = has_decoration(base, DecorationInvariant);
bool pending_array_enclose = false;
bool dimension_flatten = false;
@@ -6344,9 +6803,6 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
BuiltIn builtin;
if (is_member_builtin(*type, index, &builtin))
{
- // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
- // To make this properly work by omitting all OpName opcodes,
- // we need to infer gl_in or gl_out based on the builtin, and stage.
if (access_chain_is_arrayed)
{
expr += ".";
@@ -6368,11 +6824,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
if (has_member_decoration(type->self, index, DecorationInvariant))
is_invariant = true;
- is_packed = member_is_packed_type(*type, index);
- if (is_packed)
- packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType);
+ is_packed = member_is_packed_physical_type(*type, index);
+ if (member_is_remapped_physical_type(*type, index))
+ physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
else
- packed_type = 0;
+ physical_type = 0;
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
type = &get<SPIRType>(type->member_types[index]);
@@ -6380,13 +6836,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
// Matrix -> Vector
else if (type->columns > 1)
{
- if (row_major_matrix_needs_conversion)
- {
- expr = convert_row_major_matrix(expr, *type, is_packed);
- row_major_matrix_needs_conversion = false;
- is_packed = false;
- packed_type = 0;
- }
+ // If we have a row-major matrix here, we need to defer any transpose in case this access chain
+ // is used to store a column. We can resolve it right here and now if we access a scalar directly,
+ // by flipping indexing order of the matrix.
expr += "[";
if (index_is_literal)
@@ -6401,16 +6853,36 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
// Vector -> Scalar
else if (type->vecsize > 1)
{
- if (index_is_literal && !is_packed)
+ string deferred_index;
+ if (row_major_matrix_needs_conversion)
+ {
+ // Flip indexing order.
+ auto column_index = expr.find_last_of('[');
+ if (column_index != string::npos)
+ {
+ deferred_index = expr.substr(column_index);
+ expr.resize(column_index);
+ }
+ }
+
+ if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion)
{
expr += ".";
expr += index_to_swizzle(index);
}
- else if (ir.ids[index].get_type() == TypeConstant && !is_packed)
+ else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
{
auto &c = get<SPIRConstant>(index);
- expr += ".";
- expr += index_to_swizzle(c.scalar());
+ if (c.specialization)
+ {
+ // If the index is a spec constant, we cannot turn extract into a swizzle.
+ expr += join("[", to_expression(index), "]");
+ }
+ else
+ {
+ expr += ".";
+ expr += index_to_swizzle(c.scalar());
+ }
}
else if (index_is_literal)
{
@@ -6424,8 +6896,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
expr += "]";
}
+ expr += deferred_index;
+ row_major_matrix_needs_conversion = false;
+
is_packed = false;
- packed_type = 0;
+ physical_type = 0;
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
@@ -6445,7 +6920,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
meta->need_transpose = row_major_matrix_needs_conversion;
meta->storage_is_packed = is_packed;
meta->storage_is_invariant = is_invariant;
- meta->storage_packed_type = packed_type;
+ meta->storage_physical_type = physical_type;
}
return expr;
@@ -6587,7 +7062,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
// Cannot forward transpositions, so resolve them here.
if (need_transpose)
- expr += convert_row_major_matrix(tmp, member_type, false);
+ expr += convert_row_major_matrix(tmp, member_type, 0, false);
else
expr += tmp;
}
@@ -6903,7 +7378,7 @@ bool CompilerGLSL::should_dereference(uint32_t id)
return true;
}
-bool CompilerGLSL::should_forward(uint32_t id)
+bool CompilerGLSL::should_forward(uint32_t id) const
{
// If id is a variable we will try to forward it regardless of force_temporary check below
// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
@@ -6922,6 +7397,12 @@ bool CompilerGLSL::should_forward(uint32_t id)
return false;
}
+bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
+{
+ // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
+ return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
+}
+
void CompilerGLSL::track_expression_read(uint32_t id)
{
switch (ir.ids[id].get_type())
@@ -6948,7 +7429,7 @@ void CompilerGLSL::track_expression_read(uint32_t id)
// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
- if (expression_is_forwarded(id))
+ if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
{
auto &v = expression_usage_counts[id];
v++;
@@ -7019,18 +7500,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
return expr;
}
+void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
+{
+ if (var.allocate_temporary_copy)
+ {
+ auto &type = get<SPIRType>(var.basetype);
+ auto &flags = get_decoration_bitset(var.self);
+ statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
+ }
+}
+
void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->deferred_declaration)
{
statement(variable_decl_function_local(*var), ";");
- if (var->allocate_temporary_copy)
- {
- auto &type = get<SPIRType>(var->basetype);
- auto &flags = ir.meta[id].decoration.decoration_flags;
- statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";");
- }
+ emit_variable_temporary_copies(*var);
var->deferred_declaration = false;
}
}
@@ -7140,7 +7626,7 @@ bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
{
- uint32_t base = 0;
+ ID base = 0;
string op;
string subop;
@@ -7198,10 +7684,10 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32
if (i)
op += ", ";
- subop = to_expression(elems[i]);
+ subop = to_composite_constructor_expression(elems[i]);
}
- base = e ? e->base_expression : 0;
+ base = e ? e->base_expression : ID(0);
}
if (swizzle_optimization)
@@ -7285,14 +7771,19 @@ void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
{
- if (forwarded_temporaries.count(expr.self))
+ // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
+ // these will be marked as having suppressed usage tracking.
+ // Our only concern is to make sure arithmetic operations are done in similar ways.
+ if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
+ forced_invariant_temporaries.count(expr.self) == 0)
{
forced_temporaries.insert(expr.self);
+ forced_invariant_temporaries.insert(expr.self);
force_recompile();
- }
- for (auto &dependent : expr.expression_dependencies)
- disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
+ for (auto &dependent : expr.expression_dependencies)
+ disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
+ }
}
void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
@@ -7357,6 +7848,10 @@ uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &inst
case OpSLessThanEqual:
case OpSGreaterThan:
case OpSGreaterThanEqual:
+ case OpULessThan:
+ case OpULessThanEqual:
+ case OpUGreaterThan:
+ case OpUGreaterThanEqual:
return expression_type(ops[2]).width;
default:
@@ -7442,19 +7937,39 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
bool old_need_transpose = false;
auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
- if (ptr_expression && ptr_expression->need_transpose)
+
+ if (forward)
{
- old_need_transpose = true;
- ptr_expression->need_transpose = false;
- need_transpose = true;
+ // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
+ // taking the expression.
+ if (ptr_expression && ptr_expression->need_transpose)
+ {
+ old_need_transpose = true;
+ ptr_expression->need_transpose = false;
+ need_transpose = true;
+ }
+ else if (is_non_native_row_major_matrix(ptr))
+ need_transpose = true;
}
- else if (is_non_native_row_major_matrix(ptr))
- need_transpose = true;
// If we are forwarding this load,
// don't register the read to access chain here, defer that to when we actually use the expression,
// using the add_implied_read_expression mechanism.
- auto expr = to_dereferenced_expression(ptr, !forward);
+ string expr;
+
+ bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
+ bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
+ if (forward || (!is_packed && !is_remapped))
+ {
+ // For the simple case, we do not need to deal with repacking.
+ expr = to_dereferenced_expression(ptr, false);
+ }
+ else
+ {
+ // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
+ // storing the expression to a temporary.
+ expr = to_unpacked_expression(ptr);
+ }
// We might need to bitcast in order to load from a builtin.
bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type));
@@ -7465,10 +7980,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
unroll_array_from_complex_load(id, ptr, expr);
auto &type = get<SPIRType>(result_type);
- if (has_decoration(id, DecorationNonUniformEXT))
+ // Shouldn't need to check for ID, but current glslang codegen requires it in some cases
+ // when loading Image/Sampler descriptors. It does not hurt to check ID as well.
+ if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
+ {
+ propagate_nonuniform_qualifier(ptr);
convert_non_uniform_expression(type, expr);
+ }
- if (ptr_expression)
+ if (forward && ptr_expression)
ptr_expression->need_transpose = old_need_transpose;
// By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
@@ -7484,7 +8004,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// it is an array, and our backend does not support arrays as value types.
// Emit the temporary, and copy it explicitly.
e = &emit_uninitialized_temporary_expression(result_type, id);
- emit_array_copy(to_expression(id), ptr);
+ emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr));
}
else
e = &emit_op(result_type, id, expr, forward, !usage_tracking);
@@ -7492,12 +8012,22 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
e->need_transpose = need_transpose;
register_read(id, ptr, forward);
- // Pass through whether the result is of a packed type.
- if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked))
+ if (forward)
{
- set_extended_decoration(id, SPIRVCrossDecorationPacked);
- set_extended_decoration(id, SPIRVCrossDecorationPackedType,
- get_extended_decoration(ptr, SPIRVCrossDecorationPackedType));
+ // Pass through whether the result is of a packed type and the physical type ID.
+ if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
+ set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+ if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
+ {
+ set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
+ get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
+ }
+ }
+ else
+ {
+ // This might have been set on an earlier compilation iteration, force it to be unset.
+ unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+ unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
}
inherit_expression_dependencies(id, ptr);
@@ -7523,24 +8053,36 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
auto *backing_variable = maybe_get_backing_variable(ops[2]);
- expr.loaded_from = backing_variable ? backing_variable->self : ops[2];
+ expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
expr.need_transpose = meta.need_transpose;
expr.access_chain = true;
// Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
if (meta.storage_is_packed)
- set_extended_decoration(ops[1], SPIRVCrossDecorationPacked);
- if (meta.storage_packed_type != 0)
- set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type);
+ set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
+ if (meta.storage_physical_type != 0)
+ set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
if (meta.storage_is_invariant)
set_decoration(ops[1], DecorationInvariant);
+ // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
+ // temporary which could be subject to invalidation.
+ // Need to assume we're forwarded while calling inherit_expression_depdendencies.
+ forwarded_temporaries.insert(ops[1]);
+ // The access chain itself is never forced to a temporary, but its dependencies might.
+ suppressed_usage_tracking.insert(ops[1]);
+
for (uint32_t i = 2; i < length; i++)
{
inherit_expression_dependencies(ops[1], ops[i]);
add_implied_read_expression(expr, ops[i]);
}
+ // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
+ // we're not forwarded after all.
+ if (expr.expression_dependencies.empty())
+ forwarded_temporaries.erase(ops[1]);
+
break;
}
@@ -7548,6 +8090,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
auto *var = maybe_get<SPIRVariable>(ops[0]);
+ if (has_decoration(ops[0], DecorationNonUniformEXT))
+ propagate_nonuniform_qualifier(ops[0]);
+
if (var && var->statically_assigned)
var->static_expression = ops[1];
else if (var && var->loop_variable && !var->loop_variable_enable)
@@ -7637,13 +8182,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (skip_argument(arg[i]))
continue;
- arglist.push_back(to_func_call_arg(arg[i]));
+ arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
}
for (auto &combined : callee.combined_parameters)
{
- uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id];
- uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id];
+ auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
+ auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
}
@@ -7746,15 +8291,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
forward = false;
string constructor_op;
- if (!backend.array_is_value_type && out_type.array.size() > 1)
- {
- // We cannot construct array of arrays because we cannot treat the inputs
- // as value types. Need to declare the array-of-arrays, and copy in elements one by one.
- emit_uninitialized_temporary_expression(result_type, id);
- for (uint32_t i = 0; i < length; i++)
- emit_array_copy(join(to_expression(id), "[", i, "]"), elems[i]);
- }
- else if (backend.use_initializer_list && composite)
+ if (backend.use_initializer_list && composite)
{
// Only use this path if we are building composites.
// This path cannot be used for arithmetic.
@@ -7764,14 +8301,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (type_is_empty(out_type) && !backend.supports_empty_struct)
constructor_op += "0";
else if (splat)
- constructor_op += to_expression(elems[0]);
+ constructor_op += to_unpacked_expression(elems[0]);
else
constructor_op += build_composite_combiner(result_type, elems, length);
constructor_op += " }";
}
else if (swizzle_splat && !composite)
{
- constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_expression(elems[0]));
+ constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
}
else
{
@@ -7779,7 +8316,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (type_is_empty(out_type) && !backend.supports_empty_struct)
constructor_op += "0";
else if (splat)
- constructor_op += to_expression(elems[0]);
+ constructor_op += to_unpacked_expression(elems[0]);
else
constructor_op += build_composite_combiner(result_type, elems, length);
constructor_op += ")";
@@ -7841,7 +8378,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
allow_base_expression = false;
// Packed expressions cannot be split up.
- if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked))
+ if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked))
+ allow_base_expression = false;
+
+ // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
+ // into the base expression.
+ if (is_non_native_row_major_matrix(ops[2]))
allow_base_expression = false;
AccessChainMeta meta;
@@ -7864,14 +8406,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// from expression causing it to be forced to an actual temporary in GLSL.
auto expr = access_chain_internal(ops[2], &ops[3], length,
ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
- e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
+ e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
inherit_expression_dependencies(id, ops[2]);
e->base_expression = ops[2];
}
else
{
auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
- e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
+ e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
inherit_expression_dependencies(id, ops[2]);
}
@@ -7880,9 +8422,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// instead of loading everything through an access chain.
e->need_transpose = meta.need_transpose;
if (meta.storage_is_packed)
- set_extended_decoration(id, SPIRVCrossDecorationPacked);
- if (meta.storage_packed_type != 0)
- set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type);
+ set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
+ if (meta.storage_physical_type != 0)
+ set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
if (meta.storage_is_invariant)
set_decoration(id, DecorationInvariant);
@@ -7930,13 +8472,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
uint32_t rhs = ops[2];
bool pointer = get<SPIRType>(result_type).pointer;
- if (expression_is_lvalue(rhs) && !pointer)
+ auto *chain = maybe_get<SPIRAccessChain>(rhs);
+ if (chain)
+ {
+ // Cannot lower to a SPIRExpression, just copy the object.
+ auto &e = set<SPIRAccessChain>(id, *chain);
+ e.self = id;
+ }
+ else if (expression_is_lvalue(rhs) && !pointer)
{
// Need a copy.
// For pointer types, we copy the pointer itself.
- statement(declare_temporary(result_type, id), to_expression(rhs), ";");
+ statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
- inherit_expression_dependencies(id, rhs);
}
else
{
@@ -7947,7 +8495,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (pointer)
{
auto *var = maybe_get_backing_variable(rhs);
- e.loaded_from = var ? var->self : 0;
+ e.loaded_from = var ? var->self : ID(0);
+ }
+
+ // If we're copying an access chain, need to inherit the read expressions.
+ auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
+ if (rhs_expr)
+ {
+ e.implied_read_expressions = rhs_expr->implied_read_expressions;
+ e.expression_dependencies = rhs_expr->expression_dependencies;
}
}
break;
@@ -7972,7 +8528,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
shuffle = true;
// Cannot use swizzles with packed expressions, force shuffle path.
- if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked))
+ if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
shuffle = true;
string expr;
@@ -7981,7 +8537,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (shuffle)
{
should_fwd = should_forward(vec0) && should_forward(vec1);
- trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
+ trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
// Constructor style and shuffling from two different vectors.
SmallVector<string> args;
@@ -7994,7 +8550,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// a value we might not need, and bog down codegen.
SPIRConstant c;
c.constant_type = type0.parent_type;
- assert(type0.parent_type != 0);
+ assert(type0.parent_type != ID(0));
args.push_back(constant_expression(c));
}
else if (elems[i] >= type0.vecsize)
@@ -8007,7 +8563,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
else
{
should_fwd = should_forward(vec0);
- trivial_forward = !expression_is_forwarded(vec0);
+ trivial_forward = should_suppress_usage_tracking(vec0);
// We only source from first vector, so can use swizzle.
// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
@@ -8027,8 +8583,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
emit_op(result_type, id, expr, should_fwd, trivial_forward);
+
inherit_expression_dependencies(id, vec0);
- inherit_expression_dependencies(id, vec1);
+ if (vec0 != vec1)
+ inherit_expression_dependencies(id, vec1);
break;
}
@@ -8084,18 +8642,56 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (e && e->need_transpose)
{
e->need_transpose = false;
- emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
+ string expr;
+
+ if (opcode == OpMatrixTimesVector)
+ expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
+ enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
+ else
+ expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
+ to_enclosed_unpacked_expression(ops[2]));
+
+ bool forward = should_forward(ops[2]) && should_forward(ops[3]);
+ emit_op(ops[0], ops[1], expr, forward);
e->need_transpose = true;
+ inherit_expression_dependencies(ops[1], ops[2]);
+ inherit_expression_dependencies(ops[1], ops[3]);
}
else
GLSL_BOP(*);
break;
}
+ case OpMatrixTimesMatrix:
+ {
+ auto *a = maybe_get<SPIRExpression>(ops[2]);
+ auto *b = maybe_get<SPIRExpression>(ops[3]);
+
+ // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
+ // a^T * b^T = (b * a)^T.
+ if (a && b && a->need_transpose && b->need_transpose)
+ {
+ a->need_transpose = false;
+ b->need_transpose = false;
+ auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
+ enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
+ bool forward = should_forward(ops[2]) && should_forward(ops[3]);
+ auto &e = emit_op(ops[0], ops[1], expr, forward);
+ e.need_transpose = true;
+ a->need_transpose = true;
+ b->need_transpose = true;
+ inherit_expression_dependencies(ops[1], ops[2]);
+ inherit_expression_dependencies(ops[1], ops[3]);
+ }
+ else
+ GLSL_BOP(*);
+
+ break;
+ }
+
case OpFMul:
case OpMatrixTimesScalar:
case OpVectorTimesScalar:
- case OpMatrixTimesMatrix:
GLSL_BOP(*);
break;
@@ -8170,7 +8766,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
uint32_t result_id = ops[1];
uint32_t op0 = ops[2];
uint32_t op1 = ops[3];
- forced_temporaries.insert(result_id);
auto &type = get<SPIRType>(result_type);
emit_uninitialized_temporary_expression(result_type, result_id);
const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
@@ -8279,7 +8874,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
auto &type = get<SPIRType>(result_type);
if (type.vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
else
GLSL_BOP(||);
break;
@@ -8293,7 +8888,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
auto &type = get<SPIRType>(result_type);
if (type.vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
else
GLSL_BOP(&&);
break;
@@ -8350,7 +8945,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpUGreaterThan:
case OpSGreaterThan:
{
- auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpUGreaterThan ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
GLSL_BFOP_CAST(greaterThan, type);
else
@@ -8370,7 +8965,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpUGreaterThanEqual:
case OpSGreaterThanEqual:
{
- auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
GLSL_BFOP_CAST(greaterThanEqual, type);
else
@@ -8390,7 +8985,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpULessThan:
case OpSLessThan:
{
- auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpULessThan ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
GLSL_BFOP_CAST(lessThan, type);
else
@@ -8410,7 +9005,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpULessThanEqual:
case OpSLessThanEqual:
{
- auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpULessThanEqual ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
GLSL_BFOP_CAST(lessThanEqual, type);
else
@@ -8618,23 +9213,36 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// Bitfield
case OpBitFieldInsert:
- // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
- GLSL_QFOP(bitfieldInsert);
+ {
+ emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
break;
+ }
case OpBitFieldSExtract:
+ {
+ emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
+ SPIRType::Int, SPIRType::Int);
+ break;
+ }
+
case OpBitFieldUExtract:
- // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
- GLSL_TFOP(bitfieldExtract);
+ {
+ emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
+ SPIRType::Int, SPIRType::Int);
break;
+ }
case OpBitReverse:
+ // BitReverse does not have issues with sign since result type must match input type.
GLSL_UFOP(bitfieldReverse);
break;
case OpBitCount:
- GLSL_UFOP(bitCount);
+ {
+ auto basetype = expression_type(ops[2]).basetype;
+ emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
break;
+ }
// Atomics
case OpAtomicExchange:
@@ -8823,7 +9431,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// When using the image, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
- e.loaded_from = var ? var->self : 0;
+ e.loaded_from = var ? var->self : ID(0);
break;
}
@@ -8883,6 +9491,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_sampled_image_op(result_type, id, ops[2], ops[3]);
+ inherit_expression_dependencies(id, ops[2]);
+ inherit_expression_dependencies(id, ops[3]);
break;
}
@@ -9044,7 +9654,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// When using the pointer, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
- e.loaded_from = var ? var->self : 0;
+ e.loaded_from = var ? var->self : ID(0);
break;
}
@@ -9304,6 +9914,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
+ else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
+ {
+ break; // Ignore SPIR-V debug information extended instructions.
+ }
else
{
statement("// unimplemented ext op ", instruction.op);
@@ -9495,28 +10109,98 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
case OpFUnordEqual:
- GLSL_BFOP(unsupported_FUnordEqual);
- break;
-
case OpFUnordNotEqual:
- GLSL_BFOP(unsupported_FUnordNotEqual);
- break;
-
case OpFUnordLessThan:
- GLSL_BFOP(unsupported_FUnordLessThan);
- break;
-
case OpFUnordGreaterThan:
- GLSL_BFOP(unsupported_FUnordGreaterThan);
- break;
-
case OpFUnordLessThanEqual:
- GLSL_BFOP(unsupported_FUnordLessThanEqual);
- break;
-
case OpFUnordGreaterThanEqual:
- GLSL_BFOP(unsupported_FUnordGreaterThanEqual);
+ {
+ // GLSL doesn't specify if floating point comparisons are ordered or unordered,
+ // but glslang always emits ordered floating point compares for GLSL.
+ // To get unordered compares, we can test the opposite thing and invert the result.
+ // This way, we force true when there is any NaN present.
+ uint32_t op0 = ops[2];
+ uint32_t op1 = ops[3];
+
+ string expr;
+ if (expression_type(op0).vecsize > 1)
+ {
+ const char *comp_op = nullptr;
+ switch (opcode)
+ {
+ case OpFUnordEqual:
+ comp_op = "notEqual";
+ break;
+
+ case OpFUnordNotEqual:
+ comp_op = "equal";
+ break;
+
+ case OpFUnordLessThan:
+ comp_op = "greaterThanEqual";
+ break;
+
+ case OpFUnordLessThanEqual:
+ comp_op = "greaterThan";
+ break;
+
+ case OpFUnordGreaterThan:
+ comp_op = "lessThanEqual";
+ break;
+
+ case OpFUnordGreaterThanEqual:
+ comp_op = "lessThan";
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
+ }
+ else
+ {
+ const char *comp_op = nullptr;
+ switch (opcode)
+ {
+ case OpFUnordEqual:
+ comp_op = " != ";
+ break;
+
+ case OpFUnordNotEqual:
+ comp_op = " == ";
+ break;
+
+ case OpFUnordLessThan:
+ comp_op = " >= ";
+ break;
+
+ case OpFUnordLessThanEqual:
+ comp_op = " > ";
+ break;
+
+ case OpFUnordGreaterThan:
+ comp_op = " <= ";
+ break;
+
+ case OpFUnordGreaterThanEqual:
+ comp_op = " < ";
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
+ }
+
+ emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
+ inherit_expression_dependencies(ops[1], op0);
+ inherit_expression_dependencies(ops[1], op1);
break;
+ }
case OpReportIntersectionNV:
statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
@@ -9564,6 +10248,57 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// Undefined value has been declared.
break;
+ case OpLine:
+ {
+ emit_line_directive(ops[0], ops[1]);
+ break;
+ }
+
+ case OpNoLine:
+ break;
+
+ case OpDemoteToHelperInvocationEXT:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
+ require_extension_internal("GL_EXT_demote_to_helper_invocation");
+ statement(backend.demote_literal, ";");
+ break;
+
+ case OpIsHelperInvocationEXT:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
+ require_extension_internal("GL_EXT_demote_to_helper_invocation");
+ emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
+ break;
+
+ case OpBeginInvocationInterlockEXT:
+ // If the interlock is complex, we emit this elsewhere.
+ if (!interlocked_is_complex)
+ {
+ if (options.es)
+ statement("beginInvocationInterlockNV();");
+ else
+ statement("beginInvocationInterlockARB();");
+
+ flush_all_active_variables();
+ // Make sure forwarding doesn't propagate outside interlock region.
+ }
+ break;
+
+ case OpEndInvocationInterlockEXT:
+ // If the interlock is complex, we emit this elsewhere.
+ if (!interlocked_is_complex)
+ {
+ if (options.es)
+ statement("endInvocationInterlockNV();");
+ else
+ statement("endInvocationInterlockARB();");
+
+ flush_all_active_variables();
+ // Make sure forwarding doesn't propagate outside interlock region.
+ }
+ break;
+
default:
statement("// unimplemented op ", instruction.op);
break;
@@ -9593,12 +10328,18 @@ void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t in
if (var_id)
flush_variable_declaration(var_id);
- arglist.push_back(to_func_call_arg(arg.id));
+ arglist.push_back(to_func_call_arg(arg, arg.id));
}
}
string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
+ if (type.type_alias != TypeID(0) &&
+ !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
+ {
+ return to_member_name(get<SPIRType>(type.type_alias), index);
+ }
+
auto &memb = ir.meta[type.self].members;
if (index < memb.size() && !memb[index].alias.empty())
return memb[index].alias;
@@ -9674,21 +10415,50 @@ bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, u
return true;
}
+// Checks if we need to remap physical type IDs when declaring the type in a buffer.
+bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
+{
+ return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+}
+
// Checks whether the member is in packed data type, that might need to be unpacked.
-// GLSL does not define packed data types, but certain subclasses do.
-bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
+bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
{
- return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked);
+ return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
}
// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
-string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/)
+string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
+ bool /*is_packed*/)
{
strip_enclosed_expression(exp_str);
- return join("transpose(", exp_str, ")");
+ if (!is_matrix(exp_type))
+ {
+ auto column_index = exp_str.find_last_of('[');
+ if (column_index == string::npos)
+ return exp_str;
+
+ auto column_expr = exp_str.substr(column_index);
+ exp_str.resize(column_index);
+
+ auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
+
+ // Loading a column from a row-major matrix. Unroll the load.
+ for (uint32_t c = 0; c < exp_type.vecsize; c++)
+ {
+ transposed_expr += join(exp_str, '[', c, ']', column_expr);
+ if (c + 1 < exp_type.vecsize)
+ transposed_expr += ", ";
+ }
+
+ transposed_expr += ")";
+ return transposed_expr;
+ }
+ else
+ return join("transpose(", exp_str, ")");
}
string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
@@ -9721,6 +10491,10 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
variable_decl(membertype, to_member_name(type, index)), ";");
}
+void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
+{
+}
+
const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
{
// GL_EXT_buffer_reference variables can be marked as restrict.
@@ -9779,7 +10553,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
- return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags);
+ auto &type = expression_type(id);
+ bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+ if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
+ {
+ // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
+ auto &result_type = get<SPIRType>(type.image.type);
+ if (result_type.width < 32)
+ return "mediump ";
+ }
+ return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
}
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@@ -9995,15 +10778,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
switch (imagetype.basetype)
{
case SPIRType::Int:
+ case SPIRType::Short:
+ case SPIRType::SByte:
res = "i";
break;
case SPIRType::UInt:
+ case SPIRType::UShort:
+ case SPIRType::UByte:
res = "u";
break;
default:
break;
}
+ // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
+ // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
+
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
return res + "subpassInput" + (type.image.ms ? "MS" : "");
@@ -10312,7 +11102,7 @@ void CompilerGLSL::require_extension_internal(const string &ext)
}
}
-void CompilerGLSL::flatten_buffer_block(uint32_t id)
+void CompilerGLSL::flatten_buffer_block(VariableID id)
{
auto &var = get<SPIRVariable>(id);
auto &type = get<SPIRType>(var.basetype);
@@ -10428,7 +11218,13 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
if (func.self == ir.default_entry_point)
{
- decl += "main";
+ // If we need complex fallback in GLSL, we just wrap main() in a function
+ // and interlock the entire shader ...
+ if (interlocked_is_complex)
+ decl += "spvMainInterlockedBody";
+ else
+ decl += "main";
+
processing_entry_point = true;
}
else
@@ -10503,6 +11299,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
}
}
+ if (func.entry_line.file_id != 0)
+ emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
emit_function_prototype(func, return_flags);
begin_scope();
@@ -10523,6 +11321,8 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
for (auto &v : func.local_variables)
{
auto &var = get<SPIRVariable>(v);
+ var.deferred_declaration = false;
+
if (var.storage == StorageClassWorkgroup)
{
// Special variable type which cannot have initializer,
@@ -10582,15 +11382,29 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
var.deferred_declaration = false;
}
+ // Enforce declaration order for regression testing purposes.
+ for (auto &block_id : func.blocks)
+ {
+ auto &block = get<SPIRBlock>(block_id);
+ sort(begin(block.dominated_variables), end(block.dominated_variables));
+ }
+
for (auto &line : current_function->fixup_hooks_in)
line();
- entry_block.loop_dominator = SPIRBlock::NoDominator;
emit_block_chain(entry_block);
end_scope();
processing_entry_point = false;
statement("");
+
+ // Make sure deferred declaration state for local variables is cleared when we are done with function.
+ // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
+ for (auto &v : func.local_variables)
+ {
+ auto &var = get<SPIRVariable>(v);
+ var.deferred_declaration = false;
+ }
}
void CompilerGLSL::emit_fixup()
@@ -10609,18 +11423,11 @@ void CompilerGLSL::emit_fixup()
}
}
-bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
-{
- auto &child = get<SPIRBlock>(to);
- for (auto &phi : child.phi_variables)
- if (phi.parent == from)
- return true;
- return false;
-}
-
-void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
+void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
auto &child = get<SPIRBlock>(to);
+ if (child.ignore_phi_from_block == from)
+ return;
unordered_set<uint32_t> temporary_phi_variables;
@@ -10645,7 +11452,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
bool need_saved_temporary =
find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
- return future_phi.local_variable == phi.function_variable && future_phi.parent == from;
+ return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
}) != end(child.phi_variables);
if (need_saved_temporary)
@@ -10680,7 +11487,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
}
}
-void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
+void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
{
auto &to_block = get<SPIRBlock>(to);
if (from == to)
@@ -10691,16 +11498,11 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
{
// Just emit the whole block chain as is.
auto usage_counts = expression_usage_counts;
- auto invalid = invalid_expressions;
emit_block_chain(to_block);
- // Expression usage counts and invalid expressions
- // are moot after returning from the continue block.
- // Since we emit the same block multiple times,
- // we don't want to invalidate ourselves.
+ // Expression usage counts are moot after returning from the continue block.
expression_usage_counts = usage_counts;
- invalid_expressions = invalid;
}
else
{
@@ -10715,23 +11517,23 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
// so just use "self" here.
loop_dominator = from;
}
- else if (from_block.loop_dominator != SPIRBlock::NoDominator)
+ else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
{
loop_dominator = from_block.loop_dominator;
}
if (loop_dominator != 0)
{
- auto &dominator = get<SPIRBlock>(loop_dominator);
+ auto &cfg = get_cfg_for_current_function();
// For non-complex continue blocks, we implicitly branch to the continue block
// by having the continue block be part of the loop header in for (; ; continue-block).
- outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
+ outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
}
// Some simplification for for-loops. We always end up with a useless continue;
// statement since we branch to a loop block.
- // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
+ // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
// we can avoid writing out an explicit continue statement.
// Similar optimization to return statements if we know we're outside flow control.
if (!outside_control_flow)
@@ -10739,11 +11541,12 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
}
}
-void CompilerGLSL::branch(uint32_t from, uint32_t to)
+void CompilerGLSL::branch(BlockID from, BlockID to)
{
flush_phi(from, to);
flush_control_dependent_expressions(from);
- flush_all_active_variables();
+
+ bool to_is_continue = is_continue(to);
// This is only a continue if we branch to our loop dominator.
if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
@@ -10760,7 +11563,8 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
// write to the ladder here, and defer the break.
// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
- if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u &&
+ if (current_emitting_switch && is_loop_break(to) &&
+ current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
{
if (!current_emitting_switch->need_ladder_break)
@@ -10773,12 +11577,25 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
}
statement("break;");
}
- else if (is_continue(to) || (from == to))
+ else if (to_is_continue || from == to)
{
// For from == to case can happen for a do-while loop which branches into itself.
// We don't mark these cases as continue blocks, but the only possible way to branch into
// ourselves is through means of continue blocks.
- branch_to_continue(from, to);
+
+ // If we are merging to a continue block, there is no need to emit the block chain for continue here.
+ // We can branch to the continue block after we merge execution.
+
+ // Here we make use of structured control flow rules from spec:
+ // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
+ // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
+ // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
+ auto &block_meta = ir.block_meta[to];
+ bool branching_to_merge =
+ (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
+ ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
+ if (!to_is_continue || !branching_to_merge)
+ branch_to_continue(from, to);
}
else if (!is_conditional(to))
emit_block_chain(get<SPIRBlock>(to));
@@ -10789,12 +11606,19 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
// Inner scope always takes precedence.
}
-void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
+void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
{
- // If we branch directly to a selection merge target, we don't really need a code path.
+ auto &from_block = get<SPIRBlock>(from);
+ BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
+
+ // If we branch directly to a selection merge target, we don't need a code path.
+ // This covers both merge out of if () / else () as well as a break for switch blocks.
bool true_sub = !is_conditional(true_block);
bool false_sub = !is_conditional(false_block);
+ bool true_block_is_selection_merge = true_block == merge_block;
+ bool false_block_is_selection_merge = false_block == merge_block;
+
if (true_sub)
{
emit_block_hints(get<SPIRBlock>(from));
@@ -10803,7 +11627,11 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
branch(from, true_block);
end_scope();
- if (false_sub || is_continue(false_block) || is_break(false_block))
+ // If we merge to continue, we handle that explicitly in emit_block_chain(),
+ // so there is no need to branch to it directly here.
+ // break; is required to handle ladder fallthrough cases, so keep that in for now, even
+ // if we could potentially handle it in emit_block_chain().
+ if (false_sub || (!false_block_is_selection_merge && is_continue(false_block)) || is_break(false_block))
{
statement("else");
begin_scope();
@@ -10818,7 +11646,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
end_scope();
}
}
- else if (false_sub && !true_sub)
+ else if (false_sub)
{
// Only need false path, use negative conditional.
emit_block_hints(get<SPIRBlock>(from));
@@ -10827,7 +11655,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
branch(from, false_block);
end_scope();
- if (is_continue(true_block) || is_break(true_block))
+ if ((!true_block_is_selection_merge && is_continue(true_block)) || is_break(true_block))
{
statement("else");
begin_scope();
@@ -10844,44 +11672,6 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
}
}
-void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
-{
- // Propagate down the loop dominator block, so that dominated blocks can back trace.
- if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
- {
- uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator;
-
- auto set_dominator = [this](uint32_t self, uint32_t new_dominator) {
- auto &dominated_block = this->get<SPIRBlock>(self);
-
- // If we already have a loop dominator, we're trying to break out to merge targets
- // which should not update the loop dominator.
- if (!dominated_block.loop_dominator)
- dominated_block.loop_dominator = new_dominator;
- };
-
- // After merging a loop, we inherit the loop dominator always.
- if (block.merge_block)
- set_dominator(block.merge_block, block.loop_dominator);
-
- if (block.true_block)
- set_dominator(block.true_block, dominator);
- if (block.false_block)
- set_dominator(block.false_block, dominator);
- if (block.next_block)
- set_dominator(block.next_block, dominator);
- if (block.default_block)
- set_dominator(block.default_block, dominator);
-
- for (auto &c : block.cases)
- set_dominator(c.block, dominator);
-
- // In older glslang output continue_block can be == loop header.
- if (block.continue_block && block.continue_block != block.self)
- set_dominator(block.continue_block, dominator);
- }
-}
-
// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
@@ -10902,7 +11692,6 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_tr
// Stamp out all blocks one after each other.
while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
{
- propagate_loop_dominators(*block);
// Write out all instructions we have in this block.
emit_block_instructions(*block);
@@ -11114,7 +11903,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
}
default:
- SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+ block.disable_block_optimization = true;
+ force_recompile();
+ begin_scope(); // We'll see an end_scope() later.
+ return false;
}
begin_scope();
@@ -11146,7 +11938,6 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
if (current_count == statement_count && condition_is_temporary)
{
- propagate_loop_dominators(child);
uint32_t target_block = child.true_block;
switch (continue_type)
@@ -11189,7 +11980,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
}
default:
- SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+ block.disable_block_optimization = true;
+ force_recompile();
+ begin_scope(); // We'll see an end_scope() later.
+ return false;
}
begin_scope();
@@ -11210,18 +12004,16 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
- // Enforce declaration order for regression testing purposes.
- sort(begin(block.dominated_variables), end(block.dominated_variables));
for (auto &v : block.dominated_variables)
flush_variable_declaration(v);
}
-void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<uint32_t, uint32_t>> &temporaries)
+void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
{
// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
// Need to sort these to ensure that reference output is stable.
sort(begin(temporaries), end(temporaries),
- [](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; });
+ [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
for (auto &tmp : temporaries)
{
@@ -11240,8 +12032,6 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<uint32_t, uint32_t>
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
- propagate_loop_dominators(block);
-
bool select_branch_to_true_block = false;
bool select_branch_to_false_block = false;
bool skip_direct_branch = false;
@@ -11255,8 +12045,22 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
// If we have loop variables, stop masking out access to the variable now.
- for (auto var : block.loop_variables)
- get<SPIRVariable>(var).loop_variable_enable = true;
+ for (auto var_id : block.loop_variables)
+ {
+ auto &var = get<SPIRVariable>(var_id);
+ var.loop_variable_enable = true;
+ // We're not going to declare the variable directly, so emit a copy here.
+ emit_variable_temporary_copies(var);
+ }
+
+ // Remember deferred declaration state. We will restore it before returning.
+ SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
+ for (size_t i = 0; i < block.dominated_variables.size(); i++)
+ {
+ uint32_t var_id = block.dominated_variables[i];
+ auto &var = get<SPIRVariable>(var_id);
+ rearm_dominated_variables[i] = var.deferred_declaration;
+ }
// This is the method often used by spirv-opt to implement loops.
// The loop header goes straight into the continue block.
@@ -11416,7 +12220,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
case SPIRBlock::MultiSelect:
{
auto &type = expression_type(block.condition);
- bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort;
+ bool unsigned_case =
+ type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
if (block.merge == SPIRBlock::MergeNone)
SPIRV_CROSS_THROW("Switch statement is not structured");
@@ -11441,61 +12246,182 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
if (block.need_ladder_break)
statement("bool _", block.self, "_ladder_break = false;");
+ // Find all unique case constructs.
+ unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
+ SmallVector<uint32_t> block_declaration_order;
+ SmallVector<uint32_t> literals_to_merge;
+
+ // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
+ // and let the default: block handle it.
+ // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
+ // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
+ for (auto &c : block.cases)
+ {
+ if (c.block != block.next_block && c.block != block.default_block)
+ {
+ if (!case_constructs.count(c.block))
+ block_declaration_order.push_back(c.block);
+ case_constructs[c.block].push_back(c.value);
+ }
+ else if (c.block == block.next_block && block.default_block != block.next_block)
+ {
+ // We might have to flush phi inside specific case labels.
+ // If we can piggyback on default:, do so instead.
+ literals_to_merge.push_back(c.value);
+ }
+ }
+
+ // Empty literal array -> default.
+ if (block.default_block != block.next_block)
+ {
+ auto &default_block = get<SPIRBlock>(block.default_block);
+
+ // We need to slide in the default block somewhere in this chain
+ // if there are fall-through scenarios since the default is declared separately in OpSwitch.
+ // Only consider trivial fall-through cases here.
+ size_t num_blocks = block_declaration_order.size();
+ bool injected_block = false;
+
+ for (size_t i = 0; i < num_blocks; i++)
+ {
+ auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
+ if (execution_is_direct_branch(case_block, default_block))
+ {
+ // Fallthrough to default block, we must inject the default block here.
+ block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
+ injected_block = true;
+ break;
+ }
+ else if (execution_is_direct_branch(default_block, case_block))
+ {
+ // Default case is falling through to another case label, we must inject the default block here.
+ block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
+ injected_block = true;
+ break;
+ }
+ }
+
+ // Order does not matter.
+ if (!injected_block)
+ block_declaration_order.push_back(block.default_block);
+
+ case_constructs[block.default_block] = {};
+ }
+
+ size_t num_blocks = block_declaration_order.size();
+
+ const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
+ return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
+ };
+
+ // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
+ // we need to flush phi nodes outside the switch block in a branch,
+ // and skip any Phi handling inside the case label to make fall-through work as expected.
+ // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
+ // inside the case label if at all possible.
+ for (size_t i = 1; i < num_blocks; i++)
+ {
+ if (flush_phi_required(block.self, block_declaration_order[i]) &&
+ flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
+ {
+ uint32_t target_block = block_declaration_order[i];
+
+ // Make sure we flush Phi, it might have been marked to be ignored earlier.
+ get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
+
+ auto &literals = case_constructs[target_block];
+
+ if (literals.empty())
+ {
+ // Oh boy, gotta make a complete negative test instead! o.o
+ // Find all possible literals that would *not* make us enter the default block.
+ // If none of those literals match, we flush Phi ...
+ SmallVector<string> conditions;
+ for (size_t j = 0; j < num_blocks; j++)
+ {
+ auto &negative_literals = case_constructs[block_declaration_order[j]];
+ for (auto &case_label : negative_literals)
+ conditions.push_back(join(to_enclosed_expression(block.condition),
+ " != ", to_case_label(case_label, unsigned_case)));
+ }
+
+ statement("if (", merge(conditions, " && "), ")");
+ begin_scope();
+ flush_phi(block.self, target_block);
+ end_scope();
+ }
+ else
+ {
+ SmallVector<string> conditions;
+ conditions.reserve(literals.size());
+ for (auto &case_label : literals)
+ conditions.push_back(join(to_enclosed_expression(block.condition),
+ " == ", to_case_label(case_label, unsigned_case)));
+ statement("if (", merge(conditions, " || "), ")");
+ begin_scope();
+ flush_phi(block.self, target_block);
+ end_scope();
+ }
+
+ // Mark the block so that we don't flush Phi from header to case label.
+ get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
+ }
+ }
+
emit_block_hints(block);
statement("switch (", to_expression(block.condition), ")");
begin_scope();
- // Multiple case labels can branch to same block, so find all unique blocks.
- bool emitted_default = false;
- unordered_set<uint32_t> emitted_blocks;
-
- for (auto &c : block.cases)
+ for (size_t i = 0; i < num_blocks; i++)
{
- if (emitted_blocks.count(c.block) != 0)
- continue;
+ uint32_t target_block = block_declaration_order[i];
+ auto &literals = case_constructs[target_block];
- // Emit all case labels which branch to our target.
- // FIXME: O(n^2), revisit if we hit shaders with 100++ case labels ...
- for (auto &other_case : block.cases)
+ if (literals.empty())
{
- if (other_case.block == c.block)
+ // Default case.
+ statement("default:");
+ }
+ else
+ {
+ for (auto &case_literal : literals)
{
// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
- auto case_value = unsigned_case ? convert_to_string(uint32_t(other_case.value)) :
- convert_to_string(int32_t(other_case.value));
- statement("case ", case_value, label_suffix, ":");
+ statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
}
}
- // Maybe we share with default block?
- if (block.default_block == c.block)
+ auto &case_block = get<SPIRBlock>(target_block);
+ if (backend.support_case_fallthrough && i + 1 < num_blocks &&
+ execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
{
- statement("default:");
- emitted_default = true;
+ // We will fall through here, so just terminate the block chain early.
+ // We still need to deal with Phi potentially.
+ // No need for a stack-like thing here since we only do fall-through when there is a
+ // single trivial branch to fall-through target..
+ current_emitting_switch_fallthrough = true;
}
-
- // Complete the target.
- emitted_blocks.insert(c.block);
+ else
+ current_emitting_switch_fallthrough = false;
begin_scope();
- branch(block.self, c.block);
+ branch(block.self, target_block);
end_scope();
+
+ current_emitting_switch_fallthrough = false;
}
- if (!emitted_default)
+ // Might still have to flush phi variables if we branch from loop header directly to merge target.
+ if (flush_phi_required(block.self, block.next_block))
{
- if (block.default_block != block.next_block)
- {
- statement("default:");
- begin_scope();
- if (is_break(block.default_block))
- SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ...");
- branch(block.self, block.default_block);
- end_scope();
- }
- else if (flush_phi_required(block.self, block.next_block))
+ if (block.default_block == block.next_block || !literals_to_merge.empty())
{
- statement("default:");
+ for (auto &case_literal : literals_to_merge)
+ statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
+
+ if (block.default_block == block.next_block)
+ statement("default:");
+
begin_scope();
flush_phi(block.self, block.next_block);
statement("break;");
@@ -11518,12 +12444,15 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
}
case SPIRBlock::Return:
+ {
for (auto &line : current_function->fixup_hooks_out)
line();
if (processing_entry_point)
emit_fixup();
+ auto &cfg = get_cfg_for_current_function();
+
if (block.return_value)
{
auto &type = expression_type(block.return_value);
@@ -11532,10 +12461,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
// If we cannot return arrays, we will have a special out argument we can write to instead.
// The backend is responsible for setting this up, and redirection the return values as appropriate.
if (ir.ids[block.return_value].get_type() != TypeUndef)
- emit_array_copy("SPIRV_Cross_return_value", block.return_value);
+ {
+ emit_array_copy("SPIRV_Cross_return_value", block.return_value, StorageClassFunction,
+ get_backing_variable_storage(block.return_value));
+ }
- if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
- block.loop_dominator != SPIRBlock::NoDominator)
+ if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
+ block.loop_dominator != BlockID(SPIRBlock::NoDominator))
{
statement("return;");
}
@@ -11547,16 +12479,17 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
statement("return ", to_expression(block.return_value), ";");
}
}
- // If this block is the very final block and not called from control flow,
- // we do not need an explicit return which looks out of place. Just end the function here.
- // In the very weird case of for(;;) { return; } executing return is unconditional,
- // but we actually need a return here ...
- else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
- block.loop_dominator != SPIRBlock::NoDominator)
+ else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
+ block.loop_dominator != BlockID(SPIRBlock::NoDominator))
{
+ // If this block is the very final block and not called from control flow,
+ // we do not need an explicit return which looks out of place. Just end the function here.
+ // In the very weird case of for(;;) { return; } executing return is unconditional,
+ // but we actually need a return here ...
statement("return;");
}
break;
+ }
case SPIRBlock::Kill:
statement(backend.discard_literal, ";");
@@ -11577,22 +12510,26 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
if (block.merge != SPIRBlock::MergeSelection)
flush_phi(block.self, block.next_block);
- // For merge selects we might have ignored the fact that a merge target
- // could have been a break; or continue;
- // We will need to deal with it here.
- if (is_loop_break(block.next_block))
- {
- // Cannot check for just break, because switch statements will also use break.
- assert(block.merge == SPIRBlock::MergeSelection);
- statement("break;");
- }
- else if (is_continue(block.next_block))
+ // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
+ if (!current_emitting_switch_fallthrough)
{
- assert(block.merge == SPIRBlock::MergeSelection);
- branch_to_continue(block.self, block.next_block);
+ // For merge selects we might have ignored the fact that a merge target
+ // could have been a break; or continue;
+ // We will need to deal with it here.
+ if (is_loop_break(block.next_block))
+ {
+ // Cannot check for just break, because switch statements will also use break.
+ assert(block.merge == SPIRBlock::MergeSelection);
+ statement("break;");
+ }
+ else if (is_continue(block.next_block))
+ {
+ assert(block.merge == SPIRBlock::MergeSelection);
+ branch_to_continue(block.self, block.next_block);
+ }
+ else if (BlockID(block.self) != block.next_block)
+ emit_block_chain(get<SPIRBlock>(block.next_block));
}
- else if (block.self != block.next_block)
- emit_block_chain(get<SPIRBlock>(block.next_block));
}
if (block.merge == SPIRBlock::MergeLoop)
@@ -11636,6 +12573,20 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
// Forget about control dependent expressions now.
block.invalidate_expressions.clear();
+
+ // After we return, we must be out of scope, so if we somehow have to re-emit this function,
+ // re-declare variables if necessary.
+ assert(rearm_dominated_variables.size() == block.dominated_variables.size());
+ for (size_t i = 0; i < block.dominated_variables.size(); i++)
+ {
+ uint32_t var = block.dominated_variables[i];
+ get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
+ }
+
+ // Just like for deferred declaration, we need to forget about loop variable enable
+ // if our block chain is reinstantiated later.
+ for (auto &var_id : block.loop_variables)
+ get<SPIRVariable>(var_id).loop_variable_enable = false;
}
void CompilerGLSL::begin_scope()
@@ -11652,6 +12603,14 @@ void CompilerGLSL::end_scope()
statement("}");
}
+void CompilerGLSL::end_scope(const string &trailer)
+{
+ if (!indent)
+ SPIRV_CROSS_THROW("Popping empty indent stack.");
+ indent--;
+ statement("}", trailer);
+}
+
void CompilerGLSL::end_scope_decl()
{
if (!indent)
@@ -11708,7 +12667,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
}
-void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
+void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
{
statement(lhs, " = ", to_expression(rhs_id), ";");
}
@@ -11793,6 +12752,7 @@ void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &ex
case BuiltInBaseVertex:
case BuiltInBaseInstance:
case BuiltInDrawIndex:
+ case BuiltInFragStencilRefEXT:
expected_type = SPIRType::Int;
break;
@@ -11828,6 +12788,7 @@ void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &exp
case BuiltInLayer:
case BuiltInPrimitiveId:
case BuiltInViewportIndex:
+ case BuiltInFragStencilRefEXT:
expected_type = SPIRType::Int;
break;
@@ -11898,3 +12859,124 @@ void CompilerGLSL::reset_name_caches()
block_names.clear();
function_overloads.clear();
}
+
+void CompilerGLSL::fixup_type_alias()
+{
+ // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
+ // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type
+ // alias if the slave type is a block type.
+ ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
+ if (type.type_alias && type_is_block_like(type))
+ {
+ // Become the master.
+ ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
+ if (other_id == type.self)
+ return;
+
+ if (other_type.type_alias == type.type_alias)
+ other_type.type_alias = type.self;
+ });
+
+ this->get<SPIRType>(type.type_alias).type_alias = self;
+ type.type_alias = 0;
+ }
+ });
+
+ ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
+ if (type.type_alias && type_is_block_like(type))
+ {
+ // This is not allowed, drop the type_alias.
+ type.type_alias = 0;
+ }
+ else if (type.type_alias && !type_is_block_like(this->get<SPIRType>(type.type_alias)))
+ {
+ // If the alias master is not a block-like type, there is no reason to use type aliasing.
+ // This case can happen if two structs are declared with the same name, but they are unrelated.
+ // Aliases are only used to deal with aliased types for structs which are used in different buffer types
+ // which all create a variant of the same struct with different DecorationOffset values.
+ type.type_alias = 0;
+ }
+ });
+}
+
+void CompilerGLSL::reorder_type_alias()
+{
+ // Reorder declaration of types so that the master of the type alias is always emitted first.
+ // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which
+ // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
+ auto loop_lock = ir.create_loop_hard_lock();
+
+ auto &type_ids = ir.ids_for_type[TypeType];
+ for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
+ {
+ auto &type = get<SPIRType>(*alias_itr);
+ if (type.type_alias != TypeID(0) &&
+ !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
+ {
+ // We will skip declaring this type, so make sure the type_alias type comes before.
+ auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
+ assert(master_itr != end(type_ids));
+
+ if (alias_itr < master_itr)
+ {
+ // Must also swap the type order for the constant-type joined array.
+ auto &joined_types = ir.ids_for_constant_or_type;
+ auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
+ auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
+ assert(alt_alias_itr != end(joined_types));
+ assert(alt_master_itr != end(joined_types));
+
+ swap(*alias_itr, *master_itr);
+ swap(*alt_alias_itr, *alt_master_itr);
+ }
+ }
+ }
+}
+
+void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
+{
+ // If we are redirecting statements, ignore the line directive.
+ // Common case here is continue blocks.
+ if (redirect_statement)
+ return;
+
+ if (options.emit_line_directives)
+ {
+ require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
+ statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
+ }
+}
+
+void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
+{
+ // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
+ // we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
+ // Back-propagate the qualifier based on the expression dependency chain.
+
+ if (!has_decoration(id, DecorationNonUniformEXT))
+ {
+ set_decoration(id, DecorationNonUniformEXT);
+ force_recompile();
+ }
+
+ auto *e = maybe_get<SPIRExpression>(id);
+ auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
+ auto *chain = maybe_get<SPIRAccessChain>(id);
+ if (e)
+ {
+ for (auto &expr : e->expression_dependencies)
+ propagate_nonuniform_qualifier(expr);
+ for (auto &expr : e->implied_read_expressions)
+ propagate_nonuniform_qualifier(expr);
+ }
+ else if (combined)
+ {
+ propagate_nonuniform_qualifier(combined->image);
+ propagate_nonuniform_qualifier(combined->sampler);
+ }
+ else if (chain)
+ {
+ for (auto &expr : chain->implied_read_expressions)
+ propagate_nonuniform_qualifier(expr);
+ }
+}
diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp
index 184bbbd..6f59bd8 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp
@@ -103,6 +103,10 @@ public:
// Does not apply to shader storage or push constant blocks.
bool emit_uniform_buffer_as_plain_uniforms = false;
+ // Emit OpLine directives if present in the module.
+ // May not correspond exactly to original source, but should be a good approximation.
+ bool emit_line_directives = false;
+
enum Precision
{
DontCare,
@@ -205,7 +209,7 @@ public:
// For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but
// mixing int and float is not.
// The name of the uniform array will be the same as the interface block name.
- void flatten_buffer_block(uint32_t id);
+ void flatten_buffer_block(VariableID id);
protected:
void reset();
@@ -219,6 +223,7 @@ protected:
SPIRBlock *current_emitting_block = nullptr;
SPIRBlock *current_emitting_switch = nullptr;
+ bool current_emitting_switch_fallthrough = false;
virtual void emit_instruction(const Instruction &instr);
void emit_block_instructions(SPIRBlock &block);
@@ -233,35 +238,40 @@ protected:
virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
virtual void emit_header();
+ void emit_line_directive(uint32_t file_id, uint32_t line_literal);
void build_workgroup_size(SmallVector<std::string> &arguments, const SpecializationConstant &x,
const SpecializationConstant &y, const SpecializationConstant &z);
virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
virtual void emit_texture_op(const Instruction &i);
+ virtual std::string to_texture_op(const Instruction &i, bool *forward,
+ SmallVector<uint32_t> &inherited_expressions);
virtual void emit_subgroup_op(const Instruction &i);
virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0);
virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage);
virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const std::string &qualifier = "", uint32_t base_offset = 0);
+ virtual void emit_struct_padding_target(const SPIRType &type);
virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0);
std::string constant_expression(const SPIRConstant &c);
std::string constant_op_expression(const SPIRConstantOp &cop);
virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector);
virtual void emit_fixup();
virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0);
- virtual std::string to_func_call_arg(uint32_t id);
- virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+ virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id);
+ virtual std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad,
- bool has_dref, uint32_t lod);
- virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+ bool has_dref, uint32_t lod, uint32_t minlod);
+ virtual std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset,
uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample,
- bool *p_forward);
+ uint32_t minlod, bool *p_forward);
virtual void emit_buffer_block(const SPIRVariable &type);
virtual void emit_push_constant_block(const SPIRVariable &var);
virtual void emit_uniform(const SPIRVariable &var);
- virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id);
+ virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
+ bool packed_type, bool row_major);
StringStream<> buffer;
@@ -322,6 +332,7 @@ protected:
void begin_scope();
void end_scope();
+ void end_scope(const std::string &trailer);
void end_scope_decl();
void end_scope_decl(const std::string &decl);
@@ -341,8 +352,10 @@ protected:
virtual bool is_non_native_row_major_matrix(uint32_t id);
virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
- bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
- virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed);
+ bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const;
+ bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const;
+ virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type,
+ uint32_t physical_type_id, bool is_packed);
std::unordered_set<std::string> local_variable_names;
std::unordered_set<std::string> resource_names;
@@ -363,6 +376,7 @@ protected:
struct BackendVariations
{
std::string discard_literal = "discard";
+ std::string demote_literal = "demote";
std::string null_pointer_literal = "";
bool float_literal_suffix = false;
bool double_literal_suffix = true;
@@ -377,6 +391,7 @@ protected:
const char *int16_t_literal_suffix = "s";
const char *uint16_t_literal_suffix = "us";
const char *nonuniform_qualifier = "nonuniformEXT";
+ const char *boolean_mix_function = "mix";
bool swizzle_is_function = false;
bool shared_is_implied = false;
bool unsized_array_supported = true;
@@ -387,7 +402,6 @@ protected:
bool can_declare_arrays_inline = true;
bool native_row_major_matrix = true;
bool use_constructor_splatting = true;
- bool boolean_mix_support = true;
bool allow_precision_qualifiers = false;
bool can_swizzle_scalar = false;
bool force_gl_in_out_block = false;
@@ -398,6 +412,8 @@ protected:
bool array_is_value_type = true;
bool comparison_image_samples_scalar = false;
bool native_pointers = false;
+ bool support_small_type_sampling_result = false;
+ bool support_case_fallthrough = true;
} backend;
void emit_struct(SPIRType &type);
@@ -412,24 +428,24 @@ protected:
void emit_interface_block(const SPIRVariable &type);
void emit_flattened_io_block(const SPIRVariable &var, const char *qual);
void emit_block_chain(SPIRBlock &block);
- void emit_hoisted_temporaries(SmallVector<std::pair<uint32_t, uint32_t>> &temporaries);
+ void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
std::string constant_value_macro_name(uint32_t id);
void emit_constant(const SPIRConstant &constant);
void emit_specialization_constant_op(const SPIRConstantOp &constant);
std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block);
bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method);
- void propagate_loop_dominators(const SPIRBlock &block);
- void branch(uint32_t from, uint32_t to);
- void branch_to_continue(uint32_t from, uint32_t to);
- void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block);
- void flush_phi(uint32_t from, uint32_t to);
- bool flush_phi_required(uint32_t from, uint32_t to);
+ void branch(BlockID from, BlockID to);
+ void branch_to_continue(BlockID from, BlockID to);
+ void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block);
+ void flush_phi(BlockID from, BlockID to);
void flush_variable_declaration(uint32_t id);
void flush_undeclared_variables(SPIRBlock &block);
+ void emit_variable_temporary_copies(const SPIRVariable &var);
bool should_dereference(uint32_t id);
- bool should_forward(uint32_t id);
+ bool should_forward(uint32_t id) const;
+ bool should_suppress_usage_tracking(uint32_t id) const;
void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op);
bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp);
@@ -445,11 +461,18 @@ protected:
SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
const char *op, SPIRType::BaseType input_type);
+ void emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
+ uint32_t op2, const char *op, SPIRType::BaseType expected_result_type,
+ SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
+ SPIRType::BaseType input_type2);
+ void emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
+ uint32_t op3, const char *op, SPIRType::BaseType offset_count_type);
void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op);
void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
- void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
+ void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
+ bool negate, SPIRType::BaseType expected_type);
void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op,
SPIRType::BaseType input_type, bool skip_cast_if_equal_type);
@@ -460,7 +483,8 @@ protected:
uint32_t false_value);
void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op);
- bool expression_is_forwarded(uint32_t id);
+ bool expression_is_forwarded(uint32_t id) const;
+ bool expression_suppresses_usage_tracking(uint32_t id) const;
SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
bool suppress_usage_tracking = false);
@@ -494,8 +518,11 @@ protected:
SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
std::string to_expression(uint32_t id, bool register_expression_read = true);
+ std::string to_composite_constructor_expression(uint32_t id);
+ std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type);
std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);
std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true);
+ std::string to_unpacked_row_major_matrix_expression(uint32_t id);
std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true);
std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true);
std::string to_pointer_expression(uint32_t id, bool register_expression_read = true);
@@ -517,15 +544,16 @@ protected:
virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
virtual std::string to_interpolation_qualifiers(const Bitset &flags);
std::string layout_for_variable(const SPIRVariable &variable);
- std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id);
+ std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id);
virtual bool skip_argument(uint32_t id) const;
- virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id);
+ virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
+ spv::StorageClass rhs_storage);
virtual void emit_block_hints(const SPIRBlock &block);
virtual std::string to_initializer_expression(const SPIRVariable &var);
bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0,
uint32_t end_offset = ~(0u));
- std::string buffer_to_packing_standard(const SPIRType &type, bool enable_std430);
+ std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout);
uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing);
uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
@@ -661,6 +689,11 @@ protected:
char current_locale_radix_character = '.';
+ void fixup_type_alias();
+ void reorder_type_alias();
+
+ void propagate_nonuniform_qualifier(uint32_t id);
+
private:
void init();
};
diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp
index 46613c5..4d4e276 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp
@@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
}
}
-string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
+string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
auto &imagetype = get<SPIRType>(type.image.type);
const char *dim = nullptr;
@@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
if (type.image.sampled == 1)
return join("Buffer<", type_to_glsl(imagetype), components, ">");
else if (type.image.sampled == 2)
+ {
+ if (interlocked_resources.count(id))
+ return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
+ ">");
return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">");
+ }
else
SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
case DimSubpassData:
@@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
const char *arrayed = type.image.arrayed ? "Array" : "";
const char *ms = type.image.ms ? "MS" : "";
const char *rw = typed_load ? "RW" : "";
+ if (typed_load && interlocked_resources.count(id))
+ rw = "RasterizerOrdered";
return join(rw, "Texture", dim, ms, arrayed, "<",
typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
join(type_to_glsl(imagetype), components),
@@ -1038,8 +1045,9 @@ void CompilerHLSL::emit_specialization_constants_and_structs()
{
bool emitted = false;
SpecializationConstant wg_x, wg_y, wg_z;
- uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+ ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+ auto loop_lock = ir.create_loop_hard_lock();
for (auto &id_ : ir.ids_for_constant_or_type)
{
auto &id = ir.ids[id_];
@@ -1742,6 +1750,46 @@ void CompilerHLSL::emit_resources()
end_scope();
statement("");
}
+
+ if (requires_scalar_reflect)
+ {
+ // FP16/FP64? No templates in HLSL.
+ statement("float SPIRV_Cross_Reflect(float i, float n)");
+ begin_scope();
+ statement("return i - 2.0 * dot(n, i) * n;");
+ end_scope();
+ statement("");
+ }
+
+ if (requires_scalar_refract)
+ {
+ // FP16/FP64? No templates in HLSL.
+ statement("float SPIRV_Cross_Refract(float i, float n, float eta)");
+ begin_scope();
+ statement("float NoI = n * i;");
+ statement("float NoI2 = NoI * NoI;");
+ statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
+ statement("if (k < 0.0)");
+ begin_scope();
+ statement("return 0.0;");
+ end_scope();
+ statement("else");
+ begin_scope();
+ statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+ end_scope();
+ end_scope();
+ statement("");
+ }
+
+ if (requires_scalar_faceforward)
+ {
+ // FP16/FP64? No templates in HLSL.
+ statement("float SPIRV_Cross_FaceForward(float n, float i, float nref)");
+ begin_scope();
+ statement("return i * nref < 0.0 ? n : -n;");
+ end_scope();
+ statement("");
+ }
}
string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
@@ -1781,7 +1829,7 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
string packing_offset;
bool is_push_constant = type.storage == StorageClassPushConstant;
- if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) &&
+ if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) &&
has_member_decoration(type.self, index, DecorationOffset))
{
uint32_t offset = memb[index].offset - base_offset;
@@ -1807,16 +1855,20 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
Bitset flags = ir.get_buffer_block_flags(var);
bool is_readonly = flags.get(DecorationNonWritable);
bool is_coherent = flags.get(DecorationCoherent);
+ bool is_interlocked = interlocked_resources.count(var.self) > 0;
+ const char *type_name = "ByteAddressBuffer ";
+ if (!is_readonly)
+ type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
add_resource_name(var.self);
- statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ",
- to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
+ statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
+ to_resource_binding(var), ";");
}
else
{
if (type.array.empty())
{
if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset))
- set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+ set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
else
SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
@@ -1902,7 +1954,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
auto &type = get<SPIRType>(var.basetype);
if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end))
- set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+ set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
else
SPIRV_CROSS_THROW(
"root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
@@ -1973,9 +2025,9 @@ void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i
}
}
-string CompilerHLSL::to_func_call_arg(uint32_t id)
+string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
- string arg_str = CompilerGLSL::to_func_call_arg(id);
+ string arg_str = CompilerGLSL::to_func_call_arg(arg, id);
if (hlsl_options.shader_model <= 30)
return arg_str;
@@ -2437,7 +2489,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
uint32_t result_type = ops[0];
uint32_t id = ops[1];
- uint32_t img = ops[2];
+ VariableID img = ops[2];
uint32_t coord = ops[3];
uint32_t dref = 0;
uint32_t comp = 0;
@@ -2449,6 +2501,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
inherited_expressions.push_back(coord);
+ // Make sure non-uniform decoration is back-propagated to where it needs to be.
+ if (has_decoration(img, DecorationNonUniformEXT))
+ propagate_nonuniform_qualifier(img);
+
switch (op)
{
case OpImageSampleDrefImplicitLod:
@@ -2536,6 +2592,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
uint32_t offset = 0;
uint32_t coffsets = 0;
uint32_t sample = 0;
+ uint32_t minlod = 0;
uint32_t flags = 0;
if (length)
@@ -2562,10 +2619,14 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
test(offset, ImageOperandsOffsetMask);
test(coffsets, ImageOperandsConstOffsetsMask);
test(sample, ImageOperandsSampleMask);
+ test(minlod, ImageOperandsMinLodMask);
string expr;
string texop;
+ if (minlod != 0)
+ SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL.");
+
if (op == OpImageFetch)
{
if (hlsl_options.shader_model < 40)
@@ -2831,7 +2892,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
// according to GLSL spec, and it depends on the sampler itself.
// Just assume X == Y, so we will need to splat the result to a float2.
statement("float _", id, "_tmp = ", expr, ";");
- emit_op(result_type, id, join("float2(_", id, "_tmp, _", id, "_tmp)"), true, true);
+ statement("float2 _", id, " = _", id, "_tmp.xx;");
+ set<SPIRExpression>(id, join("_", id), result_type, true);
}
else
{
@@ -2847,7 +2909,6 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
case OpImageSampleImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleProjDrefImplicitLod:
- case OpImageQueryLod:
register_control_dependent_expression(id);
break;
@@ -3198,8 +3259,11 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL.");
case GLSLstd450FindILsb:
- emit_unary_func_op(result_type, id, args[0], "firstbitlow");
+ {
+ auto basetype = expression_type(args[0]).basetype;
+ emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype);
break;
+ }
case GLSLstd450FindSMsb:
emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type);
@@ -3240,6 +3304,59 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
break;
}
+ case GLSLstd450Normalize:
+ // HLSL does not support scalar versions here.
+ if (expression_type(args[0]).vecsize == 1)
+ {
+ // Returns -1 or 1 for valid input, sign() does the job.
+ emit_unary_func_op(result_type, id, args[0], "sign");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Reflect:
+ if (get<SPIRType>(result_type).vecsize == 1)
+ {
+ if (!requires_scalar_reflect)
+ {
+ requires_scalar_reflect = true;
+ force_recompile();
+ }
+ emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Refract:
+ if (get<SPIRType>(result_type).vecsize == 1)
+ {
+ if (!requires_scalar_refract)
+ {
+ requires_scalar_refract = true;
+ force_recompile();
+ }
+ emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450FaceForward:
+ if (get<SPIRType>(result_type).vecsize == 1)
+ {
+ if (!requires_scalar_faceforward)
+ {
+ requires_scalar_faceforward = true;
+ force_recompile();
+ }
+ emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_FaceForward");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
@@ -3384,6 +3501,9 @@ void CompilerHLSL::emit_load(const Instruction &instruction)
uint32_t id = ops[1];
uint32_t ptr = ops[2];
+ if (has_decoration(ptr, DecorationNonUniformEXT))
+ propagate_nonuniform_qualifier(ptr);
+
auto load_expr = read_access_chain(*chain);
bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
@@ -3417,6 +3537,9 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
// Make sure we trigger a read of the constituents in the access chain.
track_expression_read(chain.self);
+ if (has_decoration(chain.self, DecorationNonUniformEXT))
+ propagate_nonuniform_qualifier(chain.self);
+
SPIRType target_type;
target_type.basetype = SPIRType::UInt;
target_type.vecsize = type.vecsize;
@@ -3601,7 +3724,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
e.row_major_matrix = row_major_matrix;
e.matrix_stride = matrix_stride;
e.immutable = should_forward(ops[2]);
- e.loaded_from = backing_variable ? backing_variable->self : 0;
+ e.loaded_from = backing_variable ? backing_variable->self : ID(0);
if (chain)
{
@@ -3909,6 +4032,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
// If we need to do implicit bitcasts, make sure we do it with the correct type.
uint32_t integer_width = get_integer_width_for_instruction(instruction);
auto int_type = to_signed_basetype(integer_width);
+ auto uint_type = to_unsigned_basetype(integer_width);
switch (opcode)
{
@@ -3933,22 +4057,50 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpMatrixTimesVector:
{
+ // Matrices are kept in a transposed state all the time, flip multiplication order always.
emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
break;
}
case OpVectorTimesMatrix:
{
+ // Matrices are kept in a transposed state all the time, flip multiplication order always.
emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
break;
}
case OpMatrixTimesMatrix:
{
+ // Matrices are kept in a transposed state all the time, flip multiplication order always.
emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
break;
}
+ case OpOuterProduct:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t a = ops[2];
+ uint32_t b = ops[3];
+
+ auto &type = get<SPIRType>(result_type);
+ string expr = type_to_glsl_constructor(type);
+ expr += "(";
+ for (uint32_t col = 0; col < type.columns; col++)
+ {
+ expr += to_enclosed_expression(a);
+ expr += " * ";
+ expr += to_extract_component_expression(b, col);
+ if (col + 1 < type.columns)
+ expr += ", ";
+ }
+ expr += ")";
+ emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+ inherit_expression_dependencies(id, a);
+ inherit_expression_dependencies(id, b);
+ break;
+ }
+
case OpFMod:
{
if (!requires_op_fmod)
@@ -4043,7 +4195,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
else
HLSL_BOP_CAST(==, int_type);
break;
@@ -4051,12 +4203,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpLogicalEqual:
case OpFOrdEqual:
+ case OpFUnordEqual:
{
+ // HLSL != operator is unordered.
+ // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
+ // isnan() is apparently implemented as x != x as well.
+ // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual.
+ // HACK: FUnordEqual will be implemented as FOrdEqual.
+
auto result_type = ops[0];
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
else
HLSL_BOP(==);
break;
@@ -4068,7 +4227,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
else
HLSL_BOP_CAST(!=, int_type);
break;
@@ -4076,12 +4235,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpLogicalNotEqual:
case OpFOrdNotEqual:
+ case OpFUnordNotEqual:
{
+ // HLSL != operator is unordered.
+ // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
+ // isnan() is apparently implemented as x != x as well.
+
+ // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here.
+ // We would need to do something like not(UnordEqual), but that cannot be expressed either.
+ // Adding a lot of NaN checks would be a breaking change from perspective of performance.
+ // SPIR-V will generally use isnan() checks when this even matters.
+ // HACK: FOrdNotEqual will be implemented as FUnordEqual.
+
auto result_type = ops[0];
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
else
HLSL_BOP(!=);
break;
@@ -4092,10 +4262,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
{
auto result_type = ops[0];
auto id = ops[1];
- auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpUGreaterThan ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type);
else
HLSL_BOP_CAST(>, type);
break;
@@ -4107,21 +4277,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown);
else
HLSL_BOP(>);
break;
}
+ case OpFUnordGreaterThan:
+ {
+ auto result_type = ops[0];
+ auto id = ops[1];
+
+ if (expression_type(ops[2]).vecsize > 1)
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown);
+ else
+ CompilerGLSL::emit_instruction(instruction);
+ break;
+ }
+
case OpUGreaterThanEqual:
case OpSGreaterThanEqual:
{
auto result_type = ops[0];
auto id = ops[1];
- auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type);
else
HLSL_BOP_CAST(>=, type);
break;
@@ -4133,21 +4315,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown);
else
HLSL_BOP(>=);
break;
}
+ case OpFUnordGreaterThanEqual:
+ {
+ auto result_type = ops[0];
+ auto id = ops[1];
+
+ if (expression_type(ops[2]).vecsize > 1)
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown);
+ else
+ CompilerGLSL::emit_instruction(instruction);
+ break;
+ }
+
case OpULessThan:
case OpSLessThan:
{
auto result_type = ops[0];
auto id = ops[1];
- auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpULessThan ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type);
else
HLSL_BOP_CAST(<, type);
break;
@@ -4159,21 +4353,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown);
else
HLSL_BOP(<);
break;
}
+ case OpFUnordLessThan:
+ {
+ auto result_type = ops[0];
+ auto id = ops[1];
+
+ if (expression_type(ops[2]).vecsize > 1)
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown);
+ else
+ CompilerGLSL::emit_instruction(instruction);
+ break;
+ }
+
case OpULessThanEqual:
case OpSLessThanEqual:
{
auto result_type = ops[0];
auto id = ops[1];
- auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
+ auto type = opcode == OpULessThanEqual ? uint_type : int_type;
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type);
else
HLSL_BOP_CAST(<=, type);
break;
@@ -4185,12 +4391,24 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto id = ops[1];
if (expression_type(ops[2]).vecsize > 1)
- emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=");
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown);
else
HLSL_BOP(<=);
break;
}
+ case OpFUnordLessThanEqual:
+ {
+ auto result_type = ops[0];
+ auto id = ops[1];
+
+ if (expression_type(ops[2]).vecsize > 1)
+ emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown);
+ else
+ CompilerGLSL::emit_instruction(instruction);
+ break;
+ }
+
case OpImageQueryLod:
emit_texture_op(instruction);
break;
@@ -4343,7 +4561,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
// When using the pointer, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
- e.loaded_from = var ? var->self : 0;
+ e.loaded_from = var ? var->self : ID(0);
break;
}
@@ -4501,8 +4719,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
}
case OpBitCount:
- HLSL_UFOP(countbits);
+ {
+ auto basetype = expression_type(ops[2]).basetype;
+ emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype);
break;
+ }
case OpBitReverse:
HLSL_UFOP(reversebits);
@@ -4527,6 +4748,15 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
break;
}
+ case OpIsHelperInvocationEXT:
+ SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL.");
+
+ case OpBeginInvocationInterlockEXT:
+ case OpEndInvocationInterlockEXT:
+ if (hlsl_options.shader_model < 51)
+ SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
+ break; // Nothing to do in the body
+
default:
CompilerGLSL::emit_instruction(instruction);
break;
@@ -4601,7 +4831,7 @@ void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &ve
remap_vertex_attributes.push_back(vertex_attributes);
}
-uint32_t CompilerHLSL::remap_num_workgroups_builtin()
+VariableID CompilerHLSL::remap_num_workgroups_builtin()
{
update_active_builtins();
@@ -4683,23 +4913,28 @@ string CompilerHLSL::compile()
backend.uint16_t_literal_suffix = "u";
backend.basic_int_type = "int";
backend.basic_uint_type = "uint";
+ backend.demote_literal = "discard";
+ backend.boolean_mix_function = "";
backend.swizzle_is_function = false;
backend.shared_is_implied = true;
backend.unsized_array_supported = true;
backend.explicit_struct_type = false;
backend.use_initializer_list = true;
backend.use_constructor_splatting = false;
- backend.boolean_mix_support = false;
backend.can_swizzle_scalar = true;
backend.can_declare_struct_inline = false;
backend.can_declare_arrays_inline = false;
backend.can_return_array = false;
backend.nonuniform_qualifier = "NonUniformResourceIndex";
+ backend.support_case_fallthrough = false;
+ fixup_type_alias();
+ reorder_type_alias();
build_function_control_flow_graphs_and_analyze();
validate_shader_model();
update_active_builtins();
analyze_image_and_sampler_usage();
+ analyze_interlocked_resource_usage();
// Subpass input needs SV_Position.
if (need_subpass_input)
diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp
index d96c911..eb968f0 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp
@@ -114,7 +114,7 @@ public:
// If non-zero, this returns the variable ID of a cbuffer which corresponds to
// the cbuffer declared above. By default, no binding or descriptor set decoration is set,
// so the calling application should declare explicit bindings on this ID before calling compile().
- uint32_t remap_num_workgroups_builtin();
+ VariableID remap_num_workgroups_builtin();
private:
std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
@@ -145,7 +145,7 @@ private:
std::string layout_for_member(const SPIRType &type, uint32_t index) override;
std::string to_interpolation_qualifiers(const Bitset &flags) override;
std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
- std::string to_func_call_arg(uint32_t id) override;
+ std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
std::string to_sampler_expression(uint32_t id);
std::string to_resource_binding(const SPIRVariable &var);
std::string to_resource_binding_sampler(const SPIRVariable &var);
@@ -167,6 +167,8 @@ private:
void replace_illegal_names() override;
Options hlsl_options;
+
+ // TODO: Refactor this to be more similar to MSL, maybe have some common system in place?
bool requires_op_fmod = false;
bool requires_fp16_packing = false;
bool requires_explicit_fp16_packing = false;
@@ -179,6 +181,9 @@ private:
bool requires_inverse_2x2 = false;
bool requires_inverse_3x3 = false;
bool requires_inverse_4x4 = false;
+ bool requires_scalar_reflect = false;
+ bool requires_scalar_refract = false;
+ bool requires_scalar_faceforward = false;
uint64_t required_textureSizeVariants = 0;
void require_texture_query_variant(const SPIRType &type);
diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp
index 4a4f77a..d7cb138 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp
@@ -28,8 +28,6 @@ using namespace std;
static const uint32_t k_unknown_location = ~0u;
static const uint32_t k_unknown_component = ~0u;
-static const uint32_t k_aux_mbr_idx_swizzle_const = 0u;
-
CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_)
: CompilerGLSL(move(spirv_))
{
@@ -59,7 +57,14 @@ void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va)
void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
{
- resource_bindings.push_back({ binding, false });
+ StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
+ resource_bindings[tuple] = { binding, false };
+}
+
+void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index)
+{
+ SetBindingPair pair = { desc_set, binding };
+ buffers_requiring_dynamic_offset[pair] = { index, 0 };
}
void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
@@ -68,6 +73,17 @@ void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
argument_buffer_discrete_mask |= 1u << desc_set;
}
+void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage)
+{
+ if (desc_set < kMaxArgumentBuffers)
+ {
+ if (device_storage)
+ argument_buffer_device_storage_mask |= 1u << desc_set;
+ else
+ argument_buffer_device_storage_mask &= ~(1u << desc_set);
+ }
+}
+
bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location)
{
return vtx_attrs_in_use.count(location) != 0;
@@ -75,12 +91,29 @@ bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location)
bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding)
{
- auto itr = find_if(begin(resource_bindings), end(resource_bindings),
- [&](const std::pair<MSLResourceBinding, bool> &resource) -> bool {
- return model == resource.first.stage && desc_set == resource.first.desc_set &&
- binding == resource.first.binding;
- });
- return itr != end(resource_bindings) && itr->second;
+ StageSetBinding tuple = { model, desc_set, binding };
+ auto itr = resource_bindings.find(tuple);
+ return itr != end(resource_bindings) && itr->second.second;
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const
+{
+ return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) const
+{
+ return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const
+{
+ return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const
+{
+ return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary);
}
void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
@@ -93,7 +126,19 @@ void CompilerMSL::build_implicit_builtins()
bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex;
bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl;
- if (need_subpass_input || need_sample_pos || need_vertex_params || need_tesc_params)
+ bool need_subgroup_mask =
+ active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) ||
+ active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) ||
+ active_input_builtins.get(BuiltInSubgroupLtMask);
+ bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
+ active_input_builtins.get(BuiltInSubgroupGtMask));
+ bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index &&
+ (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
+ bool need_dispatch_base =
+ msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute &&
+ (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId));
+ if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
+ need_multiview || need_dispatch_base || needs_subgroup_invocation_id)
{
bool has_frag_coord = false;
bool has_sample_id = false;
@@ -103,18 +148,23 @@ void CompilerMSL::build_implicit_builtins()
bool has_base_instance = false;
bool has_invocation_id = false;
bool has_primitive_id = false;
+ bool has_subgroup_invocation_id = false;
+ bool has_subgroup_size = false;
+ bool has_view_idx = false;
+ uint32_t workgroup_id_type = 0;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin)
return;
- if (need_subpass_input && ir.meta[var.self].decoration.builtin_type == BuiltInFragCoord)
+ BuiltIn builtin = ir.meta[var.self].decoration.builtin_type;
+ if (need_subpass_input && builtin == BuiltInFragCoord)
{
builtin_frag_coord_id = var.self;
has_frag_coord = true;
}
- if (need_sample_pos && ir.meta[var.self].decoration.builtin_type == BuiltInSampleId)
+ if (need_sample_pos && builtin == BuiltInSampleId)
{
builtin_sample_id_id = var.self;
has_sample_id = true;
@@ -122,7 +172,7 @@ void CompilerMSL::build_implicit_builtins()
if (need_vertex_params)
{
- switch (ir.meta[var.self].decoration.builtin_type)
+ switch (builtin)
{
case BuiltInVertexIndex:
builtin_vertex_idx_id = var.self;
@@ -147,7 +197,7 @@ void CompilerMSL::build_implicit_builtins()
if (need_tesc_params)
{
- switch (ir.meta[var.self].decoration.builtin_type)
+ switch (builtin)
{
case BuiltInInvocationId:
builtin_invocation_id_id = var.self;
@@ -161,6 +211,41 @@ void CompilerMSL::build_implicit_builtins()
break;
}
}
+
+ if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId)
+ {
+ builtin_subgroup_invocation_id_id = var.self;
+ has_subgroup_invocation_id = true;
+ }
+
+ if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize)
+ {
+ builtin_subgroup_size_id = var.self;
+ has_subgroup_size = true;
+ }
+
+ if (need_multiview)
+ {
+ if (builtin == BuiltInInstanceIndex)
+ {
+ // The view index here is derived from the instance index.
+ builtin_instance_idx_id = var.self;
+ has_instance_idx = true;
+ }
+
+ if (builtin == BuiltInViewIndex)
+ {
+ builtin_view_idx_id = var.self;
+ has_view_idx = true;
+ }
+ }
+
+ // The base workgroup needs to have the same type and vector size
+ // as the workgroup or invocation ID, so keep track of the type that
+ // was used.
+ if (need_dispatch_base && workgroup_id_type == 0 &&
+ (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId))
+ workgroup_id_type = var.basetype;
});
if (!has_frag_coord && need_subpass_input)
@@ -188,6 +273,7 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord);
builtin_frag_coord_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id);
}
if (!has_sample_id && need_sample_pos)
@@ -214,9 +300,11 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId);
builtin_sample_id_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
}
- if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance))
+ if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
+ (need_multiview && (!has_instance_idx || !has_view_idx)))
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_id = offset;
@@ -235,7 +323,7 @@ void CompilerMSL::build_implicit_builtins()
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
- if (!has_vertex_idx)
+ if (need_vertex_params && !has_vertex_idx)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -243,8 +331,10 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex);
builtin_vertex_idx_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
}
- if (!has_base_vertex)
+
+ if (need_vertex_params && !has_base_vertex)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -252,8 +342,10 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex);
builtin_base_vertex_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
}
- if (!has_instance_idx)
+
+ if (!has_instance_idx) // Needed by both multiview and tessellation
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -261,8 +353,10 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
builtin_instance_idx_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
}
- if (!has_base_instance)
+
+ if (need_vertex_params && !has_base_instance)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -270,6 +364,39 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance);
builtin_base_instance_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
+ }
+
+ if (need_multiview)
+ {
+ // Multiview shaders are not allowed to write to gl_Layer, ostensibly because
+ // it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
+ // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
+ // gl_Layer is an output in vertex-pipeline shaders.
+ uint32_t type_ptr_out_id = ir.increase_bound_by(2);
+ SPIRType uint_type_ptr_out;
+ uint_type_ptr_out = uint_type;
+ uint_type_ptr_out.pointer = true;
+ uint_type_ptr_out.parent_type = type_id;
+ uint_type_ptr_out.storage = StorageClassOutput;
+ auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
+ ptr_out_type.self = type_id;
+ uint32_t var_id = type_ptr_out_id + 1;
+ set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
+ set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
+ builtin_layer_id = var_id;
+ mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
+ }
+
+ if (need_multiview && !has_view_idx)
+ {
+ uint32_t var_id = ir.increase_bound_by(1);
+
+ // Create gl_ViewIndex.
+ set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+ set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
+ builtin_view_idx_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
}
}
@@ -300,7 +427,9 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId);
builtin_invocation_id_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id);
}
+
if (!has_primitive_id)
{
uint32_t var_id = ir.increase_bound_by(1);
@@ -309,56 +438,198 @@ void CompilerMSL::build_implicit_builtins()
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId);
builtin_primitive_id_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id);
+ }
+ }
+
+ if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id))
+ {
+ uint32_t offset = ir.increase_bound_by(3);
+ uint32_t type_id = offset;
+ uint32_t type_ptr_id = offset + 1;
+ uint32_t var_id = offset + 2;
+
+ // Create gl_SubgroupInvocationID.
+ SPIRType uint_type;
+ uint_type.basetype = SPIRType::UInt;
+ uint_type.width = 32;
+ set<SPIRType>(type_id, uint_type);
+
+ SPIRType uint_type_ptr;
+ uint_type_ptr = uint_type;
+ uint_type_ptr.pointer = true;
+ uint_type_ptr.parent_type = type_id;
+ uint_type_ptr.storage = StorageClassInput;
+ auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+ ptr_type.self = type_id;
+
+ set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+ set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId);
+ builtin_subgroup_invocation_id_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
+ }
+
+ if (!has_subgroup_size && need_subgroup_ge_mask)
+ {
+ uint32_t offset = ir.increase_bound_by(3);
+ uint32_t type_id = offset;
+ uint32_t type_ptr_id = offset + 1;
+ uint32_t var_id = offset + 2;
+
+ // Create gl_SubgroupSize.
+ SPIRType uint_type;
+ uint_type.basetype = SPIRType::UInt;
+ uint_type.width = 32;
+ set<SPIRType>(type_id, uint_type);
+
+ SPIRType uint_type_ptr;
+ uint_type_ptr = uint_type;
+ uint_type_ptr.pointer = true;
+ uint_type_ptr.parent_type = type_id;
+ uint_type_ptr.storage = StorageClassInput;
+ auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+ ptr_type.self = type_id;
+
+ set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+ set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize);
+ builtin_subgroup_size_id = var_id;
+ mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id);
+ }
+
+ if (need_dispatch_base)
+ {
+ uint32_t var_id;
+ if (msl_options.supports_msl_version(1, 2))
+ {
+ // If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin
+ // to convey this information and save a buffer slot.
+ uint32_t offset = ir.increase_bound_by(1);
+ var_id = offset;
+
+ set<SPIRVariable>(var_id, workgroup_id_type, StorageClassInput);
+ set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase);
+ get_entry_point().interface_variables.push_back(var_id);
+ }
+ else
+ {
+ // Otherwise, we need to fall back to a good ol' fashioned buffer.
+ uint32_t offset = ir.increase_bound_by(2);
+ var_id = offset;
+ uint32_t type_id = offset + 1;
+
+ SPIRType var_type = get<SPIRType>(workgroup_id_type);
+ var_type.storage = StorageClassUniform;
+ set<SPIRType>(type_id, var_type);
+
+ set<SPIRVariable>(var_id, type_id, StorageClassUniform);
+ // This should never match anything.
+ set_decoration(var_id, DecorationDescriptorSet, ~(5u));
+ set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index);
+ set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
+ msl_options.indirect_params_buffer_index);
}
+ set_name(var_id, "spvDispatchBase");
+ builtin_dispatch_base_id = var_id;
}
}
- if (needs_aux_buffer_def)
- {
- uint32_t offset = ir.increase_bound_by(5);
- uint32_t type_id = offset;
- uint32_t type_arr_id = offset + 1;
- uint32_t struct_id = offset + 2;
- uint32_t struct_ptr_id = offset + 3;
- uint32_t var_id = offset + 4;
-
- // Create a buffer to hold extra data, including the swizzle constants.
- SPIRType uint_type;
- uint_type.basetype = SPIRType::UInt;
- uint_type.width = 32;
- set<SPIRType>(type_id, uint_type);
-
- SPIRType uint_type_arr = uint_type;
- uint_type_arr.array.push_back(0);
- uint_type_arr.array_size_literal.push_back(true);
- uint_type_arr.parent_type = type_id;
- set<SPIRType>(type_arr_id, uint_type_arr);
- set_decoration(type_arr_id, DecorationArrayStride, 4);
-
- SPIRType struct_type;
- struct_type.basetype = SPIRType::Struct;
- struct_type.member_types.push_back(type_arr_id);
- auto &type = set<SPIRType>(struct_id, struct_type);
- type.self = struct_id;
- set_decoration(struct_id, DecorationBlock);
- set_name(struct_id, "spvAux");
- set_member_name(struct_id, k_aux_mbr_idx_swizzle_const, "swizzleConst");
- set_member_decoration(struct_id, k_aux_mbr_idx_swizzle_const, DecorationOffset, 0);
-
- SPIRType struct_type_ptr = struct_type;
- struct_type_ptr.pointer = true;
- struct_type_ptr.parent_type = struct_id;
- struct_type_ptr.storage = StorageClassUniform;
- auto &ptr_type = set<SPIRType>(struct_ptr_id, struct_type_ptr);
- ptr_type.self = struct_id;
-
- set<SPIRVariable>(var_id, struct_ptr_id, StorageClassUniform);
- set_name(var_id, "spvAuxBuffer");
+ if (needs_swizzle_buffer_def)
+ {
+ uint32_t var_id = build_constant_uint_array_pointer();
+ set_name(var_id, "spvSwizzleConstants");
+ // This should never match anything.
+ set_decoration(var_id, DecorationDescriptorSet, kSwizzleBufferBinding);
+ set_decoration(var_id, DecorationBinding, msl_options.swizzle_buffer_index);
+ set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.swizzle_buffer_index);
+ swizzle_buffer_id = var_id;
+ }
+
+ if (!buffers_requiring_array_length.empty())
+ {
+ uint32_t var_id = build_constant_uint_array_pointer();
+ set_name(var_id, "spvBufferSizeConstants");
+ // This should never match anything.
+ set_decoration(var_id, DecorationDescriptorSet, kBufferSizeBufferBinding);
+ set_decoration(var_id, DecorationBinding, msl_options.buffer_size_buffer_index);
+ set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
+ buffer_size_buffer_id = var_id;
+ }
+
+ if (needs_view_mask_buffer())
+ {
+ uint32_t var_id = build_constant_uint_array_pointer();
+ set_name(var_id, "spvViewMask");
+ // This should never match anything.
+ set_decoration(var_id, DecorationDescriptorSet, ~(4u));
+ set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
+ set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
+ view_mask_buffer_id = var_id;
+ }
+
+ if (!buffers_requiring_dynamic_offset.empty())
+ {
+ uint32_t var_id = build_constant_uint_array_pointer();
+ set_name(var_id, "spvDynamicOffsets");
// This should never match anything.
- set_decoration(var_id, DecorationDescriptorSet, 0xFFFFFFFE);
- set_decoration(var_id, DecorationBinding, msl_options.aux_buffer_index);
- aux_buffer_id = var_id;
+ set_decoration(var_id, DecorationDescriptorSet, ~(5u));
+ set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index);
+ set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
+ msl_options.dynamic_offsets_buffer_index);
+ dynamic_offsets_buffer_id = var_id;
+ }
+}
+
+void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
+{
+ Bitset *active_builtins = nullptr;
+ switch (storage)
+ {
+ case StorageClassInput:
+ active_builtins = &active_input_builtins;
+ break;
+
+ case StorageClassOutput:
+ active_builtins = &active_output_builtins;
+ break;
+
+ default:
+ break;
}
+
+ assert(active_builtins != nullptr);
+ active_builtins->set(builtin);
+ get_entry_point().interface_variables.push_back(id);
+}
+
+uint32_t CompilerMSL::build_constant_uint_array_pointer()
+{
+ uint32_t offset = ir.increase_bound_by(4);
+ uint32_t type_id = offset;
+ uint32_t type_ptr_id = offset + 1;
+ uint32_t type_ptr_ptr_id = offset + 2;
+ uint32_t var_id = offset + 3;
+
+ // Create a buffer to hold extra data, including the swizzle constants.
+ SPIRType uint_type;
+ uint_type.basetype = SPIRType::UInt;
+ uint_type.width = 32;
+ set<SPIRType>(type_id, uint_type);
+
+ SPIRType uint_type_pointer = uint_type;
+ uint_type_pointer.pointer = true;
+ uint_type_pointer.pointer_depth = 1;
+ uint_type_pointer.parent_type = type_id;
+ uint_type_pointer.storage = StorageClassUniform;
+ set<SPIRType>(type_ptr_id, uint_type_pointer);
+ set_decoration(type_ptr_id, DecorationArrayStride, 4);
+
+ SPIRType uint_type_pointer2 = uint_type_pointer;
+ uint_type_pointer2.pointer_depth++;
+ uint_type_pointer2.parent_type = type_ptr_id;
+ set<SPIRType>(type_ptr_ptr_id, uint_type_pointer2);
+
+ set<SPIRVariable>(var_id, type_ptr_ptr_id, StorageClassUniformConstant);
+ return var_id;
}
static string create_sampler_address(const char *prefix, MSLSamplerAddress addr)
@@ -416,7 +687,7 @@ void CompilerMSL::emit_entry_point_declarations()
// FIXME: Get test coverage here ...
// Emit constexpr samplers here.
- for (auto &samp : constexpr_samplers)
+ for (auto &samp : constexpr_samplers_by_id)
{
auto &var = get<SPIRVariable>(samp.first);
auto &type = get<SPIRType>(var.basetype);
@@ -532,9 +803,82 @@ void CompilerMSL::emit_entry_point_declarations()
convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")"));
}
- statement("constexpr sampler ",
- type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
- "(", merge(args), ");");
+ // If we would emit no arguments, then omit the parentheses entirely. Otherwise,
+ // we'll wind up with a "most vexing parse" situation.
+ if (args.empty())
+ statement("constexpr sampler ",
+ type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
+ ";");
+ else
+ statement("constexpr sampler ",
+ type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
+ "(", merge(args), ");");
+ }
+
+ // Emit dynamic buffers here.
+ for (auto &dynamic_buffer : buffers_requiring_dynamic_offset)
+ {
+ if (!dynamic_buffer.second.second)
+ {
+ // Could happen if no buffer was used at requested binding point.
+ continue;
+ }
+
+ const auto &var = get<SPIRVariable>(dynamic_buffer.second.second);
+ uint32_t var_id = var.self;
+ const auto &type = get_variable_data_type(var);
+ string name = to_name(var.self);
+ uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet);
+ uint32_t arg_id = argument_buffer_ids[desc_set];
+ uint32_t base_index = dynamic_buffer.second.first;
+
+ if (!type.array.empty())
+ {
+ // This is complicated, because we need to support arrays of arrays.
+ // And it's even worse if the outermost dimension is a runtime array, because now
+ // all this complicated goop has to go into the shader itself. (FIXME)
+ if (!type.array[type.array.size() - 1])
+ SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
+ else
+ {
+ statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name,
+ type_to_array_glsl(type), " =");
+ uint32_t dim = uint32_t(type.array.size());
+ uint32_t j = 0;
+ for (SmallVector<uint32_t> indices(type.array.size());
+ indices[type.array.size() - 1] < to_array_size_literal(type); j++)
+ {
+ while (dim > 0)
+ {
+ begin_scope();
+ --dim;
+ }
+
+ string arrays;
+ for (uint32_t i = uint32_t(type.array.size()); i; --i)
+ arrays += join("[", indices[i - 1], "]");
+ statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ",
+ to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ",
+ to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"),
+ arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),");
+
+ while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1)
+ {
+ end_scope(",");
+ indices[dim++] = 0;
+ }
+ }
+ end_scope_decl();
+ statement_no_indent("");
+ }
+ }
+ else
+ {
+ statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(",
+ get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((",
+ get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".",
+ ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);");
+ }
}
// Emit buffer arrays here.
@@ -543,10 +887,10 @@ void CompilerMSL::emit_entry_point_declarations()
const auto &var = get<SPIRVariable>(array_id);
const auto &type = get_variable_data_type(var);
string name = to_name(array_id);
- statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
+ statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
begin_scope();
- for (uint32_t i = 0; i < type.array[0]; ++i)
- statement(name + "_" + convert_to_string(i) + ",");
+ for (uint32_t i = 0; i < to_array_size_literal(type); ++i)
+ statement(name, "_", i, ",");
end_scope_decl();
statement_no_indent("");
}
@@ -564,7 +908,7 @@ string CompilerMSL::compile()
backend.float_literal_suffix = false;
backend.uint32_t_literal_suffix = true;
backend.int16_t_literal_suffix = "";
- backend.uint16_t_literal_suffix = "u";
+ backend.uint16_t_literal_suffix = "";
backend.basic_int_type = "int";
backend.basic_uint_type = "uint";
backend.basic_int8_type = "char";
@@ -572,6 +916,8 @@ string CompilerMSL::compile()
backend.basic_int16_type = "short";
backend.basic_uint16_type = "ushort";
backend.discard_literal = "discard_fragment()";
+ backend.demote_literal = "unsupported-demote";
+ backend.boolean_mix_function = "select";
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
backend.use_initializer_list = true;
@@ -580,34 +926,46 @@ string CompilerMSL::compile()
backend.unsized_array_supported = false;
backend.can_declare_arrays_inline = false;
backend.can_return_array = false;
- backend.boolean_mix_support = false;
backend.allow_truncated_access_chain = true;
backend.array_is_value_type = false;
backend.comparison_image_samples_scalar = true;
backend.native_pointers = true;
backend.nonuniform_qualifier = "";
+ backend.support_small_type_sampling_result = true;
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
- replace_illegal_names();
+ // Initialize array here rather than constructor, MSVC 2013 workaround.
+ for (auto &id : next_metal_resource_ids)
+ id = 0;
- struct_member_padding.clear();
+ fixup_type_alias();
+ replace_illegal_names();
build_function_control_flow_graphs_and_analyze();
update_active_builtins();
analyze_image_and_sampler_usage();
analyze_sampled_image_usage();
+ analyze_interlocked_resource_usage();
+ preprocess_op_codes();
build_implicit_builtins();
fixup_image_load_store_access();
set_enabled_interface_variables(get_active_interface_variables());
- if (aux_buffer_id)
- active_interface_variables.insert(aux_buffer_id);
-
- // Preprocess OpCodes to extract the need to output additional header content
- preprocess_op_codes();
+ if (swizzle_buffer_id)
+ active_interface_variables.insert(swizzle_buffer_id);
+ if (buffer_size_buffer_id)
+ active_interface_variables.insert(buffer_size_buffer_id);
+ if (view_mask_buffer_id)
+ active_interface_variables.insert(view_mask_buffer_id);
+ if (dynamic_offsets_buffer_id)
+ active_interface_variables.insert(dynamic_offsets_buffer_id);
+ if (builtin_layer_id)
+ active_interface_variables.insert(builtin_layer_id);
+ if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2))
+ active_interface_variables.insert(builtin_dispatch_base_id);
// Create structs to hold input, output and uniform variables.
// Do output first to ensure out. is declared at top of entry function.
@@ -633,6 +991,7 @@ string CompilerMSL::compile()
// Mark any non-stage-in structs to be tightly packed.
mark_packable_structs();
+ reorder_type_alias();
// Add fixup hooks required by shader inputs and outputs. This needs to happen before
// the loop, so the hooks aren't added multiple times.
@@ -659,6 +1018,8 @@ string CompilerMSL::compile()
next_metal_resource_index_buffer = 0;
next_metal_resource_index_texture = 0;
next_metal_resource_index_sampler = 0;
+ for (auto &id : next_metal_resource_ids)
+ id = 0;
// Move constructor for this type is broken on GCC 4.9 ...
buffer.reset();
@@ -700,6 +1061,9 @@ void CompilerMSL::preprocess_op_codes()
is_rasterization_disabled = true;
capture_output_to_buffer = true;
}
+
+ if (preproc.needs_subgroup_invocation_id)
+ needs_subgroup_invocation_id = true;
}
// Move the Private and Workgroup global variables to the entry function.
@@ -783,6 +1147,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
case OpInBoundsAccessChain:
case OpAccessChain:
case OpPtrAccessChain:
+ case OpArrayLength:
{
uint32_t base_id = ops[2];
if (global_var_ids.find(base_id) != global_var_ids.end())
@@ -890,7 +1255,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
added_out = true;
}
type_id = get<SPIRVariable>(arg_id).basetype;
- p_type = &get<SPIRType>(type_id);
uint32_t next_id = ir.increase_bound_by(1);
func.add_parameter(type_id, next_id, true);
set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
@@ -962,7 +1326,7 @@ void CompilerMSL::mark_packable_structs()
}
// If the specified type is a struct, it and any nested structs
-// are marked as packable with the SPIRVCrossDecorationPacked decoration,
+// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration,
void CompilerMSL::mark_as_packable(SPIRType &type)
{
// If this is not the base type (eg. it's a pointer or array), tunnel down
@@ -974,10 +1338,10 @@ void CompilerMSL::mark_as_packable(SPIRType &type)
if (type.basetype == SPIRType::Struct)
{
- set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
+ set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked);
// Recurse
- size_t mbr_cnt = type.member_types.size();
+ uint32_t mbr_cnt = uint32_t(type.member_types.size());
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
uint32_t mbr_type_id = type.member_types[mbr_idx];
@@ -1080,7 +1444,7 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co
else if (!strip_array)
ir.meta[var.self].decoration.qualified_alias = qual_var_name;
- if (var.storage == StorageClassOutput && var.initializer != 0)
+ if (var.storage == StorageClassOutput && var.initializer != ID(0))
{
entry_func.fixup_hooks_in.push_back(
[=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); });
@@ -1768,8 +2132,7 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t
bool in_array = false;
for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++)
{
- auto &mbr_dec = ir.meta[ib_type_id].members[i];
- uint32_t var_id = mbr_dec.extended.ib_orig_id;
+ uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID);
if (!var_id)
continue;
auto &var = get<SPIRVariable>(var_id);
@@ -1823,21 +2186,50 @@ void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t
// Returns the ID of the newly added variable, or zero if no variable was added.
uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
{
- // Accumulate the variables that should appear in the interface struct
+ // Accumulate the variables that should appear in the interface struct.
SmallVector<SPIRVariable *> vars;
- bool incl_builtins = (storage == StorageClassOutput || is_tessellation_shader());
+ bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader();
+ bool has_seen_barycentric = false;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
+ if (var.storage != storage)
+ return;
+
auto &type = this->get<SPIRType>(var.basetype);
- BuiltIn bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
- if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) &&
- !is_hidden_variable(var, incl_builtins) && type.pointer &&
- (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch &&
- (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
- bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || bi_type == BuiltInLayer ||
- bi_type == BuiltInViewportIndex || bi_type == BuiltInFragDepth || bi_type == BuiltInSampleMask ||
- (get_execution_model() == ExecutionModelTessellationEvaluation &&
- (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner))))
+
+ bool is_builtin = is_builtin_variable(var);
+ auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
+
+ // These builtins are part of the stage in/out structs.
+ bool is_interface_block_builtin =
+ (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance ||
+ bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex ||
+ bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth ||
+ bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) ||
+ (get_execution_model() == ExecutionModelTessellationEvaluation &&
+ (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner));
+
+ bool is_active = interface_variable_exists_in_entry_point(var.self);
+ if (is_builtin && is_active)
+ {
+ // Only emit the builtin if it's active in this entry point. Interface variable list might lie.
+ is_active = has_active_builtin(bi_type, storage);
+ }
+
+ bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch;
+
+ bool hidden = is_hidden_variable(var, incl_builtins);
+ // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments.
+ if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV))
+ {
+ if (has_seen_barycentric)
+ SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL.");
+ has_seen_barycentric = true;
+ hidden = false;
+ }
+
+ if (is_active && !hidden && type.pointer && filter_patch_decoration &&
+ (!is_builtin || is_interface_block_builtin))
{
vars.push_back(&var);
}
@@ -1957,7 +2349,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref);
set_name(ib_var_id, ib_var_ref);
- for (auto p_var : vars)
+ for (auto *p_var : vars)
{
bool strip_array =
(get_execution_model() == ExecutionModelTessellationControl ||
@@ -2055,7 +2447,8 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil
auto &type = get<SPIRType>(type_id);
if ((builtin == BuiltInSampleMask && is_array(type)) ||
- ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex) && type.basetype != SPIRType::UInt))
+ ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) &&
+ type.basetype != SPIRType::UInt))
{
uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1);
uint32_t base_type_id = next_id++;
@@ -2163,188 +2556,621 @@ uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t l
return type_id;
}
+void CompilerMSL::mark_struct_members_packed(const SPIRType &type)
+{
+ set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked);
+
+ // Problem case! Struct needs to be placed at an awkward alignment.
+ // Mark every member of the child struct as packed.
+ uint32_t mbr_cnt = uint32_t(type.member_types.size());
+ for (uint32_t i = 0; i < mbr_cnt; i++)
+ {
+ auto &mbr_type = get<SPIRType>(type.member_types[i]);
+ if (mbr_type.basetype == SPIRType::Struct)
+ {
+ // Recursively mark structs as packed.
+ auto *struct_type = &mbr_type;
+ while (!struct_type->array.empty())
+ struct_type = &get<SPIRType>(struct_type->parent_type);
+ mark_struct_members_packed(*struct_type);
+ }
+ else if (!is_scalar(mbr_type))
+ set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked);
+ }
+}
+
+void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type)
+{
+ uint32_t mbr_cnt = uint32_t(type.member_types.size());
+ for (uint32_t i = 0; i < mbr_cnt; i++)
+ {
+ auto &mbr_type = get<SPIRType>(type.member_types[i]);
+ if (mbr_type.basetype == SPIRType::Struct)
+ {
+ auto *struct_type = &mbr_type;
+ while (!struct_type->array.empty())
+ struct_type = &get<SPIRType>(struct_type->parent_type);
+
+ if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked))
+ continue;
+
+ uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i);
+ uint32_t msl_size = get_declared_struct_member_size_msl(type, i);
+ uint32_t spirv_offset = type_struct_member_offset(type, i);
+ uint32_t spirv_offset_next;
+ if (i + 1 < mbr_cnt)
+ spirv_offset_next = type_struct_member_offset(type, i + 1);
+ else
+ spirv_offset_next = spirv_offset + msl_size;
+
+ // Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes,
+ // and the next member will be placed at offset 12.
+ bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0;
+ bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next;
+ uint32_t array_stride = 0;
+ bool struct_needs_explicit_padding = false;
+
+ // Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct.
+ if (!mbr_type.array.empty())
+ {
+ array_stride = type_struct_member_array_stride(type, i);
+ uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
+ for (uint32_t dim = 0; dim < dimensions; dim++)
+ {
+ uint32_t array_size = to_array_size_literal(mbr_type, dim);
+ array_stride /= max(array_size, 1u);
+ }
+
+ // Set expected struct size based on ArrayStride.
+ struct_needs_explicit_padding = true;
+
+ // If struct size is larger than array stride, we might be able to fit, if we tightly pack.
+ if (get_declared_struct_size_msl(*struct_type) > array_stride)
+ struct_is_too_large = true;
+ }
+
+ if (struct_is_misaligned || struct_is_too_large)
+ mark_struct_members_packed(*struct_type);
+ mark_scalar_layout_structs(*struct_type);
+
+ if (struct_needs_explicit_padding)
+ {
+ msl_size = get_declared_struct_size_msl(*struct_type, true, true);
+ if (array_stride < msl_size)
+ {
+ SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type.");
+ }
+ else
+ {
+ if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
+ {
+ if (array_stride !=
+ get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
+ SPIRV_CROSS_THROW(
+ "A struct is used with different array strides. Cannot express this in MSL.");
+ }
+ else
+ set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride);
+ }
+ }
+ }
+ }
+}
+
// Sort the members of the struct type by offset, and pack and then pad members where needed
// to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing
// occurs first, followed by padding, because packing a member reduces both its size and its
// natural alignment, possibly requiring a padding member to be added ahead of it.
-void CompilerMSL::align_struct(SPIRType &ib_type)
+void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set<uint32_t> &aligned_structs)
{
- uint32_t &ib_type_id = ib_type.self;
+ // We align structs recursively, so stop any redundant work.
+ ID &ib_type_id = ib_type.self;
+ if (aligned_structs.count(ib_type_id))
+ return;
+ aligned_structs.insert(ib_type_id);
// Sort the members of the interface structure by their offset.
// They should already be sorted per SPIR-V spec anyway.
MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset);
member_sorter.sort();
- uint32_t mbr_cnt = uint32_t(ib_type.member_types.size());
+ auto mbr_cnt = uint32_t(ib_type.member_types.size());
+
+ for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
+ {
+ // Pack any dependent struct types before we pack a parent struct.
+ auto &mbr_type = get<SPIRType>(ib_type.member_types[mbr_idx]);
+ if (mbr_type.basetype == SPIRType::Struct)
+ align_struct(mbr_type, aligned_structs);
+ }
// Test the alignment of each member, and if a member should be closer to the previous
// member than the default spacing expects, it is likely that the previous member is in
// a packed format. If so, and the previous member is packable, pack it.
- // For example...this applies to any 3-element vector that is followed by a scalar.
- uint32_t curr_offset = 0;
+ // For example ... this applies to any 3-element vector that is followed by a scalar.
+ uint32_t msl_offset = 0;
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
- if (is_member_packable(ib_type, mbr_idx))
- {
- set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked);
- set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType,
- ib_type.member_types[mbr_idx]);
- }
+ // This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V
+ // offsets, array strides and matrix strides.
+ ensure_member_packing_rules_msl(ib_type, mbr_idx);
- // Align current offset to the current member's default alignment.
- size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
- uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
+ // Align current offset to the current member's default alignment. If the member was packed, it will observe
+ // the updated alignment here.
+ uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1;
+ uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;
// Fetch the member offset as declared in the SPIRV.
- uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
- if (mbr_offset > aligned_curr_offset)
+ uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
+ if (spirv_mbr_offset > aligned_msl_offset)
{
// Since MSL and SPIR-V have slightly different struct member alignment and
- // size rules, we'll pad to standard C-packing rules. If the member is farther
+ // size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther
// away than C-packing, expects, add an inert padding member before the the member.
- MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx);
- struct_member_padding[key] = mbr_offset - curr_offset;
+ uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset;
+ set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes);
+
+ // Re-align as a sanity check that aligning post-padding matches up.
+ msl_offset += padding_bytes;
+ aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;
+ }
+ else if (spirv_mbr_offset < aligned_msl_offset)
+ {
+ // This should not happen, but deal with unexpected scenarios.
+ // It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V.
+ SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL.");
}
+ assert(aligned_msl_offset == spirv_mbr_offset);
+
// Increment the current offset to be positioned immediately after the current member.
// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
if (mbr_idx + 1 < mbr_cnt)
- curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
+ msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx);
}
}
-// Returns whether the specified struct member supports a packable type
-// variation that is smaller than the unpacked variation of that type.
-bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
+bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const
{
- // We've already marked it as packable
- if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked))
- return true;
-
- auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
-
- uint32_t component_size = mbr_type.width / 8;
- uint32_t unpacked_mbr_size;
- if (mbr_type.vecsize == 3)
- unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
- else
- unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
+ auto &mbr_type = get<SPIRType>(type.member_types[index]);
+ uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset);
+
+ if (index + 1 < type.member_types.size())
+ {
+ // First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member,
+ // we *must* perform some kind of remapping, no way getting around it.
+ // We can always pad after this member if necessary, so that case is fine.
+ uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset);
+ assert(spirv_offset_next >= spirv_offset);
+ uint32_t maximum_size = spirv_offset_next - spirv_offset;
+ uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index);
+ if (msl_mbr_size > maximum_size)
+ return false;
+ }
- // Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead,
- // but we will use the same mechanism.
- if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 &&
- type_struct_member_array_stride(ib_type, index) == 4 * component_size)
+ if (!mbr_type.array.empty())
{
- return true;
+ // If we have an array type, array stride must match exactly with SPIR-V.
+ uint32_t spirv_array_stride = type_struct_member_array_stride(type, index);
+ uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index);
+ if (spirv_array_stride != msl_array_stride)
+ return false;
}
- // Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
- // This can happen for struct A { float a }; A a[]; in std140 layout.
- // TODO: Emit a padded struct which can be used for this purpose.
- if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct)
+ if (is_matrix(mbr_type))
{
- size_t declared_struct_size = get_declared_struct_size(mbr_type);
- size_t alignment = get_declared_struct_member_alignment(ib_type, index);
- declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1);
- if (type_struct_member_array_stride(ib_type, index) > declared_struct_size)
- return true;
+ // Need to check MatrixStride as well.
+ uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index);
+ uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index);
+ if (spirv_matrix_stride != msl_matrix_stride)
+ return false;
}
- // TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column.
- //if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16)
- // SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout.");
-
- // Only vectors or 3-row matrices need to be packed.
- if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
+ // Now, we check alignment.
+ uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index);
+ if ((spirv_offset % msl_alignment) != 0)
return false;
- // Only row-major matrices need to be packed.
- if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
- return false;
+ // We're in the clear.
+ return true;
+}
- if (is_array(mbr_type))
- {
- // If member is an array, and the array stride is larger than the type needs, don't pack it.
- // Take into consideration multi-dimentional arrays.
- uint32_t md_elem_cnt = 1;
- size_t last_elem_idx = mbr_type.array.size() - 1;
- for (uint32_t i = 0; i < last_elem_idx; i++)
- md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u);
+// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions.
+// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types.
+// In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides.
+void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index)
+{
+ if (validate_member_packing_rules_msl(ib_type, index))
+ return;
+
+ // We failed validation.
+ // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite
+ // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule
+ // that struct alignment == max alignment of all members and struct size depends on this alignment.
+ auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
+ if (mbr_type.basetype == SPIRType::Struct)
+ SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct.");
- uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt;
+ // Perform remapping here.
+ set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
+
+ // Try validating again, now with packed.
+ if (validate_member_packing_rules_msl(ib_type, index))
+ return;
+
+ // We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect.
+ // A lot of work goes here ...
+ // We will need remapping on Load and Store to translate the types between Logical and Physical.
+
+ // First, we check if we have small vector std140 array.
+ // We detect this if we have an array of vectors, and array stride is greater than number of elements.
+ if (!mbr_type.array.empty() && !is_matrix(mbr_type))
+ {
uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
- return unpacked_array_stride > array_stride;
+
+ // Hack off array-of-arrays until we find the array stride per element we must have to make it work.
+ uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
+ for (uint32_t dim = 0; dim < dimensions; dim++)
+ array_stride /= max(to_array_size_literal(mbr_type, dim), 1u);
+
+ uint32_t elems_per_stride = array_stride / (mbr_type.width / 8);
+
+ if (elems_per_stride == 3)
+ SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
+ else if (elems_per_stride > 4)
+ SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");
+
+ auto physical_type = mbr_type;
+ physical_type.vecsize = elems_per_stride;
+ physical_type.parent_type = 0;
+ uint32_t type_id = ir.increase_bound_by(1);
+ set<SPIRType>(type_id, physical_type);
+ set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
+ set_decoration(type_id, DecorationArrayStride, array_stride);
+
+ // Remove packed_ for vectors of size 1, 2 and 4.
+ if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked))
+ SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix "
+ "scalar and std140 layout rules.");
+ else
+ unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
}
- else
+ else if (is_matrix(mbr_type))
{
- uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
- // For vectors, pack if the member's offset doesn't conform to the
- // type's usual alignment. For example, a float3 at offset 4.
- if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size))
- return true;
- // Pack if there is not enough space between this member and next.
- // If last member, only pack if it's a row-major matrix.
- if (index < ib_type.member_types.size() - 1)
- {
- uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
- return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
- }
+ // MatrixStride might be std140-esque.
+ uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index);
+
+ uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8);
+
+ if (elems_per_stride == 3)
+ SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
+ else if (elems_per_stride > 4)
+ SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");
+
+ bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
+
+ auto physical_type = mbr_type;
+ physical_type.parent_type = 0;
+ if (row_major)
+ physical_type.columns = elems_per_stride;
+ else
+ physical_type.vecsize = elems_per_stride;
+ uint32_t type_id = ir.increase_bound_by(1);
+ set<SPIRType>(type_id, physical_type);
+ set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
+
+ // Remove packed_ for vectors of size 1, 2 and 4.
+ if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked))
+ SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix "
+ "scalar and std140 layout rules.");
else
- return is_matrix(mbr_type);
+ unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
}
-}
-// Returns a combination of type ID and member index for use as hash key
-MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index)
-{
- MSLStructMemberKey k = type_id;
- k <<= 32;
- k += index;
- return k;
+ // This better validate now, or we must fail gracefully.
+ if (!validate_member_packing_rules_msl(ib_type, index))
+ SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
}
void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
- if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) ||
- get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0)
+ auto &type = expression_type(rhs_expression);
+
+ bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID);
+ bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked);
+ auto *lhs_e = maybe_get<SPIRExpression>(lhs_expression);
+ auto *rhs_e = maybe_get<SPIRExpression>(rhs_expression);
+
+ bool transpose = lhs_e && lhs_e->need_transpose;
+
+ // No physical type remapping, and no packed type, so can just emit a store directly.
+ if (!lhs_remapped_type && !lhs_packed_type)
+ {
+ // We might not be dealing with remapped physical types or packed types,
+ // but we might be doing a clean store to a row-major matrix.
+ // In this case, we just flip transpose states, and emit the store, a transpose must be in the RHS expression, if any.
+ if (is_matrix(type) && lhs_e && lhs_e->need_transpose)
+ {
+ if (!rhs_e)
+ SPIRV_CROSS_THROW("Need to transpose right-side expression of a store to row-major matrix, but it is "
+ "not a SPIRExpression.");
+ lhs_e->need_transpose = false;
+
+ if (rhs_e && rhs_e->need_transpose)
+ {
+ // Direct copy, but might need to unpack RHS.
+ // Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T.
+ rhs_e->need_transpose = false;
+ statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression),
+ ";");
+ rhs_e->need_transpose = true;
+ }
+ else
+ statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");");
+
+ lhs_e->need_transpose = true;
+ register_write(lhs_expression);
+ }
+ else if (lhs_e && lhs_e->need_transpose)
+ {
+ lhs_e->need_transpose = false;
+
+ // Storing a column to a row-major matrix. Unroll the write.
+ for (uint32_t c = 0; c < type.vecsize; c++)
+ {
+ auto lhs_expr = to_dereferenced_expression(lhs_expression);
+ auto column_index = lhs_expr.find_last_of('[');
+ if (column_index != string::npos)
+ {
+ statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ",
+ to_extract_component_expression(rhs_expression, c), ";");
+ }
+ }
+ lhs_e->need_transpose = true;
+ register_write(lhs_expression);
+ }
+ else
+ CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
+ }
+ else if (!lhs_remapped_type && !is_matrix(type) && !transpose)
{
+ // Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly,
+ // since they are declared as array of vectors instead, and we need the fallback path below.
CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
}
else
{
- // Special handling when storing to a float[] or float2[] in std140 layout.
+ // Special handling when storing to a remapped physical type.
+ // This is mostly to deal with std140 padded matrices or vectors.
+
+ TypeID physical_type_id = lhs_remapped_type ?
+ ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) :
+ type.self;
+
+ auto &physical_type = get<SPIRType>(physical_type_id);
+
+ static const char *swizzle_lut[] = {
+ ".x",
+ ".xy",
+ ".xyz",
+ "",
+ };
+
+ if (is_matrix(type))
+ {
+ // Packed matrices are stored as arrays of packed vectors, so we need
+ // to assign the vectors one at a time.
+ // For row-major matrices, we need to transpose the *right-hand* side,
+ // not the left-hand side.
- auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType));
- string lhs = to_dereferenced_expression(lhs_expression);
- string rhs = to_pointer_expression(rhs_expression);
+ // Lots of cases to cover here ...
- // Unpack the expression so we can store to it with a float or float2.
- // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
- if (is_scalar(type) && is_array(type))
- lhs = enclose_expression(lhs) + ".x";
- else if (is_vector(type) && type.vecsize == 2 && is_array(type))
- lhs = enclose_expression(lhs) + ".xy";
+ bool rhs_transpose = rhs_e && rhs_e->need_transpose;
+
+ // We're dealing with transpose manually.
+ if (rhs_transpose)
+ rhs_e->need_transpose = false;
+
+ if (transpose)
+ {
+ // We're dealing with transpose manually.
+ lhs_e->need_transpose = false;
+
+ const char *store_swiz = "";
+ if (physical_type.columns != type.columns)
+ store_swiz = swizzle_lut[type.columns - 1];
+
+ if (rhs_transpose)
+ {
+ // If RHS is also transposed, we can just copy row by row.
+ for (uint32_t i = 0; i < type.vecsize; i++)
+ {
+ statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ",
+ to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];");
+ }
+ }
+ else
+ {
+ auto vector_type = expression_type(rhs_expression);
+ vector_type.vecsize = vector_type.columns;
+ vector_type.columns = 1;
+
+ // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
+ // so pick out individual components instead.
+ for (uint32_t i = 0; i < type.vecsize; i++)
+ {
+ string rhs_row = type_to_glsl_constructor(vector_type) + "(";
+ for (uint32_t j = 0; j < vector_type.vecsize; j++)
+ {
+ rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]");
+ if (j + 1 < vector_type.vecsize)
+ rhs_row += ", ";
+ }
+ rhs_row += ")";
+
+ statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", rhs_row, ";");
+ }
+ }
+
+ // We're dealing with transpose manually.
+ lhs_e->need_transpose = true;
+ }
+ else
+ {
+ const char *store_swiz = "";
+ if (physical_type.vecsize != type.vecsize)
+ store_swiz = swizzle_lut[type.vecsize - 1];
+
+ if (rhs_transpose)
+ {
+ auto vector_type = expression_type(rhs_expression);
+ vector_type.columns = 1;
+
+ // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
+ // so pick out individual components instead.
+ for (uint32_t i = 0; i < type.columns; i++)
+ {
+ string rhs_row = type_to_glsl_constructor(vector_type) + "(";
+ for (uint32_t j = 0; j < vector_type.vecsize; j++)
+ {
+ // Need to explicitly unpack expression since we've mucked with transpose state.
+ auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression);
+ rhs_row += join(unpacked_expr, "[", j, "][", i, "]");
+ if (j + 1 < vector_type.vecsize)
+ rhs_row += ", ";
+ }
+ rhs_row += ")";
+
+ statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ", rhs_row, ";");
+ }
+ }
+ else
+ {
+ // Copy column-by-column.
+ for (uint32_t i = 0; i < type.columns; i++)
+ {
+ statement(to_enclosed_expression(lhs_expression), "[", i, "]", store_swiz, " = ",
+ to_enclosed_unpacked_expression(rhs_expression), "[", i, "];");
+ }
+ }
+ }
+
+ // We're dealing with transpose manually.
+ if (rhs_transpose)
+ rhs_e->need_transpose = true;
+ }
+ else if (transpose)
+ {
+ lhs_e->need_transpose = false;
+
+ // Storing a column to a row-major matrix. Unroll the write.
+ for (uint32_t c = 0; c < type.vecsize; c++)
+ {
+ auto lhs_expr = to_enclosed_expression(lhs_expression);
+ auto column_index = lhs_expr.find_last_of('[');
+ if (column_index != string::npos)
+ {
+ statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ",
+ to_extract_component_expression(rhs_expression, c), ";");
+ }
+ }
+
+ lhs_e->need_transpose = true;
+ }
+ else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize)
+ {
+ assert(type.vecsize >= 1 && type.vecsize <= 3);
+
+ // If we have packed types, we cannot use swizzled stores.
+ // We could technically unroll the store for each element if needed.
+ // When remapping to a std140 physical type, we always get float4,
+ // and the packed decoration should always be removed.
+ assert(!lhs_packed_type);
+
+ string lhs = to_dereferenced_expression(lhs_expression);
+ string rhs = to_pointer_expression(rhs_expression);
+
+ // Unpack the expression so we can store to it with a float or float2.
+ // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
+ lhs = enclose_expression(lhs) + swizzle_lut[type.vecsize - 1];
+ if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
+ statement(lhs, " = ", rhs, ";");
+ }
+ else if (!is_matrix(type))
+ {
+ string lhs = to_dereferenced_expression(lhs_expression);
+ string rhs = to_pointer_expression(rhs_expression);
+ if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
+ statement(lhs, " = ", rhs, ";");
+ }
- if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
- statement(lhs, " = ", rhs, ";");
register_write(lhs_expression);
}
}
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
-string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id)
+// Also, handle special physical ID remapping scenarios, similar to emit_store_statement().
+string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id,
+ bool packed, bool row_major)
{
- const SPIRType *packed_type = nullptr;
- if (packed_type_id)
- packed_type = &get<SPIRType>(packed_type_id);
+ // Trivial case, nothing to do.
+ if (physical_type_id == 0 && !packed)
+ return expr_str;
+
+ const SPIRType *physical_type = nullptr;
+ if (physical_type_id)
+ physical_type = &get<SPIRType>(physical_type_id);
+
+ static const char *swizzle_lut[] = {
+ ".x",
+ ".xy",
+ ".xyz",
+ };
+
+ // std140 array cases for vectors.
+ if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && physical_type->vecsize > type.vecsize)
+ {
+ assert(type.vecsize >= 1 && type.vecsize <= 3);
+ return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
+ }
+ else if (is_matrix(type))
+ {
+ // Packed matrices are stored as arrays of packed vectors. Unfortunately,
+ // we can't just pass the array straight to the matrix constructor. We have to
+ // pass each vector individually, so that they can be unpacked to normal vectors.
+ if (!physical_type)
+ physical_type = &type;
+
+ uint32_t vecsize = type.vecsize;
+ uint32_t columns = type.columns;
+ if (row_major)
+ swap(vecsize, columns);
+
+ uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize;
+
+ const char *base_type = type.width == 16 ? "half" : "float";
+ string unpack_expr = join(base_type, columns, "x", vecsize, "(");
+
+ const char *load_swiz = "";
- // float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead.
- if (packed_type && is_array(*packed_type) && is_scalar(*packed_type))
- return enclose_expression(expr_str) + ".x";
- else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2)
- return enclose_expression(expr_str) + ".xy";
+ if (physical_vecsize != vecsize)
+ load_swiz = swizzle_lut[vecsize - 1];
+
+ for (uint32_t i = 0; i < columns; i++)
+ {
+ if (i > 0)
+ unpack_expr += ", ";
+
+ if (packed)
+ unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz);
+ else
+ unpack_expr += join(expr_str, "[", i, "]", load_swiz);
+ }
+
+ unpack_expr += ")";
+ return unpack_expr;
+ }
else
return join(type_to_glsl(type), "(", expr_str, ")");
}
@@ -2399,6 +3225,39 @@ void CompilerMSL::emit_custom_functions()
if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i)))
spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1));
+ if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler))
+ {
+ // Unfortunately, this one needs a lot of the other functions to compile OK.
+ if (!msl_options.supports_msl_version(2))
+ SPIRV_CROSS_THROW(
+ "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0.");
+ spv_function_implementations.insert(SPVFuncImplForwardArgs);
+ spv_function_implementations.insert(SPVFuncImplTextureSwizzle);
+ if (msl_options.swizzle_texture_samples)
+ spv_function_implementations.insert(SPVFuncImplGatherSwizzle);
+ for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
+ i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
+ spv_function_implementations.insert(static_cast<SPVFuncImpl>(i));
+ spv_function_implementations.insert(SPVFuncImplExpandITUFullRange);
+ spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange);
+ spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709);
+ spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601);
+ spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020);
+ }
+
+ for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
+ i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
+ if (spv_function_implementations.count(static_cast<SPVFuncImpl>(i)))
+ spv_function_implementations.insert(SPVFuncImplForwardArgs);
+
+ if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) ||
+ spv_function_implementations.count(SPVFuncImplGatherSwizzle) ||
+ spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle))
+ {
+ spv_function_implementations.insert(SPVFuncImplForwardArgs);
+ spv_function_implementations.insert(SPVFuncImplGetSwizzle);
+ }
+
for (auto &spv_func : spv_function_implementations)
{
switch (spv_func)
@@ -2406,7 +3265,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplMod:
statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()");
statement("template<typename Tx, typename Ty>");
- statement("Tx mod(Tx x, Ty y)");
+ statement("inline Tx mod(Tx x, Ty y)");
begin_scope();
statement("return x - y * floor(x / y);");
end_scope();
@@ -2416,7 +3275,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplRadians:
statement("// Implementation of the GLSL radians() function");
statement("template<typename T>");
- statement("T radians(T d)");
+ statement("inline T radians(T d)");
begin_scope();
statement("return d * T(0.01745329251);");
end_scope();
@@ -2426,7 +3285,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplDegrees:
statement("// Implementation of the GLSL degrees() function");
statement("template<typename T>");
- statement("T degrees(T r)");
+ statement("inline T degrees(T r)");
begin_scope();
statement("return r * T(57.2957795131);");
end_scope();
@@ -2436,7 +3295,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplFindILsb:
statement("// Implementation of the GLSL findLSB() function");
statement("template<typename T>");
- statement("T findLSB(T x)");
+ statement("inline T spvFindLSB(T x)");
begin_scope();
statement("return select(ctz(x), T(-1), x == T(0));");
end_scope();
@@ -2446,7 +3305,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplFindUMsb:
statement("// Implementation of the unsigned GLSL findMSB() function");
statement("template<typename T>");
- statement("T findUMSB(T x)");
+ statement("inline T spvFindUMSB(T x)");
begin_scope();
statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
end_scope();
@@ -2456,7 +3315,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplFindSMsb:
statement("// Implementation of the signed GLSL findMSB() function");
statement("template<typename T>");
- statement("T findSMSB(T x)");
+ statement("inline T spvFindSMSB(T x)");
begin_scope();
statement("T v = select(x, T(-1) - x, x < T(0));");
statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
@@ -2467,7 +3326,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplSSign:
statement("// Implementation of the GLSL sign() function for integer types");
statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>");
- statement("T sign(T x)");
+ statement("inline T sign(T x)");
begin_scope();
statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));");
end_scope();
@@ -2475,40 +3334,27 @@ void CompilerMSL::emit_custom_functions()
break;
case SPVFuncImplArrayCopy:
- statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via "
- "assignment.");
- statement("template<typename T, uint N>");
- statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])");
- begin_scope();
- statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
- end_scope();
- statement("");
-
- statement("template<typename T, uint N>");
- statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])");
- begin_scope();
- statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
- end_scope();
- statement("");
- break;
-
case SPVFuncImplArrayOfArrayCopy2Dim:
case SPVFuncImplArrayOfArrayCopy3Dim:
case SPVFuncImplArrayOfArrayCopy4Dim:
case SPVFuncImplArrayOfArrayCopy5Dim:
case SPVFuncImplArrayOfArrayCopy6Dim:
{
+ // Unfortunately we cannot template on the address space, so combinatorial explosion it is.
static const char *function_name_tags[] = {
- "FromStack",
- "FromConstant",
+ "FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack",
+ "FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup",
};
static const char *src_address_space[] = {
- "thread const",
- "constant",
+ "constant", "constant", "thread const", "thread const", "threadgroup const", "threadgroup const",
};
- for (uint32_t variant = 0; variant < 2; variant++)
+ static const char *dst_address_space[] = {
+ "thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup",
+ };
+
+ for (uint32_t variant = 0; variant < 6; variant++)
{
uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase;
string tmp = "template<typename T";
@@ -2528,17 +3374,23 @@ void CompilerMSL::emit_custom_functions()
array_arg += "]";
}
- statement("void spvArrayCopy", function_name_tags[variant], dimensions, "(thread T (&dst)", array_arg,
- ", ", src_address_space[variant], " T (&src)", array_arg, ")");
+ statement("inline void spvArrayCopy", function_name_tags[variant], dimensions, "(",
+ dst_address_space[variant], " T (&dst)", array_arg, ", ", src_address_space[variant],
+ " T (&src)", array_arg, ")");
begin_scope();
statement("for (uint i = 0; i < A; i++)");
begin_scope();
- statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
+
+ if (dimensions == 1)
+ statement("dst[i] = src[i];");
+ else
+ statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
end_scope();
end_scope();
statement("");
}
+
break;
}
@@ -2546,7 +3398,7 @@ void CompilerMSL::emit_custom_functions()
{
string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width);
statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
- statement("uint2 spvTexelBufferCoord(uint tc)");
+ statement("inline uint2 spvTexelBufferCoord(uint tc)");
begin_scope();
statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");"));
end_scope();
@@ -2572,7 +3424,7 @@ void CompilerMSL::emit_custom_functions()
statement("");
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
- statement("float4x4 spvInverse4x4(float4x4 m)");
+ statement("inline float4x4 spvInverse4x4(float4x4 m)");
begin_scope();
statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
@@ -2637,7 +3489,7 @@ void CompilerMSL::emit_custom_functions()
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
- statement("float3x3 spvInverse3x3(float3x3 m)");
+ statement("inline float3x3 spvInverse3x3(float3x3 m)");
begin_scope();
statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
@@ -2667,7 +3519,7 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplInverse2x2:
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
- statement("float2x2 spvInverse2x2(float2x2 m)");
+ statement("inline float2x2 spvInverse2x2(float2x2 m)");
begin_scope();
statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
@@ -2688,65 +3540,24 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
- case SPVFuncImplRowMajor2x3:
- statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
- statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)");
- begin_scope();
- statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));");
- end_scope();
- statement("");
- break;
-
- case SPVFuncImplRowMajor2x4:
- statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
- statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)");
- begin_scope();
- statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], "
- "m[1][3]));");
- end_scope();
- statement("");
- break;
-
- case SPVFuncImplRowMajor3x2:
- statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
- statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)");
- begin_scope();
- statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));");
- end_scope();
- statement("");
- break;
-
- case SPVFuncImplRowMajor3x4:
- statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
- statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)");
- begin_scope();
- statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], "
- "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));");
- end_scope();
- statement("");
- break;
-
- case SPVFuncImplRowMajor4x2:
- statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
- statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)");
+ case SPVFuncImplForwardArgs:
+ statement("template<typename T> struct spvRemoveReference { typedef T type; };");
+ statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
+ statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
+ statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
+ "spvRemoveReference<T>::type& x)");
begin_scope();
- statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), "
- "float2(m[1][1], m[3][1]));");
+ statement("return static_cast<thread T&&>(x);");
end_scope();
- statement("");
- break;
-
- case SPVFuncImplRowMajor4x3:
- statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
- statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)");
+ statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
+ "spvRemoveReference<T>::type&& x)");
begin_scope();
- statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), "
- "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));");
+ statement("return static_cast<thread T&&>(x);");
end_scope();
statement("");
break;
- case SPVFuncImplTextureSwizzle:
+ case SPVFuncImplGetSwizzle:
statement("enum class spvSwizzle : uint");
begin_scope();
statement("none = 0,");
@@ -2758,20 +3569,6 @@ void CompilerMSL::emit_custom_functions()
statement("alpha");
end_scope_decl();
statement("");
- statement("template<typename T> struct spvRemoveReference { typedef T type; };");
- statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
- statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
- statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
- "spvRemoveReference<T>::type& x)");
- begin_scope();
- statement("return static_cast<thread T&&>(x);");
- end_scope();
- statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
- "spvRemoveReference<T>::type&& x)");
- begin_scope();
- statement("return static_cast<thread T&&>(x);");
- end_scope();
- statement("");
statement("template<typename T>");
statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)");
begin_scope();
@@ -2794,6 +3591,9 @@ void CompilerMSL::emit_custom_functions()
end_scope();
end_scope();
statement("");
+ break;
+
+ case SPVFuncImplTextureSwizzle:
statement("// Wrapper function that swizzles texture samples and fetches.");
statement("template<typename T>");
statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)");
@@ -2812,11 +3612,14 @@ void CompilerMSL::emit_custom_functions()
statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;");
end_scope();
statement("");
+ break;
+
+ case SPVFuncImplGatherSwizzle:
statement("// Wrapper function that swizzles texture gathers.");
- statement("template<typename T, typename Tex, typename... Ts>");
- statement(
- "inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) "
- "METAL_CONST_ARG(c)");
+ statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
+ "typename... Ts>");
+ statement("inline vec<T, 4> spvGatherSwizzle(const thread Tex<T>& t, sampler s, "
+ "uint sw, component c, Ts... params) METAL_CONST_ARG(c)");
begin_scope();
statement("if (sw)");
begin_scope();
@@ -2853,10 +3656,14 @@ void CompilerMSL::emit_custom_functions()
end_scope();
end_scope();
statement("");
+ break;
+
+ case SPVFuncImplGatherCompareSwizzle:
statement("// Wrapper function that swizzles depth texture gathers.");
- statement("template<typename T, typename Tex, typename... Ts>");
- statement(
- "inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) ");
+ statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
+ "typename... Ts>");
+ statement("inline vec<T, 4> spvGatherCompareSwizzle(const thread Tex<T>& t, sampler "
+ "s, uint sw, Ts... params) ");
begin_scope();
statement("if (sw)");
begin_scope();
@@ -2877,6 +3684,828 @@ void CompilerMSL::emit_custom_functions()
statement("return t.gather_compare(s, spvForward<Ts>(params)...);");
end_scope();
statement("");
+ break;
+
+ case SPVFuncImplSubgroupBallot:
+ statement("inline uint4 spvSubgroupBallot(bool value)");
+ begin_scope();
+ statement("simd_vote vote = simd_ballot(value);");
+ statement("// simd_ballot() returns a 64-bit integer-like object, but");
+ statement("// SPIR-V callers expect a uint4. We must convert.");
+ statement("// FIXME: This won't include higher bits if Apple ever supports");
+ statement("// 128 lanes in an SIMD-group.");
+ statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> "
+ "32) & 0xFFFFFFFF), 0, 0);");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplSubgroupBallotBitExtract:
+ statement("inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)");
+ begin_scope();
+ statement("return !!extract_bits(ballot[bit / 32], bit % 32, 1);");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplSubgroupBallotFindLSB:
+ statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)");
+ begin_scope();
+ statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
+ "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplSubgroupBallotFindMSB:
+ statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)");
+ begin_scope();
+ statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
+ "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
+ "ballot.z == 0), ballot.w == 0);");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplSubgroupBallotBitCount:
+ statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)");
+ begin_scope();
+ statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
+ end_scope();
+ statement("");
+ statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
+ begin_scope();
+ statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
+ "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
+ "uint2(0));");
+ statement("return spvSubgroupBallotBitCount(ballot & mask);");
+ end_scope();
+ statement("");
+ statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
+ begin_scope();
+ statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
+ "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
+ statement("return spvSubgroupBallotBitCount(ballot & mask);");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplSubgroupAllEqual:
+ // Metal doesn't provide a function to evaluate this directly. But, we can
+ // implement this by comparing every thread's value to one thread's value
+ // (in this case, the value of the first active thread). Then, by the transitive
+ // property of equality, if all comparisons return true, then they are all equal.
+ statement("template<typename T>");
+ statement("inline bool spvSubgroupAllEqual(T value)");
+ begin_scope();
+ statement("return simd_all(value == simd_broadcast_first(value));");
+ end_scope();
+ statement("");
+ statement("template<>");
+ statement("inline bool spvSubgroupAllEqual(bool value)");
+ begin_scope();
+ statement("return simd_all(value) || !simd_any(value);");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplReflectScalar:
+ // Metal does not support scalar versions of these functions.
+ statement("template<typename T>");
+ statement("inline T spvReflect(T i, T n)");
+ begin_scope();
+ statement("return i - T(2) * i * n * n;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplRefractScalar:
+ // Metal does not support scalar versions of these functions.
+ statement("template<typename T>");
+ statement("inline T spvRefract(T i, T n, T eta)");
+ begin_scope();
+ statement("T NoI = n * i;");
+ statement("T NoI2 = NoI * NoI;");
+ statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
+ statement("if (k < T(0))");
+ begin_scope();
+ statement("return T(0);");
+ end_scope();
+ statement("else");
+ begin_scope();
+ statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+ end_scope();
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplFaceForwardScalar:
+ // Metal does not support scalar versions of these functions.
+ statement("template<typename T>");
+ statement("inline T spvFaceForward(T n, T i, T nref)");
+ begin_scope();
+ statement("return i * nref < T(0) ? n : -n;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructNearest2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, sampler "
+ "samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructNearest3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, "
+ "texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear422CositedEven2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
+ "plane1, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
+ begin_scope();
+ statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).rg);");
+ end_scope();
+ statement("else");
+ begin_scope();
+ statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
+ end_scope();
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear422CositedEven3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
+ "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
+ begin_scope();
+ statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
+ statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
+ end_scope();
+ statement("else");
+ begin_scope();
+ statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ end_scope();
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear422Midpoint2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
+ "plane1, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
+ statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).rg);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear422Midpoint3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
+ "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
+ statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
+ statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
+ "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
+ statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
+ "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
+ statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
+ "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+ "0)) * 0.5);");
+ statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
+ "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+ "0)) * 0.5);");
+ statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
+ "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
+ "0.5)) * 0.5);");
+ statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
+ "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
+ "0.5)) * 0.5);");
+ statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
+ "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+ "0.5)) * 0.5);");
+ statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane:
+ statement("template<typename T, typename... LodOptions>");
+ statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
+ "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
+ begin_scope();
+ statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
+ statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
+ statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
+ "0.5)) * 0.5);");
+ statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
+ "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
+ "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplExpandITUFullRange:
+ statement("template<typename T>");
+ statement("inline vec<T, 4> spvExpandITUFullRange(vec<T, 4> ycbcr, int n)");
+ begin_scope();
+ statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplExpandITUNarrowRange:
+ statement("template<typename T>");
+ statement("inline vec<T, 4> spvExpandITUNarrowRange(vec<T, 4> ycbcr, int n)");
+ begin_scope();
+ statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);");
+ statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);");
+ statement("return ycbcr;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplConvertYCbCrBT709:
+ statement("// cf. Khronos Data Format Specification, section 15.1.1");
+ statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, "
+ "-0.33480248/0.7152, 0}};");
+ statement("");
+ statement("template<typename T>");
+ statement("inline vec<T, 4> spvConvertYCbCrBT709(vec<T, 4> ycbcr)");
+ begin_scope();
+ statement("vec<T, 4> rgba;");
+ statement("rgba.rgb = vec<T, 3>(spvBT709Factors * ycbcr.gbr);");
+ statement("rgba.a = ycbcr.a;");
+ statement("return rgba;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplConvertYCbCrBT601:
+ statement("// cf. Khronos Data Format Specification, section 15.1.2");
+ statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, "
+ "-0.419198/0.587, 0}};");
+ statement("");
+ statement("template<typename T>");
+ statement("inline vec<T, 4> spvConvertYCbCrBT601(vec<T, 4> ycbcr)");
+ begin_scope();
+ statement("vec<T, 4> rgba;");
+ statement("rgba.rgb = vec<T, 3>(spvBT601Factors * ycbcr.gbr);");
+ statement("rgba.a = ycbcr.a;");
+ statement("return rgba;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplConvertYCbCrBT2020:
+ statement("// cf. Khronos Data Format Specification, section 15.1.3");
+ statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, "
+ "-0.38737742/0.6780, 0}};");
+ statement("");
+ statement("template<typename T>");
+ statement("inline vec<T, 4> spvConvertYCbCrBT2020(vec<T, 4> ycbcr)");
+ begin_scope();
+ statement("vec<T, 4> rgba;");
+ statement("rgba.rgb = vec<T, 3>(spvBT2020Factors * ycbcr.gbr);");
+ statement("rgba.a = ycbcr.a;");
+ statement("return rgba;");
+ end_scope();
+ statement("");
+ break;
+
+ case SPVFuncImplDynamicImageSampler:
+ statement("enum class spvFormatResolution");
+ begin_scope();
+ statement("_444 = 0,");
+ statement("_422,");
+ statement("_420");
+ end_scope_decl();
+ statement("");
+ statement("enum class spvChromaFilter");
+ begin_scope();
+ statement("nearest = 0,");
+ statement("linear");
+ end_scope_decl();
+ statement("");
+ statement("enum class spvXChromaLocation");
+ begin_scope();
+ statement("cosited_even = 0,");
+ statement("midpoint");
+ end_scope_decl();
+ statement("");
+ statement("enum class spvYChromaLocation");
+ begin_scope();
+ statement("cosited_even = 0,");
+ statement("midpoint");
+ end_scope_decl();
+ statement("");
+ statement("enum class spvYCbCrModelConversion");
+ begin_scope();
+ statement("rgb_identity = 0,");
+ statement("ycbcr_identity,");
+ statement("ycbcr_bt_709,");
+ statement("ycbcr_bt_601,");
+ statement("ycbcr_bt_2020");
+ end_scope_decl();
+ statement("");
+ statement("enum class spvYCbCrRange");
+ begin_scope();
+ statement("itu_full = 0,");
+ statement("itu_narrow");
+ end_scope_decl();
+ statement("");
+ statement("struct spvComponentBits");
+ begin_scope();
+ statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}");
+ statement("uchar value : 6;");
+ end_scope_decl();
+ statement("// A class corresponding to metal::sampler which holds sampler");
+ statement("// Y'CbCr conversion info.");
+ statement("struct spvYCbCrSampler");
+ begin_scope();
+ statement("constexpr spvYCbCrSampler() thread : val(build()) {}");
+ statement("template<typename... Ts>");
+ statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}");
+ statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;");
+ statement("");
+ statement("spvFormatResolution get_resolution() const thread");
+ begin_scope();
+ statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);");
+ end_scope();
+ statement("spvChromaFilter get_chroma_filter() const thread");
+ begin_scope();
+ statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);");
+ end_scope();
+ statement("spvXChromaLocation get_x_chroma_offset() const thread");
+ begin_scope();
+ statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);");
+ end_scope();
+ statement("spvYChromaLocation get_y_chroma_offset() const thread");
+ begin_scope();
+ statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);");
+ end_scope();
+ statement("spvYCbCrModelConversion get_ycbcr_model() const thread");
+ begin_scope();
+ statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);");
+ end_scope();
+ statement("spvYCbCrRange get_ycbcr_range() const thread");
+ begin_scope();
+ statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);");
+ end_scope();
+ statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }");
+ statement("");
+ statement("private:");
+ statement("ushort val;");
+ statement("");
+ statement("constexpr static constant ushort resolution_bits = 2;");
+ statement("constexpr static constant ushort chroma_filter_bits = 2;");
+ statement("constexpr static constant ushort x_chroma_off_bit = 1;");
+ statement("constexpr static constant ushort y_chroma_off_bit = 1;");
+ statement("constexpr static constant ushort ycbcr_model_bits = 3;");
+ statement("constexpr static constant ushort ycbcr_range_bit = 1;");
+ statement("constexpr static constant ushort bpc_bits = 6;");
+ statement("");
+ statement("constexpr static constant ushort resolution_base = 0;");
+ statement("constexpr static constant ushort chroma_filter_base = 2;");
+ statement("constexpr static constant ushort x_chroma_off_base = 4;");
+ statement("constexpr static constant ushort y_chroma_off_base = 5;");
+ statement("constexpr static constant ushort ycbcr_model_base = 6;");
+ statement("constexpr static constant ushort ycbcr_range_base = 9;");
+ statement("constexpr static constant ushort bpc_base = 10;");
+ statement("");
+ statement(
+ "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;");
+ statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << "
+ "chroma_filter_base;");
+ statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << "
+ "x_chroma_off_base;");
+ statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << "
+ "y_chroma_off_base;");
+ statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << "
+ "ycbcr_model_base;");
+ statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << "
+ "ycbcr_range_base;");
+ statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;");
+ statement("");
+ statement("static constexpr ushort build()");
+ begin_scope();
+ statement("return 0;");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvFormatResolution res, Ts... t)");
+ begin_scope();
+ statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvChromaFilter filt, Ts... t)");
+ begin_scope();
+ statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)");
+ begin_scope();
+ statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)");
+ begin_scope();
+ statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)");
+ begin_scope();
+ statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)");
+ begin_scope();
+ statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);");
+ end_scope();
+ statement("");
+ statement("template<typename... Ts>");
+ statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)");
+ begin_scope();
+ statement("return (ushort(bpc.value) << bpc_base) | (build(t...) & ~bpc_mask);");
+ end_scope();
+ end_scope_decl();
+ statement("");
+ statement("// A class which can hold up to three textures and a sampler, including");
+ statement("// Y'CbCr conversion info, used to pass combined image-samplers");
+ statement("// dynamically to functions.");
+ statement("template<typename T>");
+ statement("struct spvDynamicImageSampler");
+ begin_scope();
+ statement("texture2d<T> plane0;");
+ statement("texture2d<T> plane1;");
+ statement("texture2d<T> plane2;");
+ statement("sampler samp;");
+ statement("spvYCbCrSampler ycbcr_samp;");
+ statement("uint swizzle = 0;");
+ statement("");
+ if (msl_options.swizzle_texture_samples)
+ {
+ statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, uint sw) thread :");
+ statement(" plane0(tex), samp(samp), swizzle(sw) {}");
+ }
+ else
+ {
+ statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp) thread :");
+ statement(" plane0(tex), samp(samp) {}");
+ }
+ statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, spvYCbCrSampler ycbcr_samp, "
+ "uint sw) thread :");
+ statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
+ statement("constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1,");
+ statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
+ statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
+ statement(
+ "constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1, texture2d<T> plane2,");
+ statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
+ statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), "
+ "swizzle(sw) {}");
+ statement("");
+ // XXX This is really hard to follow... I've left comments to make it a bit easier.
+ statement("template<typename... LodOptions>");
+ statement("vec<T, 4> do_sample(float2 coord, LodOptions... options) const thread");
+ begin_scope();
+ statement("if (!is_null_texture(plane1))");
+ begin_scope();
+ statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||");
+ statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)");
+ begin_scope();
+ statement("if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ statement(
+ "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward<LodOptions>(options)...);");
+ end_scope(); // if (resolution == 422 || chroma_filter == nearest)
+ statement("switch (ycbcr_samp.get_resolution())");
+ begin_scope();
+ statement("case spvFormatResolution::_444: break;");
+ statement("case spvFormatResolution::_422:");
+ begin_scope();
+ statement("switch (ycbcr_samp.get_x_chroma_offset())");
+ begin_scope();
+ statement("case spvXChromaLocation::cosited_even:");
+ statement(" if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructLinear422CositedEven(");
+ statement(" plane0, plane1, plane2, samp,");
+ statement(" coord, spvForward<LodOptions>(options)...);");
+ statement(" return spvChromaReconstructLinear422CositedEven(");
+ statement(" plane0, plane1, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ statement("case spvXChromaLocation::midpoint:");
+ statement(" if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructLinear422Midpoint(");
+ statement(" plane0, plane1, plane2, samp,");
+ statement(" coord, spvForward<LodOptions>(options)...);");
+ statement(" return spvChromaReconstructLinear422Midpoint(");
+ statement(" plane0, plane1, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ end_scope(); // switch (x_chroma_offset)
+ end_scope(); // case 422:
+ statement("case spvFormatResolution::_420:");
+ begin_scope();
+ statement("switch (ycbcr_samp.get_x_chroma_offset())");
+ begin_scope();
+ statement("case spvXChromaLocation::cosited_even:");
+ begin_scope();
+ statement("switch (ycbcr_samp.get_y_chroma_offset())");
+ begin_scope();
+ statement("case spvYChromaLocation::cosited_even:");
+ statement(" if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
+ statement(" plane0, plane1, plane2, samp,");
+ statement(" coord, spvForward<LodOptions>(options)...);");
+ statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
+ statement(" plane0, plane1, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ statement("case spvYChromaLocation::midpoint:");
+ statement(" if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
+ statement(" plane0, plane1, plane2, samp,");
+ statement(" coord, spvForward<LodOptions>(options)...);");
+ statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
+ statement(" plane0, plane1, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ end_scope(); // switch (y_chroma_offset)
+ end_scope(); // case x::cosited_even:
+ statement("case spvXChromaLocation::midpoint:");
+ begin_scope();
+ statement("switch (ycbcr_samp.get_y_chroma_offset())");
+ begin_scope();
+ statement("case spvYChromaLocation::cosited_even:");
+ statement(" if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructLinear420XMidpointYCositedEven(");
+ statement(" plane0, plane1, plane2, samp,");
+ statement(" coord, spvForward<LodOptions>(options)...);");
+ statement(" return spvChromaReconstructLinear420XMidpointYCositedEven(");
+ statement(" plane0, plane1, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ statement("case spvYChromaLocation::midpoint:");
+ statement(" if (!is_null_texture(plane2))");
+ statement(" return spvChromaReconstructLinear420XMidpointYMidpoint(");
+ statement(" plane0, plane1, plane2, samp,");
+ statement(" coord, spvForward<LodOptions>(options)...);");
+ statement(" return spvChromaReconstructLinear420XMidpointYMidpoint(");
+ statement(" plane0, plane1, samp, coord,");
+ statement(" spvForward<LodOptions>(options)...);");
+ end_scope(); // switch (y_chroma_offset)
+ end_scope(); // case x::midpoint
+ end_scope(); // switch (x_chroma_offset)
+ end_scope(); // case 420:
+ end_scope(); // switch (resolution)
+ end_scope(); // if (multiplanar)
+ statement("return plane0.sample(samp, coord, spvForward<LodOptions>(options)...);");
+ end_scope(); // do_sample()
+ statement("template <typename... LodOptions>");
+ statement("vec<T, 4> sample(float2 coord, LodOptions... options) const thread");
+ begin_scope();
+ statement(
+ "vec<T, 4> s = spvTextureSwizzle(do_sample(coord, spvForward<LodOptions>(options)...), swizzle);");
+ statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)");
+ statement(" return s;");
+ statement("");
+ statement("switch (ycbcr_samp.get_ycbcr_range())");
+ begin_scope();
+ statement("case spvYCbCrRange::itu_full:");
+ statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());");
+ statement(" break;");
+ statement("case spvYCbCrRange::itu_narrow:");
+ statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());");
+ statement(" break;");
+ end_scope();
+ statement("");
+ statement("switch (ycbcr_samp.get_ycbcr_model())");
+ begin_scope();
+ statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning
+ statement("case spvYCbCrModelConversion::ycbcr_identity:");
+ statement(" return s;");
+ statement("case spvYCbCrModelConversion::ycbcr_bt_709:");
+ statement(" return spvConvertYCbCrBT709(s);");
+ statement("case spvYCbCrModelConversion::ycbcr_bt_601:");
+ statement(" return spvConvertYCbCrBT601(s);");
+ statement("case spvYCbCrModelConversion::ycbcr_bt_2020:");
+ statement(" return spvConvertYCbCrBT2020(s);");
+ end_scope();
+ end_scope();
+ statement("");
+ // Sampler Y'CbCr conversion forbids offsets.
+ statement("vec<T, 4> sample(float2 coord, int2 offset) const thread");
+ begin_scope();
+ if (msl_options.swizzle_texture_samples)
+ statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);");
+ else
+ statement("return plane0.sample(samp, coord, offset);");
+ end_scope();
+ statement("template<typename lod_options>");
+ statement("vec<T, 4> sample(float2 coord, lod_options options, int2 offset) const thread");
+ begin_scope();
+ if (msl_options.swizzle_texture_samples)
+ statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);");
+ else
+ statement("return plane0.sample(samp, coord, options, offset);");
+ end_scope();
+ statement("#if __HAVE_MIN_LOD_CLAMP__");
+ statement("vec<T, 4> sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread");
+ begin_scope();
+ statement("return plane0.sample(samp, coord, b, min_lod, offset);");
+ end_scope();
+ statement(
+ "vec<T, 4> sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread");
+ begin_scope();
+ statement("return plane0.sample(samp, coord, grad, min_lod, offset);");
+ end_scope();
+ statement("#endif");
+ statement("");
+ // Y'CbCr conversion forbids all operations but sampling.
+ statement("vec<T, 4> read(uint2 coord, uint lod = 0) const thread");
+ begin_scope();
+ statement("return plane0.read(coord, lod);");
+ end_scope();
+ statement("");
+ statement("vec<T, 4> gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread");
+ begin_scope();
+ if (msl_options.swizzle_texture_samples)
+ statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);");
+ else
+ statement("return plane0.gather(samp, coord, offset, c);");
+ end_scope();
+ end_scope_decl();
+ statement("");
default:
break;
@@ -2938,10 +4567,27 @@ void CompilerMSL::emit_resources()
void CompilerMSL::emit_specialization_constants_and_structs()
{
SpecializationConstant wg_x, wg_y, wg_z;
- uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+ ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
bool emitted = false;
unordered_set<uint32_t> declared_structs;
+ unordered_set<uint32_t> aligned_structs;
+
+ // First, we need to deal with scalar block layout.
+ // It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself.
+ // In that case, if such a case exists for a struct, we must force that all elements of the struct become packed_ types.
+ // This makes the struct alignment as small as physically possible.
+ // When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types.
+ ir.for_each_typed_id<SPIRType>([&](uint32_t type_id, const SPIRType &type) {
+ if (type.basetype == SPIRType::Struct &&
+ has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
+ mark_scalar_layout_structs(type);
+ });
+
+ // Very particular use of the soft loop lock.
+ // align_struct may need to create custom types on the fly, but we don't care about
+ // these types for purpose of iterating over them in ir.ids_for_type and friends.
+ auto loop_lock = ir.create_loop_soft_lock();
for (auto &id_ : ir.ids_for_constant_or_type)
{
@@ -3015,7 +4661,7 @@ void CompilerMSL::emit_specialization_constants_and_structs()
// Output non-builtin interface structs. These include local function structs
// and structs nested within uniform and read-write buffers.
auto &type = id.get<SPIRType>();
- uint32_t type_id = type.self;
+ TypeID type_id = type.self;
bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty();
bool is_block =
@@ -3043,8 +4689,8 @@ void CompilerMSL::emit_specialization_constants_and_structs()
declared_structs.insert(type_id);
- if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked))
- align_struct(type);
+ if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
+ align_struct(type, aligned_structs);
// Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc.
emit_struct(get<SPIRType>(type_id));
@@ -3154,7 +4800,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
}
else if (is_array(mbr_type))
{
- for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++)
+ for (uint32_t k = 0; k < to_array_size_literal(mbr_type, 0); k++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr,
@@ -3183,7 +4829,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
else // Must be an array
{
assert(is_array(*type));
- for (uint32_t j = 0; j < type->array[0]; j++, index++)
+ for (uint32_t j = 0; j < to_array_size_literal(*type, 0); j++, index++)
{
set<SPIRConstant>(const_mbr_id, type_id, index, false);
auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true);
@@ -3230,9 +4876,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
// Mark the result as being packed if necessary.
if (meta.storage_is_packed)
- set_extended_decoration(ops[1], SPIRVCrossDecorationPacked);
- if (meta.storage_packed_type != 0)
- set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type);
+ set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
+ if (meta.storage_physical_type != 0)
+ set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
if (meta.storage_is_invariant)
set_decoration(ops[1], DecorationInvariant);
@@ -3251,7 +4897,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l
// expression so we don't try to dereference it as a variable pointer.
// Don't do this if the index is a constant 1, though. We need to drop stores
// to that one.
- auto *m = ir.find_meta(var ? var->self : 0);
+ auto *m = ir.find_meta(var ? var->self : ID(0));
if (get_execution_model() == ExecutionModelTessellationControl && var && m &&
m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles))
{
@@ -3436,21 +5082,36 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
// Bitfield
case OpBitFieldInsert:
- MSL_QFOP(insert_bits);
+ {
+ emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt);
break;
+ }
case OpBitFieldSExtract:
+ {
+ emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type,
+ SPIRType::UInt, SPIRType::UInt);
+ break;
+ }
+
case OpBitFieldUExtract:
- MSL_TFOP(extract_bits);
+ {
+ emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type,
+ SPIRType::UInt, SPIRType::UInt);
break;
+ }
case OpBitReverse:
+ // BitReverse does not have issues with sign since result type must match input type.
MSL_UFOP(reverse_bits);
break;
case OpBitCount:
- MSL_UFOP(popcount);
+ {
+ auto basetype = expression_type(ops[2]).basetype;
+ emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype);
break;
+ }
case OpFRem:
MSL_BFOP(fmod);
@@ -3628,11 +5289,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
auto store_type = texel_type;
store_type.vecsize = 4;
- statement(join(
- to_expression(img_id), ".write(", remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)),
- ", ",
- to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, &forward),
- ");"));
+ statement(join(to_expression(img_id), ".write(",
+ remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ",
+ to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0,
+ 0, &forward),
+ ");"));
if (p_var && variable_storage_is_aliased(*p_var))
flush_all_aliased_variables();
@@ -3686,7 +5347,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
}
case OpImageQueryLod:
- SPIRV_CROSS_THROW("MSL does not support textureQueryLod().");
+ {
+ if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("ImageQueryLod is only supported on MSL 2.2 and up.");
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t image_id = ops[2];
+ uint32_t coord_id = ops[3];
+ emit_uninitialized_temporary_expression(result_type, id);
+
+ auto sampler_expr = to_sampler_expression(image_id);
+ auto *combined = maybe_get<SPIRCombinedImageSampler>(image_id);
+ auto image_expr = combined ? to_expression(combined->image) : to_expression(image_id);
+
+ // TODO: It is unclear if calculcate_clamped_lod also conditionally rounds
+ // the reported LOD based on the sampler. NEAREST miplevel should
+ // round the LOD, but LINEAR miplevel should not round.
+ // Let's hope this does not become an issue ...
+ statement(to_expression(id), ".x = ", image_expr, ".calculate_clamped_lod(", sampler_expr, ", ",
+ to_expression(coord_id), ");");
+ statement(to_expression(id), ".y = ", image_expr, ".calculate_unclamped_lod(", sampler_expr, ", ",
+ to_expression(coord_id), ");");
+ register_control_dependent_expression(id);
+ break;
+ }
#define MSL_ImgQry(qrytype) \
do \
@@ -3723,10 +5407,14 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
}
else
{
- auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
auto *var = maybe_get_backing_variable(ops[2]);
+ SPIRExpression *e;
+ if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler))
+ e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true);
+ else
+ e = &emit_op(result_type, id, to_expression(ops[2]), true, true);
if (var)
- e.loaded_from = var->self;
+ e->loaded_from = var->self;
}
break;
}
@@ -3801,36 +5489,31 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
emit_barrier(ops[0], ops[1], ops[2]);
break;
- case OpVectorTimesMatrix:
- case OpMatrixTimesVector:
+ case OpOuterProduct:
{
- // If the matrix needs transpose and it is square or packed, just flip the multiply order.
- uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
- auto *e = maybe_get<SPIRExpression>(mtx_id);
- auto &t = expression_type(mtx_id);
- bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
- if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
- {
- e->need_transpose = false;
-
- // This is important for matrices. Packed matrices
- // are generally transposed, so unpacking using a constructor argument
- // will result in an error.
- // The simplest solution for now is to just avoid unpacking the matrix in this operation.
- unset_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t a = ops[2];
+ uint32_t b = ops[3];
- emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
- if (is_packed)
- set_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
- e->need_transpose = true;
+ auto &type = get<SPIRType>(result_type);
+ string expr = type_to_glsl_constructor(type);
+ expr += "(";
+ for (uint32_t col = 0; col < type.columns; col++)
+ {
+ expr += to_enclosed_expression(a);
+ expr += " * ";
+ expr += to_extract_component_expression(b, col);
+ if (col + 1 < type.columns)
+ expr += ", ";
}
- else
- MSL_BOP(*);
+ expr += ")";
+ emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+ inherit_expression_dependencies(id, a);
+ inherit_expression_dependencies(id, b);
break;
}
- // OpOuterProduct
-
case OpIAddCarry:
case OpISubBorrow:
{
@@ -3838,10 +5521,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t result_id = ops[1];
uint32_t op0 = ops[2];
uint32_t op1 = ops[3];
- forced_temporaries.insert(result_id);
auto &type = get<SPIRType>(result_type);
- statement(variable_decl(type, to_name(result_id)), ";");
- set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
+ emit_uninitialized_temporary_expression(result_type, result_id);
auto &res_type = get<SPIRType>(type.member_types[1]);
if (opcode == OpIAddCarry)
@@ -3870,10 +5551,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
uint32_t result_id = ops[1];
uint32_t op0 = ops[2];
uint32_t op1 = ops[3];
- forced_temporaries.insert(result_id);
auto &type = get<SPIRType>(result_type);
- statement(variable_decl(type, to_name(result_id)), ";");
- set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
+ emit_uninitialized_temporary_expression(result_type, result_id);
statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ",
to_enclosed_expression(op1), ";");
@@ -3882,6 +5561,91 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
}
+ case OpArrayLength:
+ {
+ auto &type = expression_type(ops[2]);
+ uint32_t offset = type_struct_member_offset(type, ops[3]);
+ uint32_t stride = type_struct_member_array_stride(type, ops[3]);
+
+ auto expr = join("(", to_buffer_size_expression(ops[2]), " - ", offset, ") / ", stride);
+ emit_op(ops[0], ops[1], expr, true);
+ break;
+ }
+
+ // SPV_INTEL_shader_integer_functions2
+ case OpUCountLeadingZerosINTEL:
+ MSL_UFOP(clz);
+ break;
+
+ case OpUCountTrailingZerosINTEL:
+ MSL_UFOP(ctz);
+ break;
+
+ case OpAbsISubINTEL:
+ case OpAbsUSubINTEL:
+ MSL_BFOP(absdiff);
+ break;
+
+ case OpIAddSatINTEL:
+ case OpUAddSatINTEL:
+ MSL_BFOP(addsat);
+ break;
+
+ case OpIAverageINTEL:
+ case OpUAverageINTEL:
+ MSL_BFOP(hadd);
+ break;
+
+ case OpIAverageRoundedINTEL:
+ case OpUAverageRoundedINTEL:
+ MSL_BFOP(rhadd);
+ break;
+
+ case OpISubSatINTEL:
+ case OpUSubSatINTEL:
+ MSL_BFOP(subsat);
+ break;
+
+ case OpIMul32x16INTEL:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t a = ops[2], b = ops[3];
+ bool forward = should_forward(a) && should_forward(b);
+ emit_op(result_type, id, join("int(short(", to_expression(a), ")) * int(short(", to_expression(b), "))"),
+ forward);
+ inherit_expression_dependencies(id, a);
+ inherit_expression_dependencies(id, b);
+ break;
+ }
+
+ case OpUMul32x16INTEL:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t a = ops[2], b = ops[3];
+ bool forward = should_forward(a) && should_forward(b);
+ emit_op(result_type, id, join("uint(ushort(", to_expression(a), ")) * uint(ushort(", to_expression(b), "))"),
+ forward);
+ inherit_expression_dependencies(id, a);
+ inherit_expression_dependencies(id, b);
+ break;
+ }
+
+ case OpIsHelperInvocationEXT:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS.");
+ else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS.");
+ emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
+ break;
+
+ case OpBeginInvocationInterlockEXT:
+ case OpEndInvocationInterlockEXT:
+ if (!msl_options.supports_msl_version(2, 0))
+ SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
+ break; // Nothing to do in the body
+
default:
CompilerGLSL::emit_instruction(instruction);
break;
@@ -3895,33 +5659,70 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl)
return;
- string bar_stmt = "threadgroup_barrier(mem_flags::";
+ uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation);
+ uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation);
+ // Use the wider of the two scopes (smaller value)
+ exe_scope = min(exe_scope, mem_scope);
+
+ string bar_stmt;
+ if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2))
+ bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier";
+ else
+ bar_stmt = "threadgroup_barrier";
+ bar_stmt += "(";
uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone);
- if (get_execution_model() == ExecutionModelTessellationControl)
+ // Use the | operator to combine flags if we can.
+ if (msl_options.supports_msl_version(1, 2))
+ {
+ string mem_flags = "";
// For tesc shaders, this also affects objects in the Output storage class.
// Since in Metal, these are placed in a device buffer, we have to sync device memory here.
- bar_stmt += "mem_device";
- else if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask)
- bar_stmt += "mem_device";
- else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
- MemorySemanticsAtomicCounterMemoryMask))
- bar_stmt += "mem_threadgroup";
- else if (mem_sem & MemorySemanticsImageMemoryMask)
- bar_stmt += "mem_texture";
+ if (get_execution_model() == ExecutionModelTessellationControl ||
+ (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)))
+ mem_flags += "mem_flags::mem_device";
+ if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+ MemorySemanticsAtomicCounterMemoryMask))
+ {
+ if (!mem_flags.empty())
+ mem_flags += " | ";
+ mem_flags += "mem_flags::mem_threadgroup";
+ }
+ if (mem_sem & MemorySemanticsImageMemoryMask)
+ {
+ if (!mem_flags.empty())
+ mem_flags += " | ";
+ mem_flags += "mem_flags::mem_texture";
+ }
+
+ if (mem_flags.empty())
+ mem_flags = "mem_flags::mem_none";
+
+ bar_stmt += mem_flags;
+ }
else
- bar_stmt += "mem_none";
+ {
+ if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) &&
+ (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+ MemorySemanticsAtomicCounterMemoryMask)))
+ bar_stmt += "mem_flags::mem_device_and_threadgroup";
+ else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))
+ bar_stmt += "mem_flags::mem_device";
+ else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask |
+ MemorySemanticsAtomicCounterMemoryMask))
+ bar_stmt += "mem_flags::mem_threadgroup";
+ else if (mem_sem & MemorySemanticsImageMemoryMask)
+ bar_stmt += "mem_flags::mem_texture";
+ else
+ bar_stmt += "mem_flags::mem_none";
+ }
if (msl_options.is_ios() && (msl_options.supports_msl_version(2) && !msl_options.supports_msl_version(2, 1)))
{
bar_stmt += ", ";
- // Use the wider of the two scopes (smaller value)
- uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation);
- uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation);
- uint32_t scope = min(exe_scope, mem_scope);
- switch (scope)
+ switch (mem_scope)
{
case ScopeCrossDevice:
case ScopeDevice:
@@ -3949,7 +5750,8 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
flush_all_active_variables();
}
-void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
+void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage,
+ StorageClass rhs_storage)
{
// Assignment from an array initializer is fine.
auto &type = expression_type(rhs_id);
@@ -3977,21 +5779,32 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
if (type.array.size() > SPVFuncImplArrayCopyMultidimMax)
SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays.");
auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size());
- if (spv_function_implementations.count(func) == 0)
- {
- spv_function_implementations.insert(func);
- suppress_missing_prototypes = true;
- force_recompile();
- }
- }
- else if (spv_function_implementations.count(SPVFuncImplArrayCopy) == 0)
- {
- spv_function_implementations.insert(SPVFuncImplArrayCopy);
- suppress_missing_prototypes = true;
- force_recompile();
+ add_spv_func_and_recompile(func);
}
+ else
+ add_spv_func_and_recompile(SPVFuncImplArrayCopy);
+
+ bool lhs_thread = lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction ||
+ lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate;
+ bool rhs_thread = rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction ||
+ rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate;
+
+ const char *tag = nullptr;
+ if (lhs_thread && is_constant)
+ tag = "FromConstantToStack";
+ else if (lhs_storage == StorageClassWorkgroup && is_constant)
+ tag = "FromConstantToThreadGroup";
+ else if (lhs_thread && rhs_thread)
+ tag = "FromStackToStack";
+ else if (lhs_storage == StorageClassWorkgroup && rhs_thread)
+ tag = "FromStackToThreadGroup";
+ else if (lhs_thread && rhs_storage == StorageClassWorkgroup)
+ tag = "FromThreadGroupToStack";
+ else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
+ tag = "FromThreadGroupToThreadGroup";
+ else
+ SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");
- const char *tag = is_constant ? "FromConstant" : "FromStack";
statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
}
@@ -4028,7 +5841,8 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
if (p_v_lhs)
flush_variable_declaration(p_v_lhs->self);
- emit_array_copy(to_expression(id_lhs), id_rhs);
+ emit_array_copy(to_expression(id_lhs), id_rhs, get_backing_variable_storage(id_lhs),
+ get_backing_variable_storage(id_rhs));
register_write(id_lhs);
return true;
@@ -4039,12 +5853,10 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1,
bool op1_is_pointer, bool op1_is_literal, uint32_t op2)
{
- forced_temporaries.insert(result_id);
-
string exp = string(op) + "(";
auto &type = get_pointee_type(expression_type(obj));
- exp += "(volatile ";
+ exp += "(";
auto *var = maybe_get_backing_variable(obj);
if (!var)
SPIRV_CROSS_THROW("No backing variable for atomic operation.");
@@ -4079,12 +5891,11 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
// the CAS loop, otherwise it will loop infinitely, with the comparison test always failing.
// The function updates the comparitor value from the memory value, so the additional
// comparison test evaluates the memory value against the expected value.
- statement(variable_decl(type, to_name(result_id)), ";");
+ emit_uninitialized_temporary_expression(result_type, result_id);
statement("do");
begin_scope();
statement(to_name(result_id), " = ", to_expression(op1), ";");
end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")"));
- set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
}
else
{
@@ -4138,12 +5949,20 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
emit_unary_func_op(result_type, id, args[0], "rint");
break;
+ case GLSLstd450FindILsb:
+ {
+ // In this template version of findLSB, we return T.
+ auto basetype = expression_type(args[0]).basetype;
+ emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype);
+ break;
+ }
+
case GLSLstd450FindSMsb:
- emit_unary_func_op_cast(result_type, id, args[0], "findSMSB", int_type, int_type);
+ emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type);
break;
case GLSLstd450FindUMsb:
- emit_unary_func_op_cast(result_type, id, args[0], "findUMSB", uint_type, uint_type);
+ emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type);
break;
case GLSLstd450PackSnorm4x8:
@@ -4267,12 +6086,130 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
// GLSLstd450InterpolateAtOffset
+ case GLSLstd450Distance:
+ // MSL does not support scalar versions here.
+ if (expression_type(args[0]).vecsize == 1)
+ {
+ // Equivalent to length(a - b) -> abs(a - b).
+ emit_op(result_type, id,
+ join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"),
+ should_forward(args[0]) && should_forward(args[1]));
+ inherit_expression_dependencies(id, args[0]);
+ inherit_expression_dependencies(id, args[1]);
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Length:
+ // MSL does not support scalar versions here.
+ if (expression_type(args[0]).vecsize == 1)
+ {
+ // Equivalent to abs().
+ emit_unary_func_op(result_type, id, args[0], "abs");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Normalize:
+ // MSL does not support scalar versions here.
+ if (expression_type(args[0]).vecsize == 1)
+ {
+ // Returns -1 or 1 for valid input, sign() does the job.
+ emit_unary_func_op(result_type, id, args[0], "sign");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Reflect:
+ if (get<SPIRType>(result_type).vecsize == 1)
+ emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Refract:
+ if (get<SPIRType>(result_type).vecsize == 1)
+ emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450FaceForward:
+ if (get<SPIRType>(result_type).vecsize == 1)
+ emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward");
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+
+ case GLSLstd450Modf:
+ case GLSLstd450Frexp:
+ {
+ // Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary.
+ auto *ptr = maybe_get<SPIRExpression>(args[1]);
+ if (ptr && ptr->access_chain && is_scalar(expression_type(args[1])))
+ {
+ register_call_out_argument(args[1]);
+ forced_temporaries.insert(id);
+
+ // Need to create temporaries and copy over to access chain after.
+ // We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ...
+ uint32_t &tmp_id = extra_sub_expressions[id];
+ if (!tmp_id)
+ tmp_id = ir.increase_bound_by(1);
+
+ uint32_t tmp_type_id = get_pointee_type_id(ptr->expression_type);
+ emit_uninitialized_temporary_expression(tmp_type_id, tmp_id);
+ emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp");
+ statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";");
+ }
+ else
+ CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+ break;
+ }
+
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
break;
}
}
+void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
+ const uint32_t *args, uint32_t count)
+{
+ enum AMDShaderTrinaryMinMax
+ {
+ FMin3AMD = 1,
+ UMin3AMD = 2,
+ SMin3AMD = 3,
+ FMax3AMD = 4,
+ UMax3AMD = 5,
+ SMax3AMD = 6,
+ FMid3AMD = 7,
+ UMid3AMD = 8,
+ SMid3AMD = 9
+ };
+
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1.");
+
+ auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
+
+ switch (op)
+ {
+ case FMid3AMD:
+ case UMid3AMD:
+ case SMid3AMD:
+ emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3");
+ break;
+ default:
+ CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count);
+ break;
+ }
+}
+
// Emit a structure declaration for the specified interface variable.
void CompilerMSL::emit_interface_block(uint32_t ib_var_id)
{
@@ -4293,9 +6230,10 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
add_function_overload(func);
local_variable_names = resource_names;
- string decl;
- processing_entry_point = (func.self == ir.default_entry_point);
+ processing_entry_point = func.self == ir.default_entry_point;
+
+ string decl = processing_entry_point ? "" : "inline ";
auto &type = get<SPIRType>(func.return_type);
@@ -4306,7 +6244,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
else
{
// We cannot return arrays in MSL, so "return" through an out variable.
- decl = "void";
+ decl += "void";
}
decl += " ";
@@ -4338,7 +6276,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
for (auto var_id : vars_needing_early_declaration)
{
auto &ed_var = get<SPIRVariable>(var_id);
- uint32_t &initializer = ed_var.initializer;
+ ID &initializer = ed_var.initializer;
if (!initializer)
initializer = ir.increase_bound_by(1);
@@ -4367,16 +6305,36 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
decl += argument_decl(arg);
- // Manufacture automatic sampler arg for SampledImage texture
+ bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+
auto &arg_type = get<SPIRType>(arg.type);
- if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
- decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id));
+ if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler)
+ {
+ // Manufacture automatic plane args for multiplanar texture
+ uint32_t planes = 1;
+ if (auto *constexpr_sampler = find_constexpr_sampler(name_id))
+ if (constexpr_sampler->ycbcr_conversion_enable)
+ planes = constexpr_sampler->planes;
+ for (uint32_t i = 1; i < planes; i++)
+ decl += join(", ", argument_decl(arg), plane_name_suffix, i);
+
+ // Manufacture automatic sampler arg for SampledImage texture
+ if (arg_type.image.dim != DimBuffer)
+ decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id));
+ }
// Manufacture automatic swizzle arg.
- if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type))
+ if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) &&
+ !is_dynamic_img_sampler)
{
bool arg_is_array = !arg_type.array.empty();
- decl += join(", constant uint32_t", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id));
+ decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id));
+ }
+
+ if (buffers_requiring_array_length.count(name_id))
+ {
+ bool arg_is_array = !arg_type.array.empty();
+ decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id));
}
if (&arg != &func.arguments.back())
@@ -4387,85 +6345,237 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
statement(decl);
}
+static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler)
+{
+ // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images
+ // use implicit reconstruction.
+ return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1;
+}
+
// Returns the texture sampling function string for the specified image and sampling characteristics.
-string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool,
- bool has_offset, bool, bool has_dref, uint32_t)
+string CompilerMSL::to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool,
+ bool, bool, bool has_dref, uint32_t, uint32_t)
{
+ const MSLConstexprSampler *constexpr_sampler = nullptr;
+ bool is_dynamic_img_sampler = false;
+ if (auto *var = maybe_get_backing_variable(img))
+ {
+ constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+ is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+ }
+
// Special-case gather. We have to alter the component being looked up
// in the swizzle case.
- if (msl_options.swizzle_texture_samples && is_gather)
+ if (msl_options.swizzle_texture_samples && is_gather && !is_dynamic_img_sampler &&
+ (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
{
- string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
- fname += "<" + type_to_glsl(get<SPIRType>(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype);
- // Add the arg types ourselves. Yes, this sucks, but Clang can't
- // deduce template pack parameters in the middle of an argument list.
- switch (imgtype.image.dim)
- {
- case Dim2D:
- fname += ", float2";
- if (imgtype.image.arrayed)
- fname += ", uint";
- if (imgtype.image.depth)
- fname += ", float";
- if (!imgtype.image.depth || has_offset)
- fname += ", int2";
- break;
- case DimCube:
- fname += ", float3";
- if (imgtype.image.arrayed)
- fname += ", uint";
- if (imgtype.image.depth)
- fname += ", float";
- break;
- default:
- SPIRV_CROSS_THROW("Invalid texture dimension for gather op.");
- }
- fname += ">";
- return fname;
+ add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle);
+ return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
}
auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
// Texture reference
- string fname = to_expression(combined ? combined->image : img) + ".";
- if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype))
- fname = "spvTextureSwizzle(" + fname;
-
- // Texture function and sampler
- if (is_fetch)
- fname += "read";
- else if (is_gather)
- fname += "gather";
+ string fname;
+ if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler)
+ {
+ if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3)
+ SPIRV_CROSS_THROW("Unhandled number of color image planes!");
+ // 444 images aren't downsampled, so we don't need to do linear filtering.
+ if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 ||
+ constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST)
+ {
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane);
+ else
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane);
+ fname = "spvChromaReconstructNearest";
+ }
+ else // Linear with a downsampled format
+ {
+ fname = "spvChromaReconstructLinear";
+ switch (constexpr_sampler->resolution)
+ {
+ case MSL_FORMAT_RESOLUTION_444:
+ assert(false);
+ break; // not reached
+ case MSL_FORMAT_RESOLUTION_422:
+ switch (constexpr_sampler->x_chroma_offset)
+ {
+ case MSL_CHROMA_LOCATION_COSITED_EVEN:
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane);
+ else
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane);
+ fname += "422CositedEven";
+ break;
+ case MSL_CHROMA_LOCATION_MIDPOINT:
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane);
+ else
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane);
+ fname += "422Midpoint";
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid chroma location.");
+ }
+ break;
+ case MSL_FORMAT_RESOLUTION_420:
+ fname += "420";
+ switch (constexpr_sampler->x_chroma_offset)
+ {
+ case MSL_CHROMA_LOCATION_COSITED_EVEN:
+ switch (constexpr_sampler->y_chroma_offset)
+ {
+ case MSL_CHROMA_LOCATION_COSITED_EVEN:
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane);
+ else
+ add_spv_func_and_recompile(
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane);
+ fname += "XCositedEvenYCositedEven";
+ break;
+ case MSL_CHROMA_LOCATION_MIDPOINT:
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane);
+ else
+ add_spv_func_and_recompile(
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane);
+ fname += "XCositedEvenYMidpoint";
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid Y chroma location.");
+ }
+ break;
+ case MSL_CHROMA_LOCATION_MIDPOINT:
+ switch (constexpr_sampler->y_chroma_offset)
+ {
+ case MSL_CHROMA_LOCATION_COSITED_EVEN:
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(
+ SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane);
+ else
+ add_spv_func_and_recompile(
+ SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane);
+ fname += "XMidpointYCositedEven";
+ break;
+ case MSL_CHROMA_LOCATION_MIDPOINT:
+ if (constexpr_sampler->planes == 2)
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane);
+ else
+ add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane);
+ fname += "XMidpointYMidpoint";
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid Y chroma location.");
+ }
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid X chroma location.");
+ }
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid format resolution.");
+ }
+ }
+ }
else
- fname += "sample";
+ {
+ fname = to_expression(combined ? combined->image : img) + ".";
- if (has_dref)
- fname += "_compare";
+ // Texture function and sampler
+ if (is_fetch)
+ fname += "read";
+ else if (is_gather)
+ fname += "gather";
+ else
+ fname += "sample";
+
+ if (has_dref)
+ fname += "_compare";
+ }
return fname;
}
+string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
+{
+ SPIRType t;
+ t.basetype = SPIRType::Float;
+ t.vecsize = components;
+ t.columns = 1;
+ return join(type_to_glsl_constructor(t), "(", expr, ")");
+}
+
+static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
+{
+ // Double is not supported to begin with, but doesn't hurt to check for completion.
+ return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
+}
+
// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
-string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
- uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
- uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp,
- uint32_t sample, bool *p_forward)
+string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+ bool is_proj, uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x,
+ uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
+ uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward)
{
+ const MSLConstexprSampler *constexpr_sampler = nullptr;
+ bool is_dynamic_img_sampler = false;
+ if (auto *var = maybe_get_backing_variable(img))
+ {
+ constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+ is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+ }
+
string farg_str;
- if (!is_fetch)
- farg_str += to_sampler_expression(img);
+ bool forward = true;
- if (msl_options.swizzle_texture_samples && is_gather)
+ if (!is_dynamic_img_sampler)
{
- if (!farg_str.empty())
- farg_str += ", ";
+ // Texture reference (for some cases)
+ if (needs_chroma_reconstruction(constexpr_sampler))
+ {
+ // Multiplanar images need two or three textures.
+ farg_str += to_expression(img);
+ for (uint32_t i = 1; i < constexpr_sampler->planes; i++)
+ farg_str += join(", ", to_expression(img), plane_name_suffix, i);
+ }
+ else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
+ msl_options.swizzle_texture_samples && is_gather)
+ {
+ auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
+ farg_str += to_expression(combined ? combined->image : img);
+ }
- auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
- farg_str += to_expression(combined ? combined->image : img);
+ // Sampler reference
+ if (!is_fetch)
+ {
+ if (!farg_str.empty())
+ farg_str += ", ";
+ farg_str += to_sampler_expression(img);
+ }
+
+ if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
+ msl_options.swizzle_texture_samples && is_gather)
+ {
+ // Add the swizzle constant from the swizzle buffer.
+ farg_str += ", " + to_swizzle_expression(img);
+ used_swizzle_buffer = true;
+ }
+
+ // Swizzled gather puts the component before the other args, to allow template
+ // deduction to work.
+ if (comp && msl_options.swizzle_texture_samples)
+ {
+ forward = should_forward(comp);
+ farg_str += ", " + to_component_argument(comp);
+ }
}
// Texture coordinates
- bool forward = should_forward(coord);
+ forward = forward && should_forward(coord);
auto coord_expr = to_enclosed_expression(coord);
auto &coord_type = expression_type(coord);
bool coord_is_fp = type_is_floating_point(coord_type);
@@ -4483,6 +6593,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+ else if (sampling_type_needs_f32_conversion(coord_type))
+ tex_coords = convert_to_f32(tex_coords, 1);
alt_coord_component = 1;
break;
@@ -4518,6 +6630,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+ else if (sampling_type_needs_f32_conversion(coord_type))
+ tex_coords = convert_to_f32(tex_coords, 2);
alt_coord_component = 2;
break;
@@ -4528,6 +6642,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (is_fetch)
tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+ else if (sampling_type_needs_f32_conversion(coord_type))
+ tex_coords = convert_to_f32(tex_coords, 3);
alt_coord_component = 3;
break;
@@ -4545,6 +6661,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
tex_coords = enclose_expression(tex_coords) + ".xyz";
}
+ if (sampling_type_needs_f32_conversion(coord_type))
+ tex_coords = convert_to_f32(tex_coords, 3);
+
alt_coord_component = 3;
break;
@@ -4575,7 +6694,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
// If projection, use alt coord as divisor
if (is_proj)
- tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+ {
+ if (sampling_type_needs_f32_conversion(coord_type))
+ tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
+ else
+ tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+ }
if (!farg_str.empty())
farg_str += ", ";
@@ -4609,11 +6733,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
forward = forward && should_forward(dref);
farg_str += ", ";
+ auto &dref_type = expression_type(dref);
+
+ string dref_expr;
if (is_proj)
- farg_str +=
- to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
+ dref_expr =
+ join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
else
- farg_str += to_expression(dref);
+ dref_expr = to_expression(dref);
+
+ if (sampling_type_needs_f32_conversion(dref_type))
+ dref_expr = convert_to_f32(dref_expr, 1);
+
+ farg_str += dref_expr;
if (msl_options.is_macos() && (grad_x || grad_y))
{
@@ -4706,6 +6838,20 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")";
}
+ if (minlod)
+ {
+ if (msl_options.is_macos())
+ {
+ if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up on macOS.");
+ }
+ else if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("min_lod_clamp() is not supported on iOS.");
+
+ forward = forward && should_forward(minlod);
+ farg_str += ", min_lod_clamp(" + to_expression(minlod) + ")";
+ }
+
// Add offsets
string offset_expr;
if (coffset && !is_fetch)
@@ -4748,25 +6894,20 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
if (imgtype.image.dim == Dim2D && offset_expr.empty())
farg_str += ", int2(0)";
- forward = forward && should_forward(comp);
- farg_str += ", " + to_component_argument(comp);
+ if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
+ {
+ forward = forward && should_forward(comp);
+ farg_str += ", " + to_component_argument(comp);
+ }
}
if (sample)
{
+ forward = forward && should_forward(sample);
farg_str += ", ";
farg_str += to_expression(sample);
}
- if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype))
- {
- // Add the swizzle constant from the swizzle buffer.
- if (!is_gather)
- farg_str += ")";
- farg_str += ", " + to_swizzle_expression(img);
- used_aux_buffer = true;
- }
-
*p_forward = forward;
return farg_str;
@@ -4813,12 +6954,216 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id
set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
}
+string CompilerMSL::to_texture_op(const Instruction &i, bool *forward, SmallVector<uint32_t> &inherited_expressions)
+{
+ auto *ops = stream(i);
+ uint32_t result_type_id = ops[0];
+ uint32_t img = ops[2];
+ auto &result_type = get<SPIRType>(result_type_id);
+ auto op = static_cast<Op>(i.op);
+ bool is_gather = (op == OpImageGather || op == OpImageDrefGather);
+
+ // Bypass pointers because we need the real image struct
+ auto &type = expression_type(img);
+ auto &imgtype = get<SPIRType>(type.self);
+
+ const MSLConstexprSampler *constexpr_sampler = nullptr;
+ bool is_dynamic_img_sampler = false;
+ if (auto *var = maybe_get_backing_variable(img))
+ {
+ constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+ is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+ }
+
+ string expr;
+ if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
+ {
+ // If this needs sampler Y'CbCr conversion, we need to do some additional
+ // processing.
+ switch (constexpr_sampler->ycbcr_model)
+ {
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
+ add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709);
+ expr += "spvConvertYCbCrBT709(";
+ break;
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
+ add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601);
+ expr += "spvConvertYCbCrBT601(";
+ break;
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
+ add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020);
+ expr += "spvConvertYCbCrBT2020(";
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
+ }
+
+ if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+ {
+ switch (constexpr_sampler->ycbcr_range)
+ {
+ case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL:
+ add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange);
+ expr += "spvExpandITUFullRange(";
+ break;
+ case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW:
+ add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange);
+ expr += "spvExpandITUNarrowRange(";
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid Y'CbCr range.");
+ }
+ }
+ }
+ else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
+ !is_dynamic_img_sampler)
+ {
+ add_spv_func_and_recompile(SPVFuncImplTextureSwizzle);
+ expr += "spvTextureSwizzle(";
+ }
+
+ string inner_expr = CompilerGLSL::to_texture_op(i, forward, inherited_expressions);
+
+ if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
+ {
+ if (!constexpr_sampler->swizzle_is_identity())
+ {
+ static const char swizzle_names[] = "rgba";
+ if (!constexpr_sampler->swizzle_has_one_or_zero())
+ {
+ // If we can, do it inline.
+ expr += inner_expr + ".";
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ switch (constexpr_sampler->swizzle[c])
+ {
+ case MSL_COMPONENT_SWIZZLE_IDENTITY:
+ expr += swizzle_names[c];
+ break;
+ case MSL_COMPONENT_SWIZZLE_R:
+ case MSL_COMPONENT_SWIZZLE_G:
+ case MSL_COMPONENT_SWIZZLE_B:
+ case MSL_COMPONENT_SWIZZLE_A:
+ expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid component swizzle.");
+ }
+ }
+ }
+ else
+ {
+ // Otherwise, we need to emit a temporary and swizzle that.
+ uint32_t temp_id = ir.increase_bound_by(1);
+ emit_op(result_type_id, temp_id, inner_expr, false);
+ for (auto &inherit : inherited_expressions)
+ inherit_expression_dependencies(temp_id, inherit);
+ inherited_expressions.clear();
+ inherited_expressions.push_back(temp_id);
+
+ switch (op)
+ {
+ case OpImageSampleDrefImplicitLod:
+ case OpImageSampleImplicitLod:
+ case OpImageSampleProjImplicitLod:
+ case OpImageSampleProjDrefImplicitLod:
+ register_control_dependent_expression(temp_id);
+ break;
+
+ default:
+ break;
+ }
+ expr += type_to_glsl(result_type) + "(";
+ for (uint32_t c = 0; c < 4; c++)
+ {
+ switch (constexpr_sampler->swizzle[c])
+ {
+ case MSL_COMPONENT_SWIZZLE_IDENTITY:
+ expr += to_expression(temp_id) + "." + swizzle_names[c];
+ break;
+ case MSL_COMPONENT_SWIZZLE_ZERO:
+ expr += "0";
+ break;
+ case MSL_COMPONENT_SWIZZLE_ONE:
+ expr += "1";
+ break;
+ case MSL_COMPONENT_SWIZZLE_R:
+ case MSL_COMPONENT_SWIZZLE_G:
+ case MSL_COMPONENT_SWIZZLE_B:
+ case MSL_COMPONENT_SWIZZLE_A:
+ expr += to_expression(temp_id) + "." +
+ swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid component swizzle.");
+ }
+ if (c < 3)
+ expr += ", ";
+ }
+ expr += ")";
+ }
+ }
+ else
+ expr += inner_expr;
+ if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+ {
+ expr += join(", ", constexpr_sampler->bpc, ")");
+ if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
+ expr += ")";
+ }
+ }
+ else
+ {
+ expr += inner_expr;
+ if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
+ !is_dynamic_img_sampler)
+ {
+ // Add the swizzle constant from the swizzle buffer.
+ expr += ", " + to_swizzle_expression(img) + ")";
+ used_swizzle_buffer = true;
+ }
+ }
+
+ return expr;
+}
+
+static string create_swizzle(MSLComponentSwizzle swizzle)
+{
+ switch (swizzle)
+ {
+ case MSL_COMPONENT_SWIZZLE_IDENTITY:
+ return "spvSwizzle::none";
+ case MSL_COMPONENT_SWIZZLE_ZERO:
+ return "spvSwizzle::zero";
+ case MSL_COMPONENT_SWIZZLE_ONE:
+ return "spvSwizzle::one";
+ case MSL_COMPONENT_SWIZZLE_R:
+ return "spvSwizzle::red";
+ case MSL_COMPONENT_SWIZZLE_G:
+ return "spvSwizzle::green";
+ case MSL_COMPONENT_SWIZZLE_B:
+ return "spvSwizzle::blue";
+ case MSL_COMPONENT_SWIZZLE_A:
+ return "spvSwizzle::alpha";
+ default:
+ SPIRV_CROSS_THROW("Invalid component swizzle.");
+ return "";
+ }
+}
+
// Returns a string representation of the ID, usable as a function arg.
// Manufacture automatic sampler arg for SampledImage texture.
-string CompilerMSL::to_func_call_arg(uint32_t id)
+string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
string arg_str;
+ auto &type = expression_type(id);
+ bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+ // If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around.
+ bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler);
+ if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler)
+ arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">(");
+
auto *c = maybe_get<SPIRConstant>(id);
if (c && !get<SPIRType>(c->constant_type).array.empty())
{
@@ -4833,7 +7178,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
// so just create a thread local copy in the current function.
arg_str = join("_", id, "_array_copy");
auto &constants = current_function->constant_arrays_needed_on_stack;
- auto itr = find(begin(constants), end(constants), id);
+ auto itr = find(begin(constants), end(constants), ID(id));
if (itr == end(constants))
{
force_recompile();
@@ -4841,22 +7186,106 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
}
}
else
- arg_str = CompilerGLSL::to_func_call_arg(id);
+ arg_str += CompilerGLSL::to_func_call_arg(arg, id);
- // Manufacture automatic sampler arg if the arg is a SampledImage texture.
- auto &type = expression_type(id);
- if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
+ if (!arg_is_dynamic_img_sampler)
{
// Need to check the base variable in case we need to apply a qualified alias.
uint32_t var_id = 0;
- auto *sampler_var = maybe_get<SPIRVariable>(id);
- if (sampler_var)
- var_id = sampler_var->basevariable;
+ auto *var = maybe_get<SPIRVariable>(id);
+ if (var)
+ var_id = var->basevariable;
- arg_str += ", " + to_sampler_expression(var_id ? var_id : id);
+ auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id);
+ if (type.basetype == SPIRType::SampledImage)
+ {
+ // Manufacture automatic plane args for multiplanar texture
+ uint32_t planes = 1;
+ if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+ {
+ planes = constexpr_sampler->planes;
+ // If this parameter isn't aliasing a global, then we need to use
+ // the special "dynamic image-sampler" class to pass it--and we need
+ // to use it for *every* non-alias parameter, in case a combined
+ // image-sampler with a Y'CbCr conversion is passed. Hopefully, this
+ // pathological case is so rare that it should never be hit in practice.
+ if (!arg.alias_global_variable)
+ add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler);
+ }
+ for (uint32_t i = 1; i < planes; i++)
+ arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i);
+ // Manufacture automatic sampler arg if the arg is a SampledImage texture.
+ if (type.image.dim != DimBuffer)
+ arg_str += ", " + to_sampler_expression(var_id ? var_id : id);
+
+ // Add sampler Y'CbCr conversion info if we have it
+ if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+ {
+ SmallVector<string> samp_args;
+
+ switch (constexpr_sampler->resolution)
+ {
+ case MSL_FORMAT_RESOLUTION_444:
+ // Default
+ break;
+ case MSL_FORMAT_RESOLUTION_422:
+ samp_args.push_back("spvFormatResolution::_422");
+ break;
+ case MSL_FORMAT_RESOLUTION_420:
+ samp_args.push_back("spvFormatResolution::_420");
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid format resolution.");
+ }
+
+ if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST)
+ samp_args.push_back("spvChromaFilter::linear");
+
+ if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
+ samp_args.push_back("spvXChromaLocation::midpoint");
+ if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
+ samp_args.push_back("spvYChromaLocation::midpoint");
+ switch (constexpr_sampler->ycbcr_model)
+ {
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
+ // Default
+ break;
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
+ samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity");
+ break;
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
+ samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709");
+ break;
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
+ samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601");
+ break;
+ case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
+ samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020");
+ break;
+ default:
+ SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
+ }
+ if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL)
+ samp_args.push_back("spvYCbCrRange::itu_narrow");
+ samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")"));
+ arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")");
+ }
+ }
+
+ if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+ arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(",
+ create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(",
+ create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(",
+ create_swizzle(constexpr_sampler->swizzle[0]), ")");
+ else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
+ arg_str += ", " + to_swizzle_expression(var_id ? var_id : id);
+
+ if (buffers_requiring_array_length.count(var_id))
+ arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id);
+
+ if (is_dynamic_img_sampler)
+ arg_str += ")";
}
- if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
- arg_str += ", " + to_swizzle_expression(id);
return arg_str;
}
@@ -4867,7 +7296,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
string CompilerMSL::to_sampler_expression(uint32_t id)
{
auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
- auto expr = to_expression(combined ? combined->image : id);
+ auto expr = to_expression(combined ? combined->image : VariableID(id));
auto index = expr.find_first_of('[');
uint32_t samp_id = 0;
@@ -4887,9 +7316,15 @@ string CompilerMSL::to_sampler_expression(uint32_t id)
string CompilerMSL::to_swizzle_expression(uint32_t id)
{
auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
- auto expr = to_expression(combined ? combined->image : id);
+
+ auto expr = to_expression(combined ? combined->image : VariableID(id));
auto index = expr.find_first_of('[');
+ // If an image is part of an argument buffer translate this to a legal identifier.
+ for (auto &c : expr)
+ if (c == '.')
+ c = '_';
+
if (index == string::npos)
return expr + swizzle_name_suffix;
else
@@ -4900,6 +7335,32 @@ string CompilerMSL::to_swizzle_expression(uint32_t id)
}
}
+string CompilerMSL::to_buffer_size_expression(uint32_t id)
+{
+ auto expr = to_expression(id);
+ auto index = expr.find_first_of('[');
+
+ // This is quite crude, but we need to translate the reference name (*spvDescriptorSetN.name) to
+ // the pointer expression spvDescriptorSetN.name to make a reasonable expression here.
+ // This only happens if we have argument buffers and we are using OpArrayLength on a lone SSBO in that set.
+ if (expr.size() >= 3 && expr[0] == '(' && expr[1] == '*')
+ expr = address_of_expression(expr);
+
+ // If a buffer is part of an argument buffer translate this to a legal identifier.
+ for (auto &c : expr)
+ if (c == '.')
+ c = '_';
+
+ if (index == string::npos)
+ return expr + buffer_size_name_suffix;
+ else
+ {
+ auto buffer_expr = expr.substr(0, index);
+ auto array_expr = expr.substr(index);
+ return buffer_expr + buffer_size_name_suffix + array_expr;
+ }
+}
+
// Checks whether the type is a Block all of whose members have DecorationPatch.
bool CompilerMSL::is_patch_block(const SPIRType &type)
{
@@ -4918,91 +7379,33 @@ bool CompilerMSL::is_patch_block(const SPIRType &type)
// Checks whether the ID is a row_major matrix that requires conversion before use
bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
{
- // Natively supported row-major matrices do not need to be converted.
- if (backend.native_row_major_matrix)
- return false;
-
- // Non-matrix or column-major matrix types do not need to be converted.
- if (!has_decoration(id, DecorationRowMajor))
- return false;
-
- // Generate a function that will swap matrix elements from row-major to column-major.
- // Packed row-matrix should just use transpose() function.
- if (!has_extended_decoration(id, SPIRVCrossDecorationPacked))
- {
- const auto type = expression_type(id);
- add_convert_row_major_matrix_function(type.columns, type.vecsize);
- }
-
- return true;
+ auto *e = maybe_get<SPIRExpression>(id);
+ if (e)
+ return e->need_transpose;
+ else
+ return has_decoration(id, DecorationRowMajor);
}
// Checks whether the member is a row_major matrix that requires conversion before use
bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
- // Natively supported row-major matrices do not need to be converted.
- if (backend.native_row_major_matrix)
- return false;
-
- // Non-matrix or column-major matrix types do not need to be converted.
- if (!has_member_decoration(type.self, index, DecorationRowMajor))
- return false;
-
- // Generate a function that will swap matrix elements from row-major to column-major.
- // Packed row-matrix should just use transpose() function.
- if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked))
- {
- const auto mbr_type = get<SPIRType>(type.member_types[index]);
- add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
- }
-
- return true;
+ return has_member_decoration(type.self, index, DecorationRowMajor);
}
-// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix.
-void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows)
+string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
+ bool is_packed)
{
- SPVFuncImpl spv_func;
- if (cols == rows) // Square matrix...just use transpose() function
- return;
- else if (cols == 2 && rows == 3)
- spv_func = SPVFuncImplRowMajor2x3;
- else if (cols == 2 && rows == 4)
- spv_func = SPVFuncImplRowMajor2x4;
- else if (cols == 3 && rows == 2)
- spv_func = SPVFuncImplRowMajor3x2;
- else if (cols == 3 && rows == 4)
- spv_func = SPVFuncImplRowMajor3x4;
- else if (cols == 4 && rows == 2)
- spv_func = SPVFuncImplRowMajor4x2;
- else if (cols == 4 && rows == 3)
- spv_func = SPVFuncImplRowMajor4x3;
- else
- SPIRV_CROSS_THROW("Could not convert row-major matrix.");
-
- auto rslt = spv_function_implementations.insert(spv_func);
- if (rslt.second)
+ if (!is_matrix(exp_type))
{
- suppress_missing_prototypes = true;
- force_recompile();
+ return CompilerGLSL::convert_row_major_matrix(move(exp_str), exp_type, physical_type_id, is_packed);
}
-}
-
-// Wraps the expression string in a function call that converts the
-// row_major matrix result of the expression to a column_major matrix.
-string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed)
-{
- strip_enclosed_expression(exp_str);
-
- string func_name;
-
- // Square and packed matrices can just use transpose
- if (exp_type.columns == exp_type.vecsize || is_packed)
- func_name = "transpose";
else
- func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
-
- return join(func_name, "(", exp_str, ")");
+ {
+ strip_enclosed_expression(exp_str);
+ if (physical_type_id != 0 || is_packed)
+ exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true);
+ return join("transpose(", exp_str, ")");
+ }
}
// Called automatically at the end of the entry point function
@@ -5025,55 +7428,66 @@ void CompilerMSL::emit_fixup()
string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const string &qualifier)
{
- auto &membertype = get<SPIRType>(member_type_id);
-
- // If this member requires padding to maintain alignment, emit a dummy padding member.
- MSLStructMemberKey key = get_struct_member_key(type.self, index);
- uint32_t pad_len = struct_member_padding[key];
- if (pad_len > 0)
- statement("char _m", index, "_pad", "[", to_string(pad_len), "];");
+ if (member_is_remapped_physical_type(type, index))
+ member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+ auto &physical_type = get<SPIRType>(member_type_id);
// If this member is packed, mark it as so.
- string pack_pfx = "";
-
- const SPIRType *effective_membertype = &membertype;
- SPIRType override_type;
+ string pack_pfx;
uint32_t orig_id = 0;
if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))
orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID);
- if (member_is_packed_type(type, index))
+ bool row_major = false;
+ if (is_matrix(physical_type))
+ row_major = has_member_decoration(type.self, index, DecorationRowMajor);
+
+ SPIRType row_major_physical_type;
+ const SPIRType *declared_type = &physical_type;
+
+ if (member_is_packed_physical_type(type, index))
{
// If we're packing a matrix, output an appropriate typedef
- if (membertype.basetype == SPIRType::Struct)
+ if (physical_type.basetype == SPIRType::Struct)
{
- pack_pfx = "/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ ";
+ SPIRV_CROSS_THROW("Cannot emit a packed struct currently.");
}
- else if (membertype.vecsize > 1 && membertype.columns > 1)
+ else if (is_matrix(physical_type))
{
+ uint32_t rows = physical_type.vecsize;
+ uint32_t cols = physical_type.columns;
pack_pfx = "packed_";
- string base_type = membertype.width == 16 ? "half" : "float";
+ if (row_major)
+ {
+ // These are stored transposed.
+ rows = physical_type.columns;
+ cols = physical_type.vecsize;
+ pack_pfx = "packed_rm_";
+ }
+ string base_type = physical_type.width == 16 ? "half" : "float";
string td_line = "typedef ";
- td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns);
+ td_line += "packed_" + base_type + to_string(rows);
td_line += " " + pack_pfx;
- td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize);
+ // Use the actual matrix size here.
+ td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize);
+ td_line += "[" + to_string(cols) + "]";
td_line += ";";
add_typedef_line(td_line);
}
- else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
- {
- // A "packed" float array, but we pad here instead to 4-vector.
- override_type = membertype;
- override_type.vecsize = 4;
- effective_membertype = &override_type;
- }
else
pack_pfx = "packed_";
}
+ else if (row_major)
+ {
+ // Need to declare type with flipped vecsize/columns.
+ row_major_physical_type = physical_type;
+ swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
+ declared_type = &row_major_physical_type;
+ }
// Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
- if (msl_options.is_ios() && membertype.basetype == SPIRType::Image && membertype.image.sampled == 2)
+ if (msl_options.is_ios() && physical_type.basetype == SPIRType::Image && physical_type.image.sampled == 2)
{
if (!has_decoration(orig_id, DecorationNonWritable))
SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS.");
@@ -5081,13 +7495,13 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
// Array information is baked into these types.
string array_type;
- if (membertype.basetype != SPIRType::Image && membertype.basetype != SPIRType::Sampler &&
- membertype.basetype != SPIRType::SampledImage)
+ if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler &&
+ physical_type.basetype != SPIRType::SampledImage)
{
- array_type = type_to_array_glsl(membertype);
+ array_type = type_to_array_glsl(physical_type);
}
- return join(pack_pfx, type_to_glsl(*effective_membertype, orig_id), " ", qualifier, to_member_name(type, index),
+ return join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
member_attribute_qualifier(type, index), array_type, ";");
}
@@ -5095,9 +7509,26 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const string &qualifier, uint32_t)
{
+ // If this member requires padding to maintain its declared offset, emit a dummy padding member before it.
+ if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget))
+ {
+ uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget);
+ statement("char _m", index, "_pad", "[", pad_len, "];");
+ }
+
statement(to_struct_member(type, member_type_id, index, qualifier));
}
+void CompilerMSL::emit_struct_padding_target(const SPIRType &type)
+{
+ uint32_t struct_size = get_declared_struct_size_msl(type, true, true);
+ uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget);
+ if (target_size < struct_size)
+ SPIRV_CROSS_THROW("Cannot pad with negative bytes.");
+ else if (target_size > struct_size)
+ statement("char _m0_final_padding[", target_size - struct_size, "];");
+}
+
// Return a MSL qualifier for the specified function attribute member
string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index)
{
@@ -5109,9 +7540,16 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
BuiltIn builtin = BuiltInMax;
bool is_builtin = is_member_builtin(type, index, &builtin);
- if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID))
- return join(" [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID),
- ")]]");
+ if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
+ {
+ string quals = join(
+ " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
+ if (interlocked_resources.count(
+ get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
+ quals += ", raster_order_group(0)";
+ quals += "]]";
+ return quals;
+ }
// Vertex function inputs
if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
@@ -5188,6 +7626,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
{
case BuiltInInvocationId:
case BuiltInPrimitiveId:
+ case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
+ case BuiltInSubgroupSize: // FIXME: Should work in any stage
return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
case BuiltInPatchVertices:
return "";
@@ -5239,18 +7679,25 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
// Fragment function inputs
if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput)
{
- string quals = "";
+ string quals;
if (is_builtin)
{
switch (builtin)
{
+ case BuiltInViewIndex:
+ if (!msl_options.multiview)
+ break;
+ /* fallthrough */
case BuiltInFrontFacing:
case BuiltInPointCoord:
case BuiltInFragCoord:
case BuiltInSampleId:
case BuiltInSampleMask:
case BuiltInLayer:
+ case BuiltInBaryCoordNV:
+ case BuiltInBaryCoordNoPerspNV:
quals = builtin_qualifier(builtin);
+ break;
default:
break;
@@ -5268,6 +7715,20 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
quals = string("user(locn") + convert_to_string(locn) + ")";
}
}
+
+ if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV)
+ {
+ if (has_member_decoration(type.self, index, DecorationFlat) ||
+ has_member_decoration(type.self, index, DecorationCentroid) ||
+ has_member_decoration(type.self, index, DecorationSample) ||
+ has_member_decoration(type.self, index, DecorationNoPerspective))
+ {
+ // NoPerspective is baked into the builtin type.
+ SPIRV_CROSS_THROW(
+ "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs.");
+ }
+ }
+
// Don't bother decorating integers with the 'flat' attribute; it's
// the default (in fact, the only option). Also don't bother with the
// FragCoord builtin; it's always noperspective on Metal.
@@ -5304,6 +7765,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
quals += "center_no_perspective";
}
}
+
if (!quals.empty())
return " [[" + quals + "]]";
}
@@ -5315,6 +7777,11 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
{
switch (builtin)
{
+ case BuiltInFragStencilRefEXT:
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up.");
+ return string(" [[") + builtin_qualifier(builtin) + "]]";
+
case BuiltInSampleMask:
case BuiltInFragDepth:
return string(" [[") + builtin_qualifier(builtin) + "]]";
@@ -5347,6 +7814,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
case BuiltInNumWorkgroups:
case BuiltInLocalInvocationId:
case BuiltInLocalInvocationIndex:
+ case BuiltInNumSubgroups:
+ case BuiltInSubgroupId:
+ case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
+ case BuiltInSubgroupSize: // FIXME: Should work in any stage
return string(" [[") + builtin_qualifier(builtin) + "]]";
default:
@@ -5417,8 +7888,10 @@ string CompilerMSL::func_type_decl(SPIRType &type)
execution.output_vertices, ") ]] vertex");
break;
case ExecutionModelFragment:
- entry_type =
- execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment";
+ entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) ||
+ execution.flags.get(ExecutionModePostDepthCoverage) ?
+ "[[ early_fragment_tests ]] fragment" :
+ "fragment";
break;
case ExecutionModelTessellationControl:
if (!msl_options.supports_msl_version(1, 2))
@@ -5442,21 +7915,37 @@ string CompilerMSL::func_type_decl(SPIRType &type)
string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
{
const auto &type = get<SPIRType>(argument.basetype);
+ return get_type_address_space(type, argument.self, true);
+}
+string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
+{
+ // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+ Bitset flags;
+ auto *var = maybe_get<SPIRVariable>(id);
+ if (var && type.basetype == SPIRType::Struct &&
+ (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+ flags = get_buffer_block_flags(id);
+ else
+ flags = get_decoration_bitset(id);
+
+ const char *addr_space = nullptr;
switch (type.storage)
{
case StorageClassWorkgroup:
- return "threadgroup";
+ addr_space = "threadgroup";
+ break;
case StorageClassStorageBuffer:
{
// For arguments from variable pointers, we use the write count deduction, so
// we should not assume any constness here. Only for global SSBOs.
bool readonly = false;
- if (has_decoration(type.self, DecorationBlock))
- readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
+ if (!var || has_decoration(type.self, DecorationBlock))
+ readonly = flags.get(DecorationNonWritable);
- return readonly ? "const device" : "device";
+ addr_space = readonly ? "const device" : "device";
+ break;
}
case StorageClassUniform:
@@ -5466,93 +7955,58 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
{
bool ssbo = has_decoration(type.self, DecorationBufferBlock);
if (ssbo)
- {
- bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
- return readonly ? "const device" : "device";
- }
+ addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
else
- return "constant";
+ addr_space = "constant";
}
+ else if (!argument)
+ addr_space = "constant";
break;
case StorageClassFunction:
case StorageClassGeneric:
- // No address space for plain values.
- return type.pointer ? "thread" : "";
+ break;
case StorageClassInput:
- if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id)
- return "threadgroup";
+ if (get_execution_model() == ExecutionModelTessellationControl && var &&
+ var->basevariable == stage_in_ptr_var_id)
+ addr_space = "threadgroup";
break;
case StorageClassOutput:
if (capture_output_to_buffer)
- return "device";
+ addr_space = "device";
break;
default:
break;
}
- return "thread";
+ if (!addr_space)
+ // No address space for plain values.
+ addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
+
+ return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
}
-string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
{
- switch (type.storage)
- {
- case StorageClassWorkgroup:
- return "threadgroup";
-
- case StorageClassStorageBuffer:
- {
- // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
- Bitset flags;
- if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
+ // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+ Bitset flags;
+ if (ir.ids[id].get_type() == TypeVariable)
+ {
+ uint32_t type_id = expression_type_id(id);
+ auto &type = expression_type(id);
+ if (type.basetype == SPIRType::Struct &&
+ (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
flags = get_buffer_block_flags(id);
else
flags = get_decoration_bitset(id);
-
- return flags.get(DecorationNonWritable) ? "const device" : "device";
- }
-
- case StorageClassUniform:
- case StorageClassUniformConstant:
- case StorageClassPushConstant:
- if (type.basetype == SPIRType::Struct)
- {
- bool ssbo = has_decoration(type.self, DecorationBufferBlock);
- if (ssbo)
- {
- // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
- Bitset flags;
- if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
- flags = get_buffer_block_flags(id);
- else
- flags = get_decoration_bitset(id);
-
- return flags.get(DecorationNonWritable) ? "const device" : "device";
- }
- else
- return "constant";
- }
- break;
-
- case StorageClassFunction:
- case StorageClassGeneric:
- // No address space for plain values.
- return type.pointer ? "thread" : "";
-
- case StorageClassOutput:
- if (capture_output_to_buffer)
- return "device";
- break;
-
- default:
- break;
}
+ else
+ flags = get_decoration_bitset(id);
- return "thread";
+ return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
}
string CompilerMSL::entry_point_arg_stage_in()
@@ -5581,8 +8035,9 @@ string CompilerMSL::entry_point_arg_stage_in()
void CompilerMSL::entry_point_args_builtin(string &ep_args)
{
// Builtin variables
+ SmallVector<pair<SPIRVariable *, BuiltIn>, 8> active_builtins;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
- BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type;
+ auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
// Don't emit SamplePosition as a separate parameter. In the entry
// point, we get that by calling get_sample_position() on the sample ID.
@@ -5590,20 +8045,66 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
get_variable_data_type(var).basetype != SPIRType::Struct &&
get_variable_data_type(var).basetype != SPIRType::ControlPointArray)
{
+ // If the builtin is not part of the active input builtin set, don't emit it.
+ // Relevant for multiple entry-point modules which might declare unused builtins.
+ if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id))
+ return;
+
+ // Remember this variable. We may need to correct its type.
+ active_builtins.push_back(make_pair(&var, bi_type));
+
+ // These builtins are emitted specially. If we pass this branch, the builtin directly matches
+ // a MSL builtin.
if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation &&
bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner &&
bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize &&
- bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance)
+ bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask &&
+ bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV &&
+ bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask &&
+ bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask && bi_type != BuiltInDeviceIndex &&
+ ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) ||
+ bi_type != BuiltInViewIndex) &&
+ (get_execution_model() == ExecutionModelGLCompute ||
+ (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) ||
+ (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize)))
{
if (!ep_args.empty())
ep_args += ", ";
- ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
- ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
+ ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id);
+ ep_args += " [[" + builtin_qualifier(bi_type);
+ if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
+ {
+ if (!msl_options.supports_msl_version(2))
+ SPIRV_CROSS_THROW("Post-depth coverage requires Metal 2.0.");
+ if (!msl_options.is_ios())
+ SPIRV_CROSS_THROW("Post-depth coverage is only supported on iOS.");
+ ep_args += ", post_depth_coverage";
+ }
+ ep_args += "]]";
}
}
+
+ if (var.storage == StorageClassInput &&
+ has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase))
+ {
+ // This is a special implicit builtin, not corresponding to any SPIR-V builtin,
+ // which holds the base that was passed to vkCmdDispatchBase(). If it's present,
+ // assume we emitted it for a good reason.
+ assert(msl_options.supports_msl_version(1, 2));
+ if (!ep_args.empty())
+ ep_args += ", ";
+
+ ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]";
+ }
});
+ // Correct the types of all encountered active builtins. We couldn't do this before
+ // because ensure_correct_builtin_type() may increase the bound, which isn't allowed
+ // while iterating over IDs.
+ for (auto &var : active_builtins)
+ var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second);
+
// Vertex and instance index built-ins
if (needs_vertex_idx_arg)
ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty());
@@ -5670,6 +8171,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
{
string ep_args = entry_point_arg_stage_in();
+ Bitset claimed_bindings;
for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
{
@@ -5684,12 +8186,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
if (!ep_args.empty())
ep_args += ", ";
- ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
- ep_args += " [[buffer(" + convert_to_string(i) + ")]]";
+ // Check if the argument buffer binding itself has been remapped.
+ uint32_t buffer_binding;
+ auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
+ if (itr != end(resource_bindings))
+ {
+ buffer_binding = itr->second.first.msl_buffer;
+ itr->second.second = true;
+ }
+ else
+ {
+ // As a fallback, directly map desc set <-> binding.
+ // If that was taken, take the next buffer binding.
+ if (claimed_bindings.get(i))
+ buffer_binding = next_metal_resource_index_buffer;
+ else
+ buffer_binding = i;
+ }
+
+ claimed_bindings.set(buffer_binding);
+
+ ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
+ ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
- // Makes it more practical for testing, since the push constant block can occupy the first available
- // buffer slot if it's not bound explicitly.
- next_metal_resource_index_buffer = i + 1;
+ next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
}
entry_point_args_discrete_descriptors(ep_args);
@@ -5701,6 +8221,28 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
return ep_args;
}
+const MSLConstexprSampler *CompilerMSL::find_constexpr_sampler(uint32_t id) const
+{
+ // Try by ID.
+ {
+ auto itr = constexpr_samplers_by_id.find(id);
+ if (itr != end(constexpr_samplers_by_id))
+ return &itr->second;
+ }
+
+ // Try by binding.
+ {
+ uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
+ uint32_t binding = get_decoration(id, DecorationBinding);
+
+ auto itr = constexpr_samplers_by_binding.find({ desc_set, binding });
+ if (itr != end(constexpr_samplers_by_binding))
+ return &itr->second;
+ }
+
+ return nullptr;
+}
+
void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
{
// Output resources, sorted by resource index & type
@@ -5712,43 +8254,65 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
string name;
SPIRType::BaseType basetype;
uint32_t index;
+ uint32_t plane;
};
SmallVector<Resource> resources;
- ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
+ ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) &&
!is_hidden_variable(var))
{
auto &type = get_variable_data_type(var);
- uint32_t var_id = var.self;
- if (var.storage != StorageClassPushConstant)
+ // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
+ // But we won't know when the argument buffer is encoded whether this image will have
+ // a NonWritable decoration. So just use discrete arguments for all storage images
+ // on iOS.
+ if (!(msl_options.is_ios() && type.basetype == SPIRType::Image && type.image.sampled == 2) &&
+ var.storage != StorageClassPushConstant)
{
uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
if (descriptor_set_is_argument_buffer(desc_set))
return;
}
+ const MSLConstexprSampler *constexpr_sampler = nullptr;
+ if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
+ {
+ constexpr_sampler = find_constexpr_sampler(var_id);
+ if (constexpr_sampler)
+ {
+ // Mark this ID as a constexpr sampler for later in case it came from set/bindings.
+ constexpr_samplers_by_id[var_id] = *constexpr_sampler;
+ }
+ }
+
if (type.basetype == SPIRType::SampledImage)
{
add_resource_name(var_id);
- resources.push_back(
- { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) });
- if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0)
+ uint32_t plane_count = 1;
+ if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+ plane_count = constexpr_sampler->planes;
+
+ for (uint32_t i = 0; i < plane_count; i++)
+ resources.push_back({ &var, to_name(var_id), SPIRType::Image,
+ get_metal_resource_index(var, SPIRType::Image, i), i });
+
+ if (type.image.dim != DimBuffer && !constexpr_sampler)
{
resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler,
- get_metal_resource_index(var, SPIRType::Sampler) });
+ get_metal_resource_index(var, SPIRType::Sampler), 0 });
}
}
- else if (constexpr_samplers.count(var_id) == 0)
+ else if (!constexpr_sampler)
{
// constexpr samplers are not declared as resources.
add_resource_name(var_id);
resources.push_back(
- { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) });
+ { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });
}
}
});
@@ -5789,17 +8353,24 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
{
if (!ep_args.empty())
ep_args += ", ";
- ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" +
- convert_to_string(i);
- ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
+ ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
+ r.name + "_" + convert_to_string(i);
+ ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
}
}
else
{
if (!ep_args.empty())
ep_args += ", ";
- ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name;
- ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+ ep_args +=
+ get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
+ ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
}
break;
}
@@ -5813,10 +8384,25 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
if (!ep_args.empty())
ep_args += ", ";
ep_args += image_type_glsl(type, var_id) + " " + r.name;
- ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
+ if (r.plane > 0)
+ ep_args += join(plane_name_suffix, r.plane);
+ ep_args += " [[texture(" + convert_to_string(r.index) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
break;
default:
- SPIRV_CROSS_THROW("Unexpected resource type");
+ if (!ep_args.empty())
+ ep_args += ", ";
+ if (!type.pointer)
+ ep_args += get_type_address_space(get<SPIRType>(var.basetype), var_id) + " " +
+ type_to_glsl(type, var_id) + "& " + r.name;
+ else
+ ep_args += type_to_glsl(type, var_id) + " " + r.name;
+ ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
break;
}
}
@@ -5838,28 +8424,62 @@ string CompilerMSL::entry_point_args_classic(bool append_comma)
void CompilerMSL::fix_up_shader_inputs_outputs()
{
- // Look for sampled images. Add hooks to set up the swizzle constants.
+ // Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths.
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = get_variable_data_type(var);
-
uint32_t var_id = var.self;
+ bool ssbo = has_decoration(type.self, DecorationBufferBlock);
- if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
- var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) &&
- !is_hidden_variable(var))
+ if (var.storage == StorageClassUniformConstant && !is_hidden_variable(var))
{
if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
{
auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
- auto &aux_type = expression_type(aux_buffer_id);
bool is_array_type = !type.array.empty();
- // If we have an array of images, we need to be able to index into it, so take a pointer instead.
- statement("constant uint32_t", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
- is_array_type ? " = &" : " = ", to_name(aux_buffer_id), ".",
- to_member_name(aux_type, k_aux_mbr_idx_swizzle_const), "[",
- convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+ uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
+ if (descriptor_set_is_argument_buffer(desc_set))
+ {
+ statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
+ is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
+ ".spvSwizzleConstants", "[",
+ convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+ }
+ else
+ {
+ // If we have an array of images, we need to be able to index into it, so take a pointer instead.
+ statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
+ is_array_type ? " = &" : " = ", to_name(swizzle_buffer_id), "[",
+ convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+ }
+ });
+ }
+ }
+ else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) &&
+ !is_hidden_variable(var))
+ {
+ if (buffers_requiring_array_length.count(var.self))
+ {
+ auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
+ entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
+ bool is_array_type = !type.array.empty();
+
+ uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
+ if (descriptor_set_is_argument_buffer(desc_set))
+ {
+ statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
+ is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
+ ".spvBufferSizeConstants", "[",
+ convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
+ }
+ else
+ {
+ // If we have an array of images, we need to be able to index into it, so take a pointer instead.
+ statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
+ is_array_type ? " = &" : " = ", to_name(buffer_size_buffer_id), "[",
+ convert_to_string(get_metal_resource_index(var, type.basetype)), "];");
+ }
});
}
}
@@ -5911,6 +8531,225 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
}
break;
+ case BuiltInSubgroupLocalInvocationId:
+ // This is natively supported in compute shaders.
+ if (get_execution_model() == ExecutionModelGLCompute)
+ break;
+
+ // This is natively supported in fragment shaders in MSL 2.2.
+ if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+ break;
+
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW(
+ "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
+
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW(
+ "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
+
+ // Shaders other than compute shaders don't support the SIMD-group
+ // builtins directly, but we can emulate them using the SIMD-group
+ // functions. This might break if some of the subgroup terminated
+ // before reaching the entry point.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+ " = simd_prefix_exclusive_sum(1);");
+ });
+ break;
+ case BuiltInSubgroupSize:
+ // This is natively supported in compute shaders.
+ if (get_execution_model() == ExecutionModelGLCompute)
+ break;
+
+ // This is natively supported in fragment shaders in MSL 2.2.
+ if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+ break;
+
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
+
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
+
+ entry_func.fixup_hooks_in.push_back(
+ [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
+ break;
+ case BuiltInSubgroupEqMask:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+ to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
+ to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
+ to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
+ });
+ break;
+ case BuiltInSubgroupGeMask:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+ entry_func.fixup_hooks_in.push_back([=]() {
+ // Case where index < 32, size < 32:
+ // mask0 = bfe(0xFFFFFFFF, index, size - index);
+ // mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0
+ // Case where index < 32 but size >= 32:
+ // mask0 = bfe(0xFFFFFFFF, index, 32 - index);
+ // mask1 = bfe(0xFFFFFFFF, 0, size - 32);
+ // Case where index >= 32:
+ // mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0
+ // mask1 = bfe(0xFFFFFFFF, index - 32, size - index);
+ // This is expressed without branches to avoid divergent
+ // control flow--hence the complicated min/max expressions.
+ // This is further complicated by the fact that if you attempt
+ // to bfe out-of-bounds on Metal, undefined behavior is the
+ // result.
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+ " = uint4(extract_bits(0xFFFFFFFF, min(",
+ to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
+ to_expression(builtin_subgroup_size_id), ", 32) - (int)",
+ to_expression(builtin_subgroup_invocation_id_id),
+ ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
+ to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
+ to_expression(builtin_subgroup_size_id), " - (int)max(",
+ to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
+ });
+ break;
+ case BuiltInSubgroupGtMask:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+ entry_func.fixup_hooks_in.push_back([=]() {
+ // The same logic applies here, except now the index is one
+ // more than the subgroup invocation ID.
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+ " = uint4(extract_bits(0xFFFFFFFF, min(",
+ to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
+ to_expression(builtin_subgroup_size_id), ", 32) - (int)",
+ to_expression(builtin_subgroup_invocation_id_id),
+ " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)",
+ to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
+ to_expression(builtin_subgroup_size_id), " - (int)max(",
+ to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
+ });
+ break;
+ case BuiltInSubgroupLeMask:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+ " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
+ to_expression(builtin_subgroup_invocation_id_id),
+ " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
+ to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));");
+ });
+ break;
+ case BuiltInSubgroupLtMask:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+ " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
+ to_expression(builtin_subgroup_invocation_id_id),
+ ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
+ to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
+ });
+ break;
+ case BuiltInViewIndex:
+ if (!msl_options.multiview)
+ {
+ // According to the Vulkan spec, when not running under a multiview
+ // render pass, ViewIndex is 0.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
+ });
+ }
+ else if (msl_options.view_index_from_device_index)
+ {
+ // In this case, we take the view index from that of the device we're running on.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+ msl_options.device_index, ";");
+ });
+ // We actually don't want to set the render_target_array_index here.
+ // Since every physical device is rendering a different view,
+ // there's no need for layered rendering here.
+ }
+ else if (get_execution_model() == ExecutionModelFragment)
+ {
+ // Because we adjusted the view index in the vertex shader, we have to
+ // adjust it back here.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
+ });
+ }
+ else if (get_execution_model() == ExecutionModelVertex)
+ {
+ // Metal provides no special support for multiview, so we smuggle
+ // the view index in the instance index.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+ to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id),
+ " % ", to_expression(view_mask_buffer_id), "[1];");
+ statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id),
+ "[1];");
+ });
+ // In addition to setting the variable itself, we also need to
+ // set the render_target_array_index with it on output. We have to
+ // offset this by the base view index, because Metal isn't in on
+ // our little game here.
+ entry_func.fixup_hooks_out.push_back([=]() {
+ statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
+ to_expression(view_mask_buffer_id), "[0];");
+ });
+ }
+ break;
+ case BuiltInDeviceIndex:
+ // Metal pipelines belong to the devices which create them, so we'll
+ // need to create a MTLPipelineState for every MTLDevice in a grouped
+ // VkDevice. We can assume, then, that the device index is constant.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+ msl_options.device_index, ";");
+ });
+ break;
+ case BuiltInWorkgroupId:
+ if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId))
+ break;
+
+ // The vkCmdDispatchBase() command lets the client set the base value
+ // of WorkgroupId. Metal has no direct equivalent; we must make this
+ // adjustment ourselves.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";");
+ });
+ break;
+ case BuiltInGlobalInvocationId:
+ if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId))
+ break;
+
+ // GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize.
+ // This needs to be adjusted too.
+ entry_func.fixup_hooks_in.push_back([=]() {
+ auto &execution = this->get_entry_point();
+ uint32_t workgroup_size_id = execution.workgroup_size.constant;
+ if (workgroup_size_id)
+ statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
+ " * ", to_expression(workgroup_size_id), ";");
+ else
+ statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
+ " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ",
+ execution.workgroup_size.z, ");");
+ });
+ break;
default:
break;
}
@@ -5919,65 +8758,96 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
}
// Returns the Metal index of the resource of the specified type as used by the specified variable.
-uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype)
+uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane)
{
auto &execution = get_entry_point();
auto &var_dec = ir.meta[var.self].decoration;
+ auto &var_type = get<SPIRType>(var.basetype);
uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set;
uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding;
- // If a matching binding has been specified, find and use it
- auto itr = find_if(begin(resource_bindings), end(resource_bindings),
- [&](const pair<MSLResourceBinding, bool> &resource) -> bool {
- return var_desc_set == resource.first.desc_set && var_binding == resource.first.binding &&
- execution.model == resource.first.stage;
- });
+ // If a matching binding has been specified, find and use it.
+ auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding });
+
+ auto resource_decoration = var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler ?
+ SPIRVCrossDecorationResourceIndexSecondary :
+ SPIRVCrossDecorationResourceIndexPrimary;
+ if (plane == 1)
+ resource_decoration = SPIRVCrossDecorationResourceIndexTertiary;
+ if (plane == 2)
+ resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary;
if (itr != end(resource_bindings))
{
- itr->second = true;
+ auto &remap = itr->second;
+ remap.second = true;
switch (basetype)
{
- case SPIRType::Struct:
- return itr->first.msl_buffer;
case SPIRType::Image:
- return itr->first.msl_texture;
+ set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane);
+ return remap.first.msl_texture + plane;
case SPIRType::Sampler:
- return itr->first.msl_sampler;
+ set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler);
+ return remap.first.msl_sampler;
default:
- return 0;
+ set_extended_decoration(var.self, resource_decoration, remap.first.msl_buffer);
+ return remap.first.msl_buffer;
}
}
- // If there is no explicit mapping of bindings to MSL, use the declared binding.
- if (has_decoration(var.self, DecorationBinding))
- return get_decoration(var.self, DecorationBinding);
+ // If we have already allocated an index, keep using it.
+ if (has_extended_decoration(var.self, resource_decoration))
+ return get_extended_decoration(var.self, resource_decoration);
+
+ // If we did not explicitly remap, allocate bindings on demand.
+ // We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different.
uint32_t binding_stride = 1;
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
- binding_stride *= type.array_size_literal[i] ? type.array[i] : get<SPIRConstant>(type.array[i]).scalar();
+ binding_stride *= to_array_size_literal(type, i);
+
+ assert(binding_stride != 0);
- // If a binding has not been specified, revert to incrementing resource indices
+ // If a binding has not been specified, revert to incrementing resource indices.
uint32_t resource_index;
- switch (basetype)
+
+ bool allocate_argument_buffer_ids = false;
+ uint32_t desc_set = 0;
+
+ if (var.storage != StorageClassPushConstant)
{
- case SPIRType::Struct:
- resource_index = next_metal_resource_index_buffer;
- next_metal_resource_index_buffer += binding_stride;
- break;
- case SPIRType::Image:
- resource_index = next_metal_resource_index_texture;
- next_metal_resource_index_texture += binding_stride;
- break;
- case SPIRType::Sampler:
- resource_index = next_metal_resource_index_sampler;
- next_metal_resource_index_sampler += binding_stride;
- break;
- default:
- resource_index = 0;
- break;
+ desc_set = get_decoration(var.self, DecorationDescriptorSet);
+ allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set);
+ }
+
+ if (allocate_argument_buffer_ids)
+ {
+ // Allocate from a flat ID binding space.
+ resource_index = next_metal_resource_ids[desc_set];
+ next_metal_resource_ids[desc_set] += binding_stride;
+ }
+ else
+ {
+ // Allocate from plain bindings which are allocated per resource type.
+ switch (basetype)
+ {
+ case SPIRType::Image:
+ resource_index = next_metal_resource_index_texture;
+ next_metal_resource_index_texture += binding_stride;
+ break;
+ case SPIRType::Sampler:
+ resource_index = next_metal_resource_index_sampler;
+ next_metal_resource_index_sampler += binding_stride;
+ break;
+ default:
+ resource_index = next_metal_resource_index_buffer;
+ next_metal_resource_index_buffer += binding_stride;
+ break;
+ }
}
+
+ set_extended_decoration(var.self, resource_decoration, resource_index);
return resource_index;
}
@@ -6008,13 +8878,28 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
if (constref)
decl += "const ";
+ // If this is a combined image-sampler for a 2D image with floating-point type,
+ // we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter
+ // for a global, then we need to emit a "dynamic" combined image-sampler.
+ // Unfortunately, this is necessary to properly support passing around
+ // combined image-samplers with Y'CbCr conversions on them.
+ bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage &&
+ type.image.dim == Dim2D && type_is_floating_point(get<SPIRType>(type.image.type)) &&
+ spv_function_implementations.count(SPVFuncImplDynamicImageSampler);
+
bool builtin = is_builtin_variable(var);
- if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)
+ if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
decl += type_to_glsl(type, arg.id);
else if (builtin)
- decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)));
+ decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)), arg.id);
else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type))
decl += join(type_to_glsl(type, arg.id), "*");
+ else if (is_dynamic_img_sampler)
+ {
+ decl += join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">");
+ // Mark the variable so that we can handle passing it to another function.
+ set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+ }
else
decl += type_to_glsl(type, arg.id);
@@ -6037,6 +8922,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
// non-constant arrays, but we can create thread const from constant.
decl = string("thread const ") + decl;
decl += " (&";
+ const char *restrict_kw = to_restrict(name_id);
+ if (*restrict_kw)
+ {
+ decl += " ";
+ decl += restrict_kw;
+ }
decl += to_expression(name_id);
decl += ")";
decl += type_to_array_glsl(type);
@@ -6057,20 +8948,36 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
if (msl_options.argument_buffers)
{
- // An awkward case where we need to emit *more* address space declarations (yay!).
- // An example is where we pass down an array of buffer pointers to leaf functions.
- // It's a constant array containing pointers to constants.
- // The pointer array is always constant however. E.g.
- // device SSBO * constant (&array)[N].
- // const device SSBO * constant (&array)[N].
- // constant SSBO * constant (&array)[N].
- // However, this only matters for argument buffers, since for MSL 1.0 style codegen,
- // we emit the buffer array on stack instead, and that seems to work just fine apparently.
- if (storage == StorageClassUniform || storage == StorageClassStorageBuffer)
- decl += " constant";
+ uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet);
+ if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) &&
+ descriptor_set_is_argument_buffer(desc_set))
+ {
+ // An awkward case where we need to emit *more* address space declarations (yay!).
+ // An example is where we pass down an array of buffer pointers to leaf functions.
+ // It's a constant array containing pointers to constants.
+ // The pointer array is always constant however. E.g.
+ // device SSBO * constant (&array)[N].
+ // const device SSBO * constant (&array)[N].
+ // constant SSBO * constant (&array)[N].
+ // However, this only matters for argument buffers, since for MSL 1.0 style codegen,
+ // we emit the buffer array on stack instead, and that seems to work just fine apparently.
+
+ // If the argument was marked as being in device address space, any pointer to member would
+ // be const device, not constant.
+ if (argument_buffer_device_storage_mask & (1u << desc_set))
+ decl += " const device";
+ else
+ decl += " constant";
+ }
}
decl += " (&";
+ const char *restrict_kw = to_restrict(name_id);
+ if (*restrict_kw)
+ {
+ decl += " ";
+ decl += restrict_kw;
+ }
decl += to_expression(name_id);
decl += ")";
decl += type_to_array_glsl(type);
@@ -6088,6 +8995,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
}
decl += "&";
decl += " ";
+ decl += to_restrict(name_id);
decl += to_expression(name_id);
}
else
@@ -6266,6 +9174,7 @@ void CompilerMSL::replace_illegal_names()
"M_2_SQRTPI",
"M_SQRT2",
"M_SQRT1_2",
+ "quad_broadcast",
};
static const unordered_set<string> illegal_func_names = {
@@ -6465,6 +9374,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
// Pointer?
if (type.pointer)
{
+ const char *restrict_kw;
type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id));
switch (type.basetype)
{
@@ -6476,6 +9386,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
default:
// Anything else can be a raw pointer.
type_name += "*";
+ restrict_kw = to_restrict(id);
+ if (*restrict_kw)
+ {
+ type_name += " ";
+ type_name += restrict_kw;
+ }
break;
}
return type_name;
@@ -6527,10 +9443,14 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
type_name = "uint";
break;
case SPIRType::Int64:
- type_name = "long"; // Currently unsupported
+ if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
+ type_name = "long";
break;
case SPIRType::UInt64:
- type_name = "size_t";
+ if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
+ type_name = "ulong";
break;
case SPIRType::Half:
type_name = "half";
@@ -6748,6 +9668,234 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
return img_type_name;
}
+void CompilerMSL::emit_subgroup_op(const Instruction &i)
+{
+ const uint32_t *ops = stream(i);
+ auto op = static_cast<Op>(i.op);
+
+ // Metal 2.0 is required. iOS only supports quad ops. macOS only supports
+ // broadcast and shuffle on 10.13 (2.0), with full support in 10.14 (2.1).
+ // Note that iOS makes no distinction between a quad-group and a subgroup;
+ // all subgroups are quad-groups there.
+ if (!msl_options.supports_msl_version(2))
+ SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up.");
+
+ if (msl_options.is_ios())
+ {
+ switch (op)
+ {
+ default:
+ SPIRV_CROSS_THROW("iOS only supports quad-group operations.");
+ case OpGroupNonUniformBroadcast:
+ case OpGroupNonUniformShuffle:
+ case OpGroupNonUniformShuffleXor:
+ case OpGroupNonUniformShuffleUp:
+ case OpGroupNonUniformShuffleDown:
+ case OpGroupNonUniformQuadSwap:
+ case OpGroupNonUniformQuadBroadcast:
+ break;
+ }
+ }
+
+ if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
+ {
+ switch (op)
+ {
+ default:
+ SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.0 and up.");
+ case OpGroupNonUniformBroadcast:
+ case OpGroupNonUniformShuffle:
+ case OpGroupNonUniformShuffleXor:
+ case OpGroupNonUniformShuffleUp:
+ case OpGroupNonUniformShuffleDown:
+ break;
+ }
+ }
+
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+
+ auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar());
+ if (scope != ScopeSubgroup)
+ SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+
+ switch (op)
+ {
+ case OpGroupNonUniformElect:
+ emit_op(result_type, id, "simd_is_first()", true);
+ break;
+
+ case OpGroupNonUniformBroadcast:
+ emit_binary_func_op(result_type, id, ops[3], ops[4],
+ msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast");
+ break;
+
+ case OpGroupNonUniformBroadcastFirst:
+ emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first");
+ break;
+
+ case OpGroupNonUniformBallot:
+ emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallot");
+ break;
+
+ case OpGroupNonUniformInverseBallot:
+ emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_invocation_id_id, "spvSubgroupBallotBitExtract");
+ break;
+
+ case OpGroupNonUniformBallotBitExtract:
+ emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBallotBitExtract");
+ break;
+
+ case OpGroupNonUniformBallotFindLSB:
+ emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB");
+ break;
+
+ case OpGroupNonUniformBallotFindMSB:
+ emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB");
+ break;
+
+ case OpGroupNonUniformBallotBitCount:
+ {
+ auto operation = static_cast<GroupOperation>(ops[3]);
+ if (operation == GroupOperationReduce)
+ emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount");
+ else if (operation == GroupOperationInclusiveScan)
+ emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
+ "spvSubgroupBallotInclusiveBitCount");
+ else if (operation == GroupOperationExclusiveScan)
+ emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
+ "spvSubgroupBallotExclusiveBitCount");
+ else
+ SPIRV_CROSS_THROW("Invalid BitCount operation.");
+ break;
+ }
+
+ case OpGroupNonUniformShuffle:
+ emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle");
+ break;
+
+ case OpGroupNonUniformShuffleXor:
+ emit_binary_func_op(result_type, id, ops[3], ops[4],
+ msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor");
+ break;
+
+ case OpGroupNonUniformShuffleUp:
+ emit_binary_func_op(result_type, id, ops[3], ops[4],
+ msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up");
+ break;
+
+ case OpGroupNonUniformShuffleDown:
+ emit_binary_func_op(result_type, id, ops[3], ops[4],
+ msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down");
+ break;
+
+ case OpGroupNonUniformAll:
+ emit_unary_func_op(result_type, id, ops[3], "simd_all");
+ break;
+
+ case OpGroupNonUniformAny:
+ emit_unary_func_op(result_type, id, ops[3], "simd_any");
+ break;
+
+ case OpGroupNonUniformAllEqual:
+ emit_unary_func_op(result_type, id, ops[3], "spvSubgroupAllEqual");
+ break;
+
+ // clang-format off
+#define MSL_GROUP_OP(op, msl_op) \
+case OpGroupNonUniform##op: \
+ { \
+ auto operation = static_cast<GroupOperation>(ops[3]); \
+ if (operation == GroupOperationReduce) \
+ emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \
+ else if (operation == GroupOperationInclusiveScan) \
+ emit_unary_func_op(result_type, id, ops[4], "simd_prefix_inclusive_" #msl_op); \
+ else if (operation == GroupOperationExclusiveScan) \
+ emit_unary_func_op(result_type, id, ops[4], "simd_prefix_exclusive_" #msl_op); \
+ else if (operation == GroupOperationClusteredReduce) \
+ { \
+ /* Only cluster sizes of 4 are supported. */ \
+ uint32_t cluster_size = get<SPIRConstant>(ops[5]).scalar(); \
+ if (cluster_size != 4) \
+ SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
+ emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \
+ } \
+ else \
+ SPIRV_CROSS_THROW("Invalid group operation."); \
+ break; \
+ }
+ MSL_GROUP_OP(FAdd, sum)
+ MSL_GROUP_OP(FMul, product)
+ MSL_GROUP_OP(IAdd, sum)
+ MSL_GROUP_OP(IMul, product)
+#undef MSL_GROUP_OP
+ // The others, unfortunately, don't support InclusiveScan or ExclusiveScan.
+#define MSL_GROUP_OP(op, msl_op) \
+case OpGroupNonUniform##op: \
+ { \
+ auto operation = static_cast<GroupOperation>(ops[3]); \
+ if (operation == GroupOperationReduce) \
+ emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \
+ else if (operation == GroupOperationInclusiveScan) \
+ SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \
+ else if (operation == GroupOperationExclusiveScan) \
+ SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \
+ else if (operation == GroupOperationClusteredReduce) \
+ { \
+ /* Only cluster sizes of 4 are supported. */ \
+ uint32_t cluster_size = get<SPIRConstant>(ops[5]).scalar(); \
+ if (cluster_size != 4) \
+ SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
+ emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \
+ } \
+ else \
+ SPIRV_CROSS_THROW("Invalid group operation."); \
+ break; \
+ }
+ MSL_GROUP_OP(FMin, min)
+ MSL_GROUP_OP(FMax, max)
+ MSL_GROUP_OP(SMin, min)
+ MSL_GROUP_OP(SMax, max)
+ MSL_GROUP_OP(UMin, min)
+ MSL_GROUP_OP(UMax, max)
+ MSL_GROUP_OP(BitwiseAnd, and)
+ MSL_GROUP_OP(BitwiseOr, or)
+ MSL_GROUP_OP(BitwiseXor, xor)
+ MSL_GROUP_OP(LogicalAnd, and)
+ MSL_GROUP_OP(LogicalOr, or)
+ MSL_GROUP_OP(LogicalXor, xor)
+ // clang-format on
+
+ case OpGroupNonUniformQuadSwap:
+ {
+ // We can implement this easily based on the following table giving
+ // the target lane ID from the direction and current lane ID:
+ // Direction
+ // | 0 | 1 | 2 |
+ // ---+---+---+---+
+ // L 0 | 1 2 3
+ // a 1 | 0 3 2
+ // n 2 | 3 0 1
+ // e 3 | 2 1 0
+ // Notice that target = source ^ (direction + 1).
+ uint32_t mask = get<SPIRConstant>(ops[4]).scalar() + 1;
+ uint32_t mask_id = ir.increase_bound_by(1);
+ set<SPIRConstant>(mask_id, expression_type_id(ops[4]), mask, false);
+ emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor");
+ break;
+ }
+
+ case OpGroupNonUniformQuadBroadcast:
+ emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast");
+ break;
+
+ default:
+ SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
+ }
+
+ register_control_dependent_expression(id);
+}
+
string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
if (out_type.basetype == in_type.basetype)
@@ -6807,6 +9955,7 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
case BuiltInCullDistance:
case BuiltInLayer:
case BuiltInFragDepth:
+ case BuiltInFragStencilRefEXT:
case BuiltInSampleMask:
if (get_execution_model() == ExecutionModelTessellationControl)
break;
@@ -6815,6 +9964,12 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
break;
+ case BuiltInBaryCoordNV:
+ case BuiltInBaryCoordNoPerspNV:
+ if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point))
+ return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
+ break;
+
case BuiltInTessLevelOuter:
if (get_execution_model() == ExecutionModelTessellationEvaluation)
{
@@ -6879,7 +10034,14 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
case BuiltInPointSize:
return "point_size";
case BuiltInPosition:
- return "position";
+ if (position_invariant)
+ {
+ if (!msl_options.supports_msl_version(2, 1))
+ SPIRV_CROSS_THROW("Invariant position is only supported on MSL 2.1 and up.");
+ return "position, invariant";
+ }
+ else
+ return "position";
case BuiltInLayer:
return "render_target_array_index";
case BuiltInViewportIndex:
@@ -6900,6 +10062,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
return "threadgroup_position_in_grid";
case ExecutionModelTessellationEvaluation:
return "patch_id";
+ case ExecutionModelFragment:
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("PrimitiveId is not supported in fragment on iOS.");
+ else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2.");
+ return "primitive_id";
default:
SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model.");
}
@@ -6928,6 +10096,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
case BuiltInSamplePosition:
// Shouldn't be reached.
SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
+ case BuiltInViewIndex:
+ if (execution.model != ExecutionModelFragment)
+ SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
+ // The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
+ // so we can get it from there.
+ return "render_target_array_index";
// Fragment function out
case BuiltInFragDepth:
@@ -6938,6 +10112,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
else
return "depth(any)";
+ case BuiltInFragStencilRefEXT:
+ return "stencil";
+
// Compute function in
case BuiltInGlobalInvocationId:
return "thread_position_in_grid";
@@ -6954,13 +10131,75 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
case BuiltInLocalInvocationIndex:
return "thread_index_in_threadgroup";
+ case BuiltInSubgroupSize:
+ if (execution.model == ExecutionModelFragment)
+ {
+ if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
+ return "threads_per_simdgroup";
+ }
+ else
+ {
+ // thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0,
+ // but not in fragment.
+ return "thread_execution_width";
+ }
+
+ case BuiltInNumSubgroups:
+ if (!msl_options.supports_msl_version(2))
+ SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+ return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup";
+
+ case BuiltInSubgroupId:
+ if (!msl_options.supports_msl_version(2))
+ SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+ return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
+
+ case BuiltInSubgroupLocalInvocationId:
+ if (execution.model == ExecutionModelFragment)
+ {
+ if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
+ return "thread_index_in_simdgroup";
+ }
+ else
+ {
+ if (!msl_options.supports_msl_version(2))
+ SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+ return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
+ }
+
+ case BuiltInSubgroupEqMask:
+ case BuiltInSubgroupGeMask:
+ case BuiltInSubgroupGtMask:
+ case BuiltInSubgroupLeMask:
+ case BuiltInSubgroupLtMask:
+ // Shouldn't be reached.
+ SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL.");
+
+ case BuiltInBaryCoordNV:
+ // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3.
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Barycentrics not supported on iOS.");
+ else if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
+ return "barycentric_coord, center_perspective";
+
+ case BuiltInBaryCoordNoPerspNV:
+ // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3.
+ if (msl_options.is_ios())
+ SPIRV_CROSS_THROW("Barycentrics not supported on iOS.");
+ else if (!msl_options.supports_msl_version(2, 2))
+ SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
+ return "barycentric_coord, center_no_perspective";
+
default:
return "unsupported-built-in";
}
}
// Returns an MSL string type declaration for a SPIR-V builtin
-string CompilerMSL::builtin_type_decl(BuiltIn builtin)
+string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
{
const SPIREntryPoint &execution = get_entry_point();
switch (builtin)
@@ -7030,11 +10269,24 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin)
return "uint";
case BuiltInSamplePosition:
return "float2";
+ case BuiltInViewIndex:
+ return "uint";
+
+ case BuiltInHelperInvocation:
+ return "bool";
+
+ case BuiltInBaryCoordNV:
+ case BuiltInBaryCoordNoPerspNV:
+ // Use the type as declared, can be 1, 2 or 3 components.
+ return type_to_glsl(get_variable_data_type(get<SPIRVariable>(id)));
// Fragment function out
case BuiltInFragDepth:
return "float";
+ case BuiltInFragStencilRefEXT:
+ return "uint";
+
// Compute function in
case BuiltInGlobalInvocationId:
case BuiltInLocalInvocationId:
@@ -7042,10 +10294,20 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin)
case BuiltInWorkgroupId:
return "uint3";
case BuiltInLocalInvocationIndex:
+ case BuiltInNumSubgroups:
+ case BuiltInSubgroupId:
+ case BuiltInSubgroupSize:
+ case BuiltInSubgroupLocalInvocationId:
return "uint";
+ case BuiltInSubgroupEqMask:
+ case BuiltInSubgroupGeMask:
+ case BuiltInSubgroupGtMask:
+ case BuiltInSubgroupLeMask:
+ case BuiltInSubgroupLtMask:
+ return "uint4";
- case BuiltInHelperInvocation:
- return "bool";
+ case BuiltInDeviceIndex:
+ return "int";
default:
return "unsupported-built-in-type";
@@ -7066,11 +10328,101 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
return bi_arg;
}
-// Returns the byte size of a struct member.
-size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
+const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const
+{
+ if (member_is_remapped_physical_type(type, index))
+ return get<SPIRType>(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID));
+ else
+ return get<SPIRType>(type.member_types[index]);
+}
+
+uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const
+{
+ // Array stride in MSL is always size * array_size. sizeof(float3) == 16,
+ // unlike GLSL and HLSL where array stride would be 16 and size 12.
+
+ // We could use parent type here and recurse, but that makes creating physical type remappings
+ // far more complicated. We'd rather just create the final type, and ignore having to create the entire type
+ // hierarchy in order to compute this value, so make a temporary type on the stack.
+
+ auto basic_type = type;
+ basic_type.array.clear();
+ basic_type.array_size_literal.clear();
+ uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major);
+
+ uint32_t dimensions = uint32_t(type.array.size());
+ assert(dimensions > 0);
+ dimensions--;
+
+ // Multiply together every dimension, except the last one.
+ for (uint32_t dim = 0; dim < dimensions; dim++)
+ {
+ uint32_t array_size = to_array_size_literal(type, dim);
+ value_size *= max(array_size, 1u);
+ }
+
+ return value_size;
+}
+
+uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const
+{
+ return get_declared_type_array_stride_msl(get_physical_member_type(type, index),
+ member_is_packed_physical_type(type, index),
+ has_member_decoration(type.self, index, DecorationRowMajor));
+}
+
+uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const
+{
+ // For packed matrices, we just use the size of the vector type.
+ // Otherwise, MatrixStride == alignment, which is the size of the underlying vector type.
+ if (packed)
+ return (type.width / 8) * (row_major ? type.columns : type.vecsize);
+ else
+ return get_declared_type_alignment_msl(type, false, row_major);
+}
+
+uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const
{
- auto &type = get<SPIRType>(struct_type.member_types[index]);
+ return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index),
+ member_is_packed_physical_type(type, index),
+ has_member_decoration(type.self, index, DecorationRowMajor));
+}
+
+uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment,
+ bool ignore_padding) const
+{
+ // If we have a target size, that is the declared size as well.
+ if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget))
+ return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget);
+
+ if (struct_type.member_types.empty())
+ return 0;
+
+ uint32_t mbr_cnt = uint32_t(struct_type.member_types.size());
+
+ // In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
+ uint32_t alignment = 1;
+
+ if (!ignore_alignment)
+ {
+ for (uint32_t i = 0; i < mbr_cnt; i++)
+ {
+ uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i);
+ alignment = max(alignment, mbr_alignment);
+ }
+ }
+
+ // Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends
+ // on physical size in MSL, and the size of the struct itself is then aligned to struct alignment.
+ uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1);
+ uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1);
+ msl_size = (msl_size + alignment - 1) & ~(alignment - 1);
+ return msl_size;
+}
+// Returns the byte size of a struct member.
+uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const
+{
switch (type.basetype)
{
case SPIRType::Unknown:
@@ -7083,40 +10435,47 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
default:
{
- // For arrays, we can use ArrayStride to get an easy check.
- // Runtime arrays will have zero size so force to min of one.
if (!type.array.empty())
{
uint32_t array_size = to_array_size_literal(type);
- return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u);
+ return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u);
}
if (type.basetype == SPIRType::Struct)
+ return get_declared_struct_size_msl(type);
+
+ if (is_packed)
{
- // The size of a struct in Metal is aligned up to its natural alignment.
- auto size = get_declared_struct_size(type);
- auto alignment = get_declared_struct_member_alignment(struct_type, index);
- return (size + alignment - 1) & ~(alignment - 1);
+ return type.vecsize * type.columns * (type.width / 8);
}
+ else
+ {
+ // An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
+ uint32_t vecsize = type.vecsize;
+ uint32_t columns = type.columns;
- uint32_t component_size = type.width / 8;
- uint32_t vecsize = type.vecsize;
- uint32_t columns = type.columns;
+ if (row_major)
+ swap(vecsize, columns);
- // An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
- if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked))
- vecsize = 4;
+ if (vecsize == 3)
+ vecsize = 4;
- return component_size * vecsize * columns;
+ return vecsize * columns * (type.width / 8);
+ }
}
}
}
-// Returns the byte alignment of a struct member.
-size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const
+uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const
{
- auto &type = get<SPIRType>(struct_type.member_types[index]);
+ return get_declared_type_size_msl(get_physical_member_type(type, index),
+ member_is_packed_physical_type(type, index),
+ has_member_decoration(type.self, index, DecorationRowMajor));
+}
+// Returns the byte alignment of a type.
+uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const
+{
switch (type.basetype)
{
case SPIRType::Unknown:
@@ -7127,12 +10486,19 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
case SPIRType::Sampler:
SPIRV_CROSS_THROW("Querying alignment of opaque object.");
+ case SPIRType::Int64:
+ SPIRV_CROSS_THROW("long types are not supported in buffers in MSL.");
+ case SPIRType::UInt64:
+ SPIRV_CROSS_THROW("ulong types are not supported in buffers in MSL.");
+ case SPIRType::Double:
+ SPIRV_CROSS_THROW("double types are not supported in buffers in MSL.");
+
case SPIRType::Struct:
{
// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
uint32_t alignment = 1;
for (uint32_t i = 0; i < type.member_types.size(); i++)
- alignment = max(alignment, uint32_t(get_declared_struct_member_alignment(type, i)));
+ alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i)));
return alignment;
}
@@ -7141,25 +10507,28 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
// Alignment of packed type is the same as the underlying component or column size.
// Alignment of unpacked type is the same as the vector size.
// Alignment of 3-elements vector is the same as 4-elements (including packed using column).
- if (member_is_packed_type(struct_type, index))
- {
- // This is getting pretty complicated.
- // The special case of array of float/float2 needs to be handled here.
- uint32_t packed_type_id =
- get_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPackedType);
- const SPIRType *packed_type = packed_type_id != 0 ? &get<SPIRType>(packed_type_id) : nullptr;
- if (packed_type && is_array(*packed_type) && !is_matrix(*packed_type) &&
- packed_type->basetype != SPIRType::Struct)
- return (packed_type->width / 8) * 4;
- else
- return (type.width / 8) * (type.columns == 3 ? 4 : type.columns);
+ if (is_packed)
+ {
+ // If we have packed_T and friends, the alignment is always scalar.
+ return type.width / 8;
}
else
- return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize);
+ {
+ // This is the general rule for MSL. Size == alignment.
+ uint32_t vecsize = row_major ? type.columns : type.vecsize;
+ return (type.width / 8) * (vecsize == 3 ? 4 : vecsize);
+ }
}
}
}
+uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const
+{
+ return get_declared_type_alignment_msl(get_physical_member_type(type, index),
+ member_is_packed_physical_type(type, index),
+ has_member_decoration(type.self, index, DecorationRowMajor));
+}
+
bool CompilerMSL::skip_argument(uint32_t) const
{
return false;
@@ -7207,7 +10576,7 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar
case OpImageDrefGather:
compiler.has_sampled_images =
compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2]));
- compiler.needs_aux_buffer_def = compiler.needs_aux_buffer_def || compiler.has_sampled_images;
+ compiler.needs_swizzle_buffer_def = compiler.needs_swizzle_buffer_def || compiler.has_sampled_images;
break;
default:
break;
@@ -7215,6 +10584,17 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar
return true;
}
+// If a needed custom function wasn't added before, add it and force a recompile.
+void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func)
+{
+ if (spv_function_implementations.count(spv_func) == 0)
+ {
+ spv_function_implementations.insert(spv_func);
+ suppress_missing_prototypes = true;
+ force_recompile();
+ }
+}
+
bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
{
// Since MSL exists in a single execution scope, function prototype declarations are not
@@ -7267,6 +10647,37 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui
uses_atomics = true;
break;
+ case OpGroupNonUniformInverseBallot:
+ needs_subgroup_invocation_id = true;
+ break;
+
+ case OpGroupNonUniformBallotBitCount:
+ if (args[3] != GroupOperationReduce)
+ needs_subgroup_invocation_id = true;
+ break;
+
+ case OpArrayLength:
+ {
+ auto *var = compiler.maybe_get_backing_variable(args[2]);
+ if (var)
+ compiler.buffers_requiring_array_length.insert(var->self);
+ break;
+ }
+
+ case OpInBoundsAccessChain:
+ case OpAccessChain:
+ case OpPtrAccessChain:
+ {
+ // OpArrayLength might want to know if taking ArrayLength of an array of SSBOs.
+ uint32_t result_type = args[0];
+ uint32_t id = args[1];
+ uint32_t ptr = args[2];
+ compiler.set<SPIRExpression>(id, "", result_type, true);
+ compiler.register_read(id, ptr, true);
+ compiler.ir.ids[id].set_allow_type_rewrite();
+ break;
+ }
+
default:
break;
}
@@ -7361,32 +10772,6 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]];
if (tid && compiler.get<SPIRType>(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native)
return SPVFuncImplTexelBufferCoords;
-
- if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples)
- return SPVFuncImplTextureSwizzle;
-
- break;
- }
-
- case OpImageSampleExplicitLod:
- case OpImageSampleProjExplicitLod:
- case OpImageSampleDrefExplicitLod:
- case OpImageSampleProjDrefExplicitLod:
- case OpImageSampleImplicitLod:
- case OpImageSampleProjImplicitLod:
- case OpImageSampleDrefImplicitLod:
- case OpImageSampleProjDrefImplicitLod:
- case OpImageGather:
- case OpImageDrefGather:
- if (compiler.msl_options.swizzle_texture_samples)
- return SPVFuncImplTextureSwizzle;
- break;
-
- case OpCompositeConstruct:
- {
- auto &type = compiler.get<SPIRType>(args[0]);
- if (type.array.size() > 1) // We need to use copies to build the composite.
- return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size() - 1);
break;
}
@@ -7395,7 +10780,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
uint32_t extension_set = args[2];
if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
{
- GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
+ auto op_450 = static_cast<GLSLstd450>(args[3]);
switch (op_450)
{
case GLSLstd450Radians:
@@ -7410,6 +10795,27 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
return SPVFuncImplFindUMsb;
case GLSLstd450SSign:
return SPVFuncImplSSign;
+ case GLSLstd450Reflect:
+ {
+ auto &type = compiler.get<SPIRType>(args[0]);
+ if (type.vecsize == 1)
+ return SPVFuncImplReflectScalar;
+ break;
+ }
+ case GLSLstd450Refract:
+ {
+ auto &type = compiler.get<SPIRType>(args[0]);
+ if (type.vecsize == 1)
+ return SPVFuncImplRefractScalar;
+ break;
+ }
+ case GLSLstd450FaceForward:
+ {
+ auto &type = compiler.get<SPIRType>(args[0]);
+ if (type.vecsize == 1)
+ return SPVFuncImplFaceForwardScalar;
+ break;
+ }
case GLSLstd450MatrixInverse:
{
auto &mat_type = compiler.get<SPIRType>(args[0]);
@@ -7433,6 +10839,25 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
break;
}
+ case OpGroupNonUniformBallot:
+ return SPVFuncImplSubgroupBallot;
+
+ case OpGroupNonUniformInverseBallot:
+ case OpGroupNonUniformBallotBitExtract:
+ return SPVFuncImplSubgroupBallotBitExtract;
+
+ case OpGroupNonUniformBallotFindLSB:
+ return SPVFuncImplSubgroupBallotFindLSB;
+
+ case OpGroupNonUniformBallotFindMSB:
+ return SPVFuncImplSubgroupBallotFindMSB;
+
+ case OpGroupNonUniformBallotBitCount:
+ return SPVFuncImplSubgroupBallotBitCount;
+
+ case OpGroupNonUniformAllEqual:
+ return SPVFuncImplSubgroupAllEqual;
+
default:
break;
}
@@ -7497,14 +10922,20 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa)
meta.members.resize(max(type.member_types.size(), meta.members.size()));
}
-void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler)
+void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler)
{
auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype);
if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler)
SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type.");
if (!type.array.empty())
SPIRV_CROSS_THROW("Can not remap array of samplers.");
- constexpr_samplers[id] = sampler;
+ constexpr_samplers_by_id[id] = sampler;
+}
+
+void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding,
+ const MSLConstexprSampler &sampler)
+{
+ constexpr_samplers_by_binding[{ desc_set, binding }] = sampler;
}
void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
@@ -7529,6 +10960,15 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
case BuiltInNumWorkgroups:
case BuiltInLayer:
case BuiltInViewportIndex:
+ case BuiltInFragStencilRefEXT:
+ case BuiltInPrimitiveId:
+ case BuiltInSubgroupSize:
+ case BuiltInSubgroupLocalInvocationId:
+ case BuiltInViewIndex:
+ case BuiltInVertexIndex:
+ case BuiltInInstanceIndex:
+ case BuiltInBaseInstance:
+ case BuiltInBaseVertex:
expected_type = SPIRType::UInt;
break;
@@ -7569,6 +11009,9 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
{
case BuiltInLayer:
case BuiltInViewportIndex:
+ case BuiltInFragStencilRefEXT:
+ case BuiltInPrimitiveId:
+ case BuiltInViewIndex:
expected_type = SPIRType::UInt;
break;
@@ -7639,9 +11082,14 @@ void CompilerMSL::analyze_argument_buffers()
string name;
SPIRType::BaseType basetype;
uint32_t index;
+ uint32_t plane;
};
SmallVector<Resource> resources_in_set[kMaxArgumentBuffers];
+ bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {};
+ bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {};
+ bool needs_buffer_sizes = false;
+
ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &var) {
if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
var.storage == StorageClassStorageBuffer) &&
@@ -7658,36 +11106,116 @@ void CompilerMSL::analyze_argument_buffers()
if (desc_set >= kMaxArgumentBuffers)
SPIRV_CROSS_THROW("Descriptor set index is out of range.");
+ const MSLConstexprSampler *constexpr_sampler = nullptr;
+ if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
+ {
+ constexpr_sampler = find_constexpr_sampler(var_id);
+ if (constexpr_sampler)
+ {
+ // Mark this ID as a constexpr sampler for later in case it came from set/bindings.
+ constexpr_samplers_by_id[var_id] = *constexpr_sampler;
+ }
+ }
+
if (type.basetype == SPIRType::SampledImage)
{
add_resource_name(var_id);
- uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image);
- uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
-
- // Avoid trivial conflicts where we didn't remap.
- // This will let us at least compile test cases without having to instrument remaps.
- if (sampler_resource_index == image_resource_index)
- sampler_resource_index += type.array.empty() ? 1 : to_array_size_literal(type);
+ uint32_t plane_count = 1;
+ if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+ plane_count = constexpr_sampler->planes;
- resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index });
+ for (uint32_t i = 0; i < plane_count; i++)
+ {
+ uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i);
+ resources_in_set[desc_set].push_back(
+ { &var, to_name(var_id), SPIRType::Image, image_resource_index, i });
+ }
- if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0)
+ if (type.image.dim != DimBuffer && !constexpr_sampler)
{
+ uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
resources_in_set[desc_set].push_back(
- { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index });
+ { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 });
}
}
- else if (constexpr_samplers.count(var_id) == 0)
+ else if (!constexpr_sampler)
{
// constexpr samplers are not declared as resources.
- add_resource_name(var_id);
- resources_in_set[desc_set].push_back(
- { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) });
+ if (!msl_options.is_ios() || type.basetype != SPIRType::Image || type.image.sampled != 2)
+ {
+ add_resource_name(var_id);
+ resources_in_set[desc_set].push_back(
+ { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });
+ }
+ }
+
+ // Check if this descriptor set needs a swizzle buffer.
+ if (needs_swizzle_buffer_def && is_sampled_image_type(type))
+ set_needs_swizzle_buffer[desc_set] = true;
+ else if (buffers_requiring_array_length.count(var_id) != 0)
+ {
+ set_needs_buffer_sizes[desc_set] = true;
+ needs_buffer_sizes = true;
}
}
});
+ if (needs_swizzle_buffer_def || needs_buffer_sizes)
+ {
+ uint32_t uint_ptr_type_id = 0;
+
+ // We might have to add a swizzle buffer resource to the set.
+ for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
+ {
+ if (!set_needs_swizzle_buffer[desc_set] && !set_needs_buffer_sizes[desc_set])
+ continue;
+
+ if (uint_ptr_type_id == 0)
+ {
+ uint32_t offset = ir.increase_bound_by(2);
+ uint32_t type_id = offset;
+ uint_ptr_type_id = offset + 1;
+
+ // Create a buffer to hold extra data, including the swizzle constants.
+ SPIRType uint_type;
+ uint_type.basetype = SPIRType::UInt;
+ uint_type.width = 32;
+ set<SPIRType>(type_id, uint_type);
+
+ SPIRType uint_type_pointer = uint_type;
+ uint_type_pointer.pointer = true;
+ uint_type_pointer.pointer_depth = 1;
+ uint_type_pointer.parent_type = type_id;
+ uint_type_pointer.storage = StorageClassUniform;
+ set<SPIRType>(uint_ptr_type_id, uint_type_pointer);
+ set_decoration(uint_ptr_type_id, DecorationArrayStride, 4);
+ }
+
+ if (set_needs_swizzle_buffer[desc_set])
+ {
+ uint32_t var_id = ir.increase_bound_by(1);
+ auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
+ set_name(var_id, "spvSwizzleConstants");
+ set_decoration(var_id, DecorationDescriptorSet, desc_set);
+ set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding);
+ resources_in_set[desc_set].push_back(
+ { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
+ }
+
+ if (set_needs_buffer_sizes[desc_set])
+ {
+ uint32_t var_id = ir.increase_bound_by(1);
+ auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
+ set_name(var_id, "spvBufferSizeConstants");
+ set_decoration(var_id, DecorationDescriptorSet, desc_set);
+ set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding);
+ resources_in_set[desc_set].push_back(
+ { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
+ }
+ }
+ }
+
for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
{
auto &resources = resources_in_set[desc_set];
@@ -7702,8 +11230,20 @@ void CompilerMSL::analyze_argument_buffers()
argument_buffer_ids[desc_set] = next_id;
auto &buffer_type = set<SPIRType>(type_id);
- buffer_type.storage = StorageClassUniform;
+
buffer_type.basetype = SPIRType::Struct;
+
+ if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0)
+ {
+ buffer_type.storage = StorageClassStorageBuffer;
+ // Make sure the argument buffer gets marked as const device.
+ set_decoration(next_id, DecorationNonWritable);
+ // Need to mark the type as a Block to enable this.
+ set_decoration(type_id, DecorationBlock);
+ }
+ else
+ buffer_type.storage = StorageClassUniform;
+
set_name(type_id, join("spvDescriptorSetBuffer", desc_set));
auto &ptr_type = set<SPIRType>(ptr_type_id);
@@ -7727,6 +11267,8 @@ void CompilerMSL::analyze_argument_buffers()
auto &var = *resource.var;
auto &type = get_variable_data_type(var);
string mbr_name = ensure_valid_name(resource.name, "m");
+ if (resource.plane > 0)
+ mbr_name += join(plane_name_suffix, resource.plane);
set_member_name(buffer_type.self, member_index, mbr_name);
if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler)
@@ -7754,12 +11296,22 @@ void CompilerMSL::analyze_argument_buffers()
}
else
{
+ uint32_t binding = get_decoration(var.self, DecorationBinding);
+ SetBindingPair pair = { desc_set, binding };
+
if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
resource.basetype == SPIRType::SampledImage)
{
// Drop pointer information when we emit the resources into a struct.
buffer_type.member_types.push_back(get_variable_data_type_id(var));
- set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
+ if (resource.plane == 0)
+ set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
+ }
+ else if (buffers_requiring_dynamic_offset.count(pair))
+ {
+ // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later.
+ buffer_type.member_types.push_back(var.basetype);
+ buffers_requiring_dynamic_offset[pair].second = var.self;
}
else
{
@@ -7772,7 +11324,7 @@ void CompilerMSL::analyze_argument_buffers()
}
}
- set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationArgumentBufferID,
+ set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationResourceIndexPrimary,
resource.index);
set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID,
var.self);
@@ -7780,3 +11332,35 @@ void CompilerMSL::analyze_argument_buffers()
}
}
}
+
+bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const
+{
+ return desc_set == other.desc_set && binding == other.binding;
+}
+
+bool CompilerMSL::SetBindingPair::operator<(const SetBindingPair &other) const
+{
+ return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding);
+}
+
+bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const
+{
+ return model == other.model && desc_set == other.desc_set && binding == other.binding;
+}
+
+size_t CompilerMSL::InternalHasher::operator()(const SetBindingPair &value) const
+{
+ // Quality of hash doesn't really matter here.
+ auto hash_set = std::hash<uint32_t>()(value.desc_set);
+ auto hash_binding = std::hash<uint32_t>()(value.binding);
+ return (hash_set * 0x10001b31) ^ hash_binding;
+}
+
+size_t CompilerMSL::InternalHasher::operator()(const StageSetBinding &value) const
+{
+ // Quality of hash doesn't really matter here.
+ auto hash_model = std::hash<uint32_t>()(value.model);
+ auto hash_set = std::hash<uint32_t>()(value.desc_set);
+ auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set;
+ return (tmp_hash * 0x10001b31) ^ value.binding;
+}
diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp
index 8d3a8ad..d16b593 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp
@@ -20,6 +20,7 @@
#include "spirv_glsl.hpp"
#include <map>
#include <set>
+#include <stddef.h>
#include <unordered_map>
#include <unordered_set>
@@ -53,9 +54,9 @@ struct MSLVertexAttr
// Matches the binding index of a MSL resource for a binding within a descriptor set.
// Taken together, the stage, desc_set and binding combine to form a reference to a resource
// descriptor used in a particular shading stage.
-// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set,
-// the binding reference we remap to will become an [[id(N)]] attribute within
-// the "descriptor set" argument buffer structure.
+// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set,
+// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we
+// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure.
// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a
// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used.
struct MSLResourceBinding
@@ -121,6 +122,50 @@ enum MSLSamplerBorderColor
MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff
};
+enum MSLFormatResolution
+{
+ MSL_FORMAT_RESOLUTION_444 = 0,
+ MSL_FORMAT_RESOLUTION_422,
+ MSL_FORMAT_RESOLUTION_420,
+ MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff
+};
+
+enum MSLChromaLocation
+{
+ MSL_CHROMA_LOCATION_COSITED_EVEN = 0,
+ MSL_CHROMA_LOCATION_MIDPOINT,
+ MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff
+};
+
+enum MSLComponentSwizzle
+{
+ MSL_COMPONENT_SWIZZLE_IDENTITY = 0,
+ MSL_COMPONENT_SWIZZLE_ZERO,
+ MSL_COMPONENT_SWIZZLE_ONE,
+ MSL_COMPONENT_SWIZZLE_R,
+ MSL_COMPONENT_SWIZZLE_G,
+ MSL_COMPONENT_SWIZZLE_B,
+ MSL_COMPONENT_SWIZZLE_A,
+ MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff
+};
+
+enum MSLSamplerYCbCrModelConversion
+{
+ MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0,
+ MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY,
+ MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709,
+ MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601,
+ MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020,
+ MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff
+};
+
+enum MSLSamplerYCbCrRange
+{
+ MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0,
+ MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+ MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff
+};
+
struct MSLConstexprSampler
{
MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED;
@@ -136,13 +181,40 @@ struct MSLConstexprSampler
float lod_clamp_max = 1000.0f;
int max_anisotropy = 1;
+ // Sampler Y'CbCr conversion parameters
+ uint32_t planes = 0;
+ MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444;
+ MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST;
+ MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN;
+ MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN;
+ MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY
+ MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY;
+ MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL;
+ uint32_t bpc = 8;
+
bool compare_enable = false;
bool lod_clamp_enable = false;
bool anisotropy_enable = false;
-};
+ bool ycbcr_conversion_enable = false;
-// Tracks the type ID and member index of a struct member
-using MSLStructMemberKey = uint64_t;
+ MSLConstexprSampler()
+ {
+ for (uint32_t i = 0; i < 4; i++)
+ swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY;
+ }
+ bool swizzle_is_identity() const
+ {
+ return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY &&
+ swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY);
+ }
+ bool swizzle_has_one_or_zero() const
+ {
+ return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE ||
+ swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE ||
+ swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE ||
+ swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE);
+ }
+};
// Special constant used in a MSLResourceBinding desc_set
// element to indicate the bindings for the push constants.
@@ -152,11 +224,21 @@ static const uint32_t kPushConstDescSet = ~(0u);
// element to indicate the bindings for the push constants.
static const uint32_t kPushConstBinding = 0;
-static const uint32_t kMaxArgumentBuffers = 8;
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding for swizzle buffers.
+static const uint32_t kSwizzleBufferBinding = ~(1u);
+
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding for buffer size buffers to support OpArrayLength.
+static const uint32_t kBufferSizeBufferBinding = ~(2u);
-// The current version of the aux buffer structure. It must be incremented any time a
-// new field is added to the aux buffer.
-#define SPIRV_CROSS_MSL_AUX_BUFFER_STRUCT_VERSION 1
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding used for the argument buffer itself.
+// This buffer binding should be kept as small as possible as all automatic bindings for buffers
+// will start at max(kArgumentBufferBinding) + 1.
+static const uint32_t kArgumentBufferBinding = ~(3u);
+
+static const uint32_t kMaxArgumentBuffers = 8;
// Decompiles SPIR-V to Metal Shading Language
class CompilerMSL : public CompilerGLSL
@@ -174,17 +256,24 @@ public:
Platform platform = macOS;
uint32_t msl_version = make_msl_version(1, 2);
uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers
- uint32_t aux_buffer_index = 30;
+ uint32_t swizzle_buffer_index = 30;
uint32_t indirect_params_buffer_index = 29;
uint32_t shader_output_buffer_index = 28;
uint32_t shader_patch_output_buffer_index = 27;
uint32_t shader_tess_factor_buffer_index = 26;
+ uint32_t buffer_size_buffer_index = 25;
+ uint32_t view_mask_buffer_index = 24;
+ uint32_t dynamic_offsets_buffer_index = 23;
uint32_t shader_input_wg_index = 0;
+ uint32_t device_index = 0;
bool enable_point_size_builtin = true;
bool disable_rasterization = false;
bool capture_output_to_buffer = false;
bool swizzle_texture_samples = false;
bool tess_domain_origin_lower_left = false;
+ bool multiview = false;
+ bool view_index_from_device_index = false;
+ bool dispatch_base = false;
// Enable use of MSL 2.0 indirect argument buffers.
// MSL 2.0 must also be enabled.
@@ -212,7 +301,7 @@ public:
msl_version = make_msl_version(major, minor, patch);
}
- bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0)
+ bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const
{
return msl_version >= make_msl_version(major, minor, patch);
}
@@ -243,31 +332,52 @@ public:
}
// Provide feedback to calling API to allow it to pass an auxiliary
- // buffer if the shader needs it.
- bool needs_aux_buffer() const
+ // swizzle buffer if the shader needs it.
+ bool needs_swizzle_buffer() const
+ {
+ return used_swizzle_buffer;
+ }
+
+ // Provide feedback to calling API to allow it to pass a buffer
+ // containing STORAGE_BUFFER buffer sizes to support OpArrayLength.
+ bool needs_buffer_size_buffer() const
+ {
+ return !buffers_requiring_array_length.empty();
+ }
+
+ // Provide feedback to calling API to allow it to pass a buffer
+ // containing the view mask for the current multiview subpass.
+ bool needs_view_mask_buffer() const
{
- return used_aux_buffer;
+ return msl_options.multiview && !msl_options.view_index_from_device_index;
+ }
+
+ // Provide feedback to calling API to allow it to pass a buffer
+ // containing the dispatch base workgroup ID.
+ bool needs_dispatch_base_buffer() const
+ {
+ return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2);
}
// Provide feedback to calling API to allow it to pass an output
// buffer if the shader needs it.
bool needs_output_buffer() const
{
- return capture_output_to_buffer && stage_out_var_id != 0;
+ return capture_output_to_buffer && stage_out_var_id != ID(0);
}
// Provide feedback to calling API to allow it to pass a patch output
// buffer if the shader needs it.
bool needs_patch_output_buffer() const
{
- return capture_output_to_buffer && patch_stage_out_var_id != 0;
+ return capture_output_to_buffer && patch_stage_out_var_id != ID(0);
}
// Provide feedback to calling API to allow it to pass an input threadgroup
// buffer if the shader needs it.
bool needs_input_threadgroup_mem() const
{
- return capture_output_to_buffer && stage_in_var_id != 0;
+ return capture_output_to_buffer && stage_in_var_id != ID(0);
}
explicit CompilerMSL(std::vector<uint32_t> spirv);
@@ -288,14 +398,52 @@ public:
// the set/binding combination was used by the MSL code.
void add_msl_resource_binding(const MSLResourceBinding &resource);
+ // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
+ // in this shader. index is the index within the dynamic offset buffer to use. This
+ // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC
+ // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers
+ // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with
+ // an offset taken from the dynamic offset buffer.
+ void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index);
+
// When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets.
// This corresponds to VK_KHR_push_descriptor in Vulkan.
void add_discrete_descriptor_set(uint32_t desc_set);
+ // If an argument buffer is large enough, it may need to be in the device storage space rather than
+ // constant. Opt-in to this behavior here on a per set basis.
+ void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage);
+
// Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader.
bool is_msl_vertex_attribute_used(uint32_t location);
+
+ // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here.
+ // Constexpr samplers are always assumed to be emitted.
+ // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped
+ // by remap_constexpr_sampler(_by_binding).
bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding);
+ // This must only be called after a successful call to CompilerMSL::compile().
+ // For a variable resource ID obtained through reflection API, report the automatically assigned resource index.
+ // If the descriptor set was part of an argument buffer, report the [[id(N)]],
+ // or [[buffer/texture/sampler]] binding for other resources.
+ // If the resource was a combined image sampler, report the image binding here,
+ // use the _secondary version of this call to query the sampler half of the resource.
+ // If no binding exists, uint32_t(-1) is returned.
+ uint32_t get_automatic_msl_resource_binding(uint32_t id) const;
+
+ // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the
+ // sampler's binding is returned instead. For any other resource type, -1 is returned.
+ uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const;
+
+ // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images,
+ // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned.
+ uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const;
+
+ // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images,
+ // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned.
+ uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const;
+
// Compiles the SPIR-V code into Metal Shading Language.
std::string compile() override;
@@ -305,7 +453,12 @@ public:
// The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler.
// This can be used on both combined image/samplers (sampler2D) or standalone samplers.
// The remapped sampler must not be an array of samplers.
- void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler);
+ // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways.
+ void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler);
+
+ // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID.
+ // Remaps based on ID take priority over set/binding remaps.
+ void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler);
// If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect
// to use for a particular location. The default is 4 if number of components is not overridden.
@@ -337,13 +490,44 @@ protected:
SPVFuncImplInverse4x4,
SPVFuncImplInverse3x3,
SPVFuncImplInverse2x2,
- SPVFuncImplRowMajor2x3,
- SPVFuncImplRowMajor2x4,
- SPVFuncImplRowMajor3x2,
- SPVFuncImplRowMajor3x4,
- SPVFuncImplRowMajor4x2,
- SPVFuncImplRowMajor4x3,
+ // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's
+ // emitted before them.
+ SPVFuncImplForwardArgs,
+ // Likewise, this must come before *Swizzle.
+ SPVFuncImplGetSwizzle,
SPVFuncImplTextureSwizzle,
+ SPVFuncImplGatherSwizzle,
+ SPVFuncImplGatherCompareSwizzle,
+ SPVFuncImplSubgroupBallot,
+ SPVFuncImplSubgroupBallotBitExtract,
+ SPVFuncImplSubgroupBallotFindLSB,
+ SPVFuncImplSubgroupBallotFindMSB,
+ SPVFuncImplSubgroupBallotBitCount,
+ SPVFuncImplSubgroupAllEqual,
+ SPVFuncImplReflectScalar,
+ SPVFuncImplRefractScalar,
+ SPVFuncImplFaceForwardScalar,
+ SPVFuncImplChromaReconstructNearest2Plane,
+ SPVFuncImplChromaReconstructNearest3Plane,
+ SPVFuncImplChromaReconstructLinear422CositedEven2Plane,
+ SPVFuncImplChromaReconstructLinear422CositedEven3Plane,
+ SPVFuncImplChromaReconstructLinear422Midpoint2Plane,
+ SPVFuncImplChromaReconstructLinear422Midpoint3Plane,
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane,
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane,
+ SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane,
+ SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane,
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane,
+ SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane,
+ SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane,
+ SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane,
+ SPVFuncImplExpandITUFullRange,
+ SPVFuncImplExpandITUNarrowRange,
+ SPVFuncImplConvertYCbCrBT709,
+ SPVFuncImplConvertYCbCrBT601,
+ SPVFuncImplConvertYCbCrBT2020,
+ SPVFuncImplDynamicImageSampler,
+
SPVFuncImplArrayCopyMultidimMax = 6
};
@@ -351,30 +535,37 @@ protected:
void emit_instruction(const Instruction &instr) override;
void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count) override;
+ void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op,
+ const uint32_t *args, uint32_t count) override;
void emit_header() override;
void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
+ void emit_subgroup_op(const Instruction &i) override;
+ std::string to_texture_op(const Instruction &i, bool *forward,
+ SmallVector<uint32_t> &inherited_expressions) override;
void emit_fixup() override;
std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const std::string &qualifier = "");
void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const std::string &qualifier = "", uint32_t base_offset = 0) override;
+ void emit_struct_padding_target(const SPIRType &type) override;
std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
std::string sampler_type(const SPIRType &type);
std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
- size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
- std::string to_func_call_arg(uint32_t id) override;
+ std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
std::string to_name(uint32_t id, bool allow_alias = true) const override;
- std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
- bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref,
- uint32_t lod) override;
- std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
+ std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
+ bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod,
+ uint32_t minlod) override;
+ std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x,
uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
- uint32_t comp, uint32_t sample, bool *p_forward) override;
+ uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override;
std::string to_initializer_expression(const SPIRVariable &var) override;
- std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override;
+ std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id,
+ bool is_packed, bool row_major) override;
+
std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
bool skip_argument(uint32_t id) const override;
std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override;
@@ -385,7 +576,8 @@ protected:
bool is_patch_block(const SPIRType &type);
bool is_non_native_row_major_matrix(uint32_t id) override;
bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
- std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override;
+ std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
+ bool is_packed) override;
void preprocess_op_codes();
void localize_global_variables();
@@ -426,7 +618,7 @@ protected:
void emit_specialization_constants_and_structs();
void emit_interface_block(uint32_t ib_var_id);
bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs);
- void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows);
+
void fix_up_shader_inputs_outputs();
std::string func_type_decl(SPIRType &type);
@@ -439,21 +631,43 @@ protected:
std::string ensure_valid_name(std::string name, std::string pfx);
std::string to_sampler_expression(uint32_t id);
std::string to_swizzle_expression(uint32_t id);
+ std::string to_buffer_size_expression(uint32_t id);
std::string builtin_qualifier(spv::BuiltIn builtin);
- std::string builtin_type_decl(spv::BuiltIn builtin);
+ std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0);
std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma);
std::string member_attribute_qualifier(const SPIRType &type, uint32_t index);
std::string argument_decl(const SPIRFunction::Parameter &arg);
std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp);
- uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype);
+ uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0);
uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr);
- size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const;
+
+ // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output.
+ // These values can change depending on various extended decorations which control packing rules.
+ // We need to make these rules match up with SPIR-V declared rules.
+ uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const;
+ uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
+ uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const;
+ uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const;
+
+ uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
+ uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const;
+ uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const;
+ uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const;
+
+ const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const;
+
+ uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false,
+ bool ignore_padding = false) const;
+
std::string to_component_argument(uint32_t id);
- void align_struct(SPIRType &ib_type);
- bool is_member_packable(SPIRType &ib_type, uint32_t index);
- MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
+ void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs);
+ void mark_scalar_layout_structs(const SPIRType &ib_type);
+ void mark_struct_members_packed(const SPIRType &type);
+ void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index);
+ bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
std::string get_argument_address_space(const SPIRVariable &argument);
- std::string get_type_address_space(const SPIRType &type, uint32_t id);
+ std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
+ const char *to_restrict(uint32_t id, bool space = true);
SPIRType &get_stage_in_struct_type();
SPIRType &get_stage_out_struct_type();
SPIRType &get_patch_stage_in_struct_type();
@@ -466,8 +680,10 @@ protected:
void add_pragma_line(const std::string &line);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
- void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override;
+ void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage,
+ spv::StorageClass rhs_storage) override;
void build_implicit_builtins();
+ uint32_t build_constant_uint_array_pointer();
void emit_entry_point_declarations() override;
uint32_t builtin_frag_coord_id = 0;
uint32_t builtin_sample_id_id = 0;
@@ -475,9 +691,17 @@ protected:
uint32_t builtin_base_vertex_id = 0;
uint32_t builtin_instance_idx_id = 0;
uint32_t builtin_base_instance_id = 0;
+ uint32_t builtin_view_idx_id = 0;
+ uint32_t builtin_layer_id = 0;
uint32_t builtin_invocation_id_id = 0;
uint32_t builtin_primitive_id_id = 0;
- uint32_t aux_buffer_id = 0;
+ uint32_t builtin_subgroup_invocation_id_id = 0;
+ uint32_t builtin_subgroup_size_id = 0;
+ uint32_t builtin_dispatch_base_id = 0;
+ uint32_t swizzle_buffer_id = 0;
+ uint32_t buffer_size_buffer_id = 0;
+ uint32_t view_mask_buffer_id = 0;
+ uint32_t dynamic_offsets_buffer_id = 0;
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@@ -488,36 +712,65 @@ protected:
bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);
+ void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);
+
+ std::string convert_to_f32(const std::string &expr, uint32_t components);
+
Options msl_options;
std::set<SPVFuncImpl> spv_function_implementations;
std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;
std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_builtin;
std::unordered_set<uint32_t> vtx_attrs_in_use;
std::unordered_map<uint32_t, uint32_t> fragment_output_components;
- std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
SmallVector<uint32_t> vars_needing_early_declaration;
- SmallVector<std::pair<MSLResourceBinding, bool>> resource_bindings;
+ struct SetBindingPair
+ {
+ uint32_t desc_set;
+ uint32_t binding;
+ bool operator==(const SetBindingPair &other) const;
+ bool operator<(const SetBindingPair &other) const;
+ };
+
+ struct StageSetBinding
+ {
+ spv::ExecutionModel model;
+ uint32_t desc_set;
+ uint32_t binding;
+ bool operator==(const StageSetBinding &other) const;
+ };
+
+ struct InternalHasher
+ {
+ size_t operator()(const SetBindingPair &value) const;
+ size_t operator()(const StageSetBinding &value) const;
+ };
+
+ std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
+
uint32_t next_metal_resource_index_buffer = 0;
uint32_t next_metal_resource_index_texture = 0;
uint32_t next_metal_resource_index_sampler = 0;
-
- uint32_t stage_in_var_id = 0;
- uint32_t stage_out_var_id = 0;
- uint32_t patch_stage_in_var_id = 0;
- uint32_t patch_stage_out_var_id = 0;
- uint32_t stage_in_ptr_var_id = 0;
- uint32_t stage_out_ptr_var_id = 0;
+ // Intentionally uninitialized, works around MSVC 2013 bug.
+ uint32_t next_metal_resource_ids[kMaxArgumentBuffers];
+
+ VariableID stage_in_var_id = 0;
+ VariableID stage_out_var_id = 0;
+ VariableID patch_stage_in_var_id = 0;
+ VariableID patch_stage_out_var_id = 0;
+ VariableID stage_in_ptr_var_id = 0;
+ VariableID stage_out_ptr_var_id = 0;
bool has_sampled_images = false;
bool needs_vertex_idx_arg = false;
bool needs_instance_idx_arg = false;
bool is_rasterization_disabled = false;
bool capture_output_to_buffer = false;
- bool needs_aux_buffer_def = false;
- bool used_aux_buffer = false;
+ bool needs_swizzle_buffer_def = false;
+ bool used_swizzle_buffer = false;
bool added_builtin_tess_level = false;
+ bool needs_subgroup_invocation_id = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
@@ -525,17 +778,29 @@ protected:
std::string patch_stage_out_var_name = "patchOut";
std::string sampler_name_suffix = "Smplr";
std::string swizzle_name_suffix = "Swzl";
+ std::string buffer_size_name_suffix = "BufferSize";
+ std::string plane_name_suffix = "Plane";
std::string input_wg_var_name = "gl_in";
std::string output_buffer_var_name = "spvOut";
std::string patch_output_buffer_var_name = "spvPatchOut";
std::string tess_factor_buffer_var_name = "spvTessLevel";
spv::Op previous_instruction_opcode = spv::OpNop;
- std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers;
+ // Must be ordered since declaration is in a specific order.
+ std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id;
+ std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding;
+ const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const;
+
+ std::unordered_set<uint32_t> buffers_requiring_array_length;
SmallVector<uint32_t> buffer_arrays;
+ // Must be ordered since array is in a specific order.
+ std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;
+
uint32_t argument_buffer_ids[kMaxArgumentBuffers];
uint32_t argument_buffer_discrete_mask = 0;
+ uint32_t argument_buffer_device_storage_mask = 0;
+
void analyze_argument_buffers();
bool descriptor_set_is_argument_buffer(uint32_t desc_set) const;
@@ -544,6 +809,8 @@ protected:
bool suppress_missing_prototypes = false;
+ void add_spv_func_and_recompile(SPVFuncImpl spv_func);
+
// OpcodeHandler that handles several MSL preprocessing operations.
struct OpCodePreprocessor : OpcodeHandler
{
@@ -561,6 +828,7 @@ protected:
bool suppress_missing_prototypes = false;
bool uses_atomics = false;
bool uses_resource_write = false;
+ bool needs_subgroup_invocation_id = false;
};
// OpcodeHandler that scans for uses of sampled images
diff --git a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp
index 1c0a830..08dcff9 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp
@@ -60,6 +60,7 @@ static bool is_valid_spirv_version(uint32_t version)
case 0x10200: // SPIR-V 1.2
case 0x10300: // SPIR-V 1.3
case 0x10400: // SPIR-V 1.4
+ case 0x10500: // SPIR-V 1.5
return true;
default:
@@ -162,12 +163,15 @@ void Parser::parse(const Instruction &instruction)
case OpSourceContinued:
case OpSourceExtension:
case OpNop:
- case OpLine:
- case OpNoLine:
- case OpString:
case OpModuleProcessed:
break;
+ case OpString:
+ {
+ set<SPIRString>(ops[0], extract_string(ir.spirv, instruction.offset + 1));
+ break;
+ }
+
case OpMemoryModel:
ir.addressing_model = static_cast<AddressingModel>(ops[0]);
ir.memory_model = static_cast<MemoryModel>(ops[1]);
@@ -240,6 +244,8 @@ void Parser::parse(const Instruction &instruction)
auto ext = extract_string(ir.spirv, instruction.offset + 1);
if (ext == "GLSL.std.450")
set<SPIRExtension>(id, SPIRExtension::GLSL);
+ else if (ext == "DebugInfo")
+ set<SPIRExtension>(id, SPIRExtension::SPV_debug_info);
else if (ext == "SPV_AMD_shader_ballot")
set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot);
else if (ext == "SPV_AMD_shader_explicit_vertex_parameter")
@@ -256,6 +262,14 @@ void Parser::parse(const Instruction &instruction)
break;
}
+ case OpExtInst:
+ {
+ // The SPIR-V debug information extended instructions might come at global scope.
+ if (current_block)
+ current_block->ops.push_back(instruction);
+ break;
+ }
+
case OpEntryPoint:
{
auto itr =
@@ -265,7 +279,9 @@ void Parser::parse(const Instruction &instruction)
// Strings need nul-terminator and consume the whole word.
uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2);
- e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length);
+
+ for (uint32_t i = strlen_words + 2; i < instruction.length; i++)
+ e.interface_variables.push_back(ops[i]);
// Set the name of the entry point in case OpName is not provided later.
ir.set_name(ops[1], e.name);
@@ -556,10 +572,6 @@ void Parser::parse(const Instruction &instruction)
type.image.sampled = ops[6];
type.image.format = static_cast<ImageFormat>(ops[7]);
type.image.access = (length >= 9) ? static_cast<AccessQualifier>(ops[8]) : AccessQualifierMax;
-
- if (type.image.sampled == 0)
- SPIRV_CROSS_THROW("OpTypeImage Sampled parameter must not be zero.");
-
break;
}
@@ -649,7 +661,7 @@ void Parser::parse(const Instruction &instruction)
}
}
- if (type.type_alias == 0)
+ if (type.type_alias == TypeID(0))
global_struct_cache.push_back(id);
}
break;
@@ -999,12 +1011,12 @@ void Parser::parse(const Instruction &instruction)
ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT;
ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT;
- ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self;
+ ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self);
// Don't add loop headers to continue blocks,
// which would make it impossible branch into the loop header since
// they are treated as continues.
- if (current_block->continue_block != current_block->self)
+ if (current_block->continue_block != BlockID(current_block->self))
ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT;
if (length >= 3)
@@ -1030,6 +1042,37 @@ void Parser::parse(const Instruction &instruction)
break;
}
+ case OpLine:
+ {
+ // OpLine might come at global scope, but we don't care about those since they will not be declared in any
+ // meaningful correct order.
+ // Ignore all OpLine directives which live outside a function.
+ if (current_block)
+ current_block->ops.push_back(instruction);
+
+ // Line directives may arrive before first OpLabel.
+ // Treat this as the line of the function declaration,
+ // so warnings for arguments can propagate properly.
+ if (current_function)
+ {
+ // Store the first one we find and emit it before creating the function prototype.
+ if (current_function->entry_line.file_id == 0)
+ {
+ current_function->entry_line.file_id = ops[0];
+ current_function->entry_line.line_literal = ops[1];
+ }
+ }
+ break;
+ }
+
+ case OpNoLine:
+ {
+ // OpNoLine might come at global scope.
+ if (current_block)
+ current_block->ops.push_back(instruction);
+ break;
+ }
+
// Actual opcodes.
default:
{
diff --git a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp
index 0b2c585..1e8f615 100644
--- a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp
+++ b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp
@@ -61,6 +61,7 @@ public:
void end_json_array();
void emit_json_array_value(const std::string &value);
void emit_json_array_value(uint32_t value);
+ void emit_json_array_value(bool value);
std::string str() const
{
@@ -158,6 +159,16 @@ void Stream::emit_json_array_value(uint32_t value)
stack.top().second = true;
}
+void Stream::emit_json_array_value(bool value)
+{
+ if (stack.empty() || stack.top().first != Type::Array)
+ SPIRV_CROSS_THROW("Invalid JSON state");
+ if (stack.top().second)
+ statement_inner(",\n");
+ statement_no_return(value ? "true" : "false");
+ stack.top().second = true;
+}
+
void Stream::begin_json_object()
{
if (!stack.empty() && stack.top().second)
@@ -256,6 +267,8 @@ string CompilerReflection::compile()
json_stream = std::make_shared<simple_json::Stream>();
json_stream->set_current_locale_radix_character(current_locale_radix_character);
json_stream->begin_json_object();
+ fixup_type_alias();
+ reorder_type_alias();
emit_entry_points();
emit_types();
emit_resources();
@@ -283,7 +296,7 @@ void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag)
{
auto name = type_to_glsl(type);
- if (type.type_alias != 0)
+ if (type.type_alias != TypeID(0))
return;
if (!emitted_open_tag)
@@ -422,6 +435,28 @@ void CompilerReflection::emit_entry_points()
json_stream->begin_json_object();
json_stream->emit_json_key_value("name", e.name);
json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model));
+ if (e.execution_model == ExecutionModelGLCompute)
+ {
+ const auto &spv_entry = get_entry_point(e.name, e.execution_model);
+
+ SpecializationConstant spec_x, spec_y, spec_z;
+ get_work_group_size_specialization_constants(spec_x, spec_y, spec_z);
+
+ json_stream->emit_json_key_array("workgroup_size");
+ json_stream->emit_json_array_value(spec_x.id != ID(0) ? spec_x.constant_id :
+ spv_entry.workgroup_size.x);
+ json_stream->emit_json_array_value(spec_y.id != ID(0) ? spec_y.constant_id :
+ spv_entry.workgroup_size.y);
+ json_stream->emit_json_array_value(spec_z.id != ID(0) ? spec_z.constant_id :
+ spv_entry.workgroup_size.z);
+ json_stream->end_json_array();
+
+ json_stream->emit_json_key_array("workgroup_size_is_spec_constant_id");
+ json_stream->emit_json_array_value(spec_x.id != ID(0));
+ json_stream->emit_json_array_value(spec_y.id != ID(0));
+ json_stream->emit_json_array_value(spec_z.id != ID(0));
+ json_stream->end_json_array();
+ }
json_stream->end_json_object();
}
json_stream->end_json_array();
@@ -466,7 +501,7 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVector<Resou
bool is_block = get_decoration_bitset(type.self).get(DecorationBlock) ||
get_decoration_bitset(type.self).get(DecorationBufferBlock);
- uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id;
+ ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id);
json_stream->begin_json_object();